x86_64: Add asinh with FMA
Checks
| Context | Check | Description |
| --- | --- | --- |
| redhat-pt-bot/TryBot-apply_patch | success | Patch applied to master at the time it was sent |
| linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 | success | Build passed |
| linaro-tcwg-bot/tcwg_glibc_build--master-arm | success | Build passed |
| linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 | success | Test passed |
| linaro-tcwg-bot/tcwg_glibc_check--master-arm | success | Test passed |
Commit Message
FMA and non-FMA performance are similar on SPR.
                        Before    After     Improvement
reciprocal-throughput   12.226    12.4064   -1%
latency                 49.4701   48.4424   2%
---
benchtests/asinh-inputs | 1 +
sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++
sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++
sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++
5 files changed, 46 insertions(+)
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c
Comments
On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>
> FMA and non-FMA performance are similar on SPR.
>
> Before After Improvement
> reciprocal-throughput 12.226 12.4064 -1%
> latency 49.4701 48.4424 2%
Since there is no real advantage for FMA, it isn't necessary.
> ---
> benchtests/asinh-inputs | 1 +
> sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++
> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++
> sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++
> 5 files changed, 46 insertions(+)
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c
>
> diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs
> index 8dfa37c49f..946e7b4d22 100644
> --- a/benchtests/asinh-inputs
> +++ b/benchtests/asinh-inputs
> @@ -1,6 +1,7 @@
> ## args: double
> ## ret: double
> ## includes: math.h
> +## name: workload-random
> 0x1.408fb643484cep-11
> 0x1.f0763423f1d52p-23
> -0x1.681573418e494p4
> diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c
> index 7fd281d791..04b686a28e 100644
> --- a/sysdeps/ieee754/dbl-64/s_asinh.c
> +++ b/sysdeps/ieee754/dbl-64/s_asinh.c
> @@ -32,6 +32,11 @@ static const double
> ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
> huge = 1.00000000000000000000e+300;
>
> +#ifndef SECTION
> +# define SECTION
> +#endif
> +
> +SECTION
> double
> __asinh (double x)
> {
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index 3403422443..1ab46bb446 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2
> CFLAGS-e_log2-fma.c = -mfma -mavx2
> CFLAGS-e_pow-fma.c = -mfma -mavx2
> CFLAGS-e_sinh-fma.c = -mfma -mavx2
> +CFLAGS-s_asinh-fma.c = -mfma -mavx2
> CFLAGS-s_atan-fma.c = -mfma -mavx2
> CFLAGS-s_expm1-fma.c = -mfma -mavx2
> CFLAGS-s_log1p-fma.c = -mfma -mavx2
> @@ -73,6 +74,7 @@ libm-sysdep_routines += \
> e_pow-fma \
> e_powf-fma \
> e_sinh-fma \
> + s_asinh-fma \
> s_atan-avx \
> s_atan-fma \
> s_ceil-sse4_1 \
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> new file mode 100644
> index 0000000000..293a255005
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> @@ -0,0 +1,7 @@
> +#define __asinh __asinh_fma
> +#define __ieee754_log __ieee754_log_fma
> +#define __log1p __log1p_fma
> +
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
> new file mode 100644
> index 0000000000..509d74c96b
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
> @@ -0,0 +1,31 @@
> +/* Multiple versions of asinh.
> + Copyright (C) 2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +
> +extern double __redirect_asinh (double);
> +
> +# define SYMBOL_NAME asinh
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ());
> +
> +# define __asinh __asinh_sse2
> +#endif
> +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
> --
> 2.48.1
>
On Thu, Mar 13, 2025 at 6:04 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
> >
> > FMA and non-FMA performance are similar on SPR.
> >
> > Before After Improvement
> > reciprocal-throughput 12.226 12.4064 -1%
> > latency 49.4701 48.4424 2%
>
> Since there is no real advantage for FMA, it isn't necessary.
>
>
This request comes from https://issues.redhat.com/browse/RHEL-1063
We don't see any real advantage because, in upstream glibc, the underlying
function calls
  __ieee754_log
  __log1p
are already FMA-enabled and invoked via ifunc.
An FMA version could be beneficial if the underlying function calls are
replaced by direct calls.
> > ---
> > benchtests/asinh-inputs | 1 +
> > sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++
> > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> > sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++
> > sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++
> > 5 files changed, 46 insertions(+)
> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c
> >
> > diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs
> > index 8dfa37c49f..946e7b4d22 100644
> > --- a/benchtests/asinh-inputs
> > +++ b/benchtests/asinh-inputs
> > @@ -1,6 +1,7 @@
> > ## args: double
> > ## ret: double
> > ## includes: math.h
> > +## name: workload-random
> > 0x1.408fb643484cep-11
> > 0x1.f0763423f1d52p-23
> > -0x1.681573418e494p4
> > diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c
> b/sysdeps/ieee754/dbl-64/s_asinh.c
> > index 7fd281d791..04b686a28e 100644
> > --- a/sysdeps/ieee754/dbl-64/s_asinh.c
> > +++ b/sysdeps/ieee754/dbl-64/s_asinh.c
> > @@ -32,6 +32,11 @@ static const double
> > ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
> > huge = 1.00000000000000000000e+300;
> >
> > +#ifndef SECTION
> > +# define SECTION
> > +#endif
> > +
> > +SECTION
> > double
> > __asinh (double x)
> > {
> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile
> b/sysdeps/x86_64/fpu/multiarch/Makefile
> > index 3403422443..1ab46bb446 100644
> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> > @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2
> > CFLAGS-e_log2-fma.c = -mfma -mavx2
> > CFLAGS-e_pow-fma.c = -mfma -mavx2
> > CFLAGS-e_sinh-fma.c = -mfma -mavx2
> > +CFLAGS-s_asinh-fma.c = -mfma -mavx2
> > CFLAGS-s_atan-fma.c = -mfma -mavx2
> > CFLAGS-s_expm1-fma.c = -mfma -mavx2
> > CFLAGS-s_log1p-fma.c = -mfma -mavx2
> > @@ -73,6 +74,7 @@ libm-sysdep_routines += \
> > e_pow-fma \
> > e_powf-fma \
> > e_sinh-fma \
> > + s_asinh-fma \
> > s_atan-avx \
> > s_atan-fma \
> > s_ceil-sse4_1 \
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> > new file mode 100644
> > index 0000000000..293a255005
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
> > @@ -0,0 +1,7 @@
> > +#define __asinh __asinh_fma
> > +#define __ieee754_log __ieee754_log_fma
> > +#define __log1p __log1p_fma
> > +
> > +#define SECTION __attribute__ ((section (".text.fma")))
> > +
> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c
> b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
> > new file mode 100644
> > index 0000000000..509d74c96b
> > --- /dev/null
> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
> > @@ -0,0 +1,31 @@
> > +/* Multiple versions of asinh.
> > + Copyright (C) 2025 Free Software Foundation, Inc.
> > + This file is part of the GNU C Library.
> > +
> > + The GNU C Library is free software; you can redistribute it and/or
> > + modify it under the terms of the GNU Lesser General Public
> > + License as published by the Free Software Foundation; either
> > + version 2.1 of the License, or (at your option) any later version.
> > +
> > + The GNU C Library is distributed in the hope that it will be useful,
> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> > + Lesser General Public License for more details.
> > +
> > + You should have received a copy of the GNU Lesser General Public
> > + License along with the GNU C Library; if not, see
> > + <https://www.gnu.org/licenses/>. */
> > +
> > +#include <sysdeps/x86/isa-level.h>
> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> > +
> > +extern double __redirect_asinh (double);
> > +
> > +# define SYMBOL_NAME asinh
> > +# include "ifunc-fma.h"
> > +
> > +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ());
> > +
> > +# define __asinh __asinh_sse2
> > +#endif
> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
> > --
> > 2.48.1
> >
>
>
> --
> H.J.
>
On Thu, Mar 13, 2025 at 8:50 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Thu, Mar 13, 2025 at 6:04 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Thu, Mar 13, 2025 at 5:27 PM Sunil K Pandey <skpgkp2@gmail.com> wrote:
>> >
>> > FMA and non-FMA performance are similar on SPR.
>> >
>> > Before After Improvement
>> > reciprocal-throughput 12.226 12.4064 -1%
>> > latency 49.4701 48.4424 2%
>>
>> Since there is no real advantage for FMA, it isn't necessary.
>>
>
> This request comes from https://issues.redhat.com/browse/RHEL-1063
> We don't see any real advantage because in upstream glibc underlying
> function call
>
> __ieee754_log
> __log1p
>
> are already FMA enabled and invoked via ifunc.
>
> FMA version could be beneficial if the underlying function is replaced
> by direct call.
Please do that to see if it improves performance.
Thanks.
>
>
>>
>> > ---
>> > benchtests/asinh-inputs | 1 +
>> > sysdeps/ieee754/dbl-64/s_asinh.c | 5 ++++
>> > sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
>> > sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c | 7 +++++
>> > sysdeps/x86_64/fpu/multiarch/s_asinh.c | 31 ++++++++++++++++++++++
>> > 5 files changed, 46 insertions(+)
>> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
>> > create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinh.c
>> >
>> > diff --git a/benchtests/asinh-inputs b/benchtests/asinh-inputs
>> > index 8dfa37c49f..946e7b4d22 100644
>> > --- a/benchtests/asinh-inputs
>> > +++ b/benchtests/asinh-inputs
>> > @@ -1,6 +1,7 @@
>> > ## args: double
>> > ## ret: double
>> > ## includes: math.h
>> > +## name: workload-random
>> > 0x1.408fb643484cep-11
>> > 0x1.f0763423f1d52p-23
>> > -0x1.681573418e494p4
>> > diff --git a/sysdeps/ieee754/dbl-64/s_asinh.c b/sysdeps/ieee754/dbl-64/s_asinh.c
>> > index 7fd281d791..04b686a28e 100644
>> > --- a/sysdeps/ieee754/dbl-64/s_asinh.c
>> > +++ b/sysdeps/ieee754/dbl-64/s_asinh.c
>> > @@ -32,6 +32,11 @@ static const double
>> > ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
>> > huge = 1.00000000000000000000e+300;
>> >
>> > +#ifndef SECTION
>> > +# define SECTION
>> > +#endif
>> > +
>> > +SECTION
>> > double
>> > __asinh (double x)
>> > {
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > index 3403422443..1ab46bb446 100644
>> > --- a/sysdeps/x86_64/fpu/multiarch/Makefile
>> > +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
>> > @@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2
>> > CFLAGS-e_log2-fma.c = -mfma -mavx2
>> > CFLAGS-e_pow-fma.c = -mfma -mavx2
>> > CFLAGS-e_sinh-fma.c = -mfma -mavx2
>> > +CFLAGS-s_asinh-fma.c = -mfma -mavx2
>> > CFLAGS-s_atan-fma.c = -mfma -mavx2
>> > CFLAGS-s_expm1-fma.c = -mfma -mavx2
>> > CFLAGS-s_log1p-fma.c = -mfma -mavx2
>> > @@ -73,6 +74,7 @@ libm-sysdep_routines += \
>> > e_pow-fma \
>> > e_powf-fma \
>> > e_sinh-fma \
>> > + s_asinh-fma \
>> > s_atan-avx \
>> > s_atan-fma \
>> > s_ceil-sse4_1 \
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
>> > new file mode 100644
>> > index 0000000000..293a255005
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh-fma.c
>> > @@ -0,0 +1,7 @@
>> > +#define __asinh __asinh_fma
>> > +#define __ieee754_log __ieee754_log_fma
>> > +#define __log1p __log1p_fma
>> > +
>> > +#define SECTION __attribute__ ((section (".text.fma")))
>> > +
>> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
>> > diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinh.c b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
>> > new file mode 100644
>> > index 0000000000..509d74c96b
>> > --- /dev/null
>> > +++ b/sysdeps/x86_64/fpu/multiarch/s_asinh.c
>> > @@ -0,0 +1,31 @@
>> > +/* Multiple versions of asinh.
>> > + Copyright (C) 2025 Free Software Foundation, Inc.
>> > + This file is part of the GNU C Library.
>> > +
>> > + The GNU C Library is free software; you can redistribute it and/or
>> > + modify it under the terms of the GNU Lesser General Public
>> > + License as published by the Free Software Foundation; either
>> > + version 2.1 of the License, or (at your option) any later version.
>> > +
>> > + The GNU C Library is distributed in the hope that it will be useful,
>> > + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> > + Lesser General Public License for more details.
>> > +
>> > + You should have received a copy of the GNU Lesser General Public
>> > + License along with the GNU C Library; if not, see
>> > + <https://www.gnu.org/licenses/>. */
>> > +
>> > +#include <sysdeps/x86/isa-level.h>
>> > +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
>> > +
>> > +extern double __redirect_asinh (double);
>> > +
>> > +# define SYMBOL_NAME asinh
>> > +# include "ifunc-fma.h"
>> > +
>> > +libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ());
>> > +
>> > +# define __asinh __asinh_sse2
>> > +#endif
>> > +#include <sysdeps/ieee754/dbl-64/s_asinh.c>
>> > --
>> > 2.48.1
>> >
>>
>>
>> --
>> H.J.
>
>
>
@@ -1,6 +1,7 @@
## args: double
## ret: double
## includes: math.h
+## name: workload-random
0x1.408fb643484cep-11
0x1.f0763423f1d52p-23
-0x1.681573418e494p4
@@ -32,6 +32,11 @@ static const double
ln2 = 6.93147180559945286227e-01, /* 0x3FE62E42, 0xFEFA39EF */
huge = 1.00000000000000000000e+300;
+#ifndef SECTION
+# define SECTION
+#endif
+
+SECTION
double
__asinh (double x)
{
@@ -7,6 +7,7 @@ CFLAGS-e_log-fma.c = -mfma -mavx2
CFLAGS-e_log2-fma.c = -mfma -mavx2
CFLAGS-e_pow-fma.c = -mfma -mavx2
CFLAGS-e_sinh-fma.c = -mfma -mavx2
+CFLAGS-s_asinh-fma.c = -mfma -mavx2
CFLAGS-s_atan-fma.c = -mfma -mavx2
CFLAGS-s_expm1-fma.c = -mfma -mavx2
CFLAGS-s_log1p-fma.c = -mfma -mavx2
@@ -73,6 +74,7 @@ libm-sysdep_routines += \
e_pow-fma \
e_powf-fma \
e_sinh-fma \
+ s_asinh-fma \
s_atan-avx \
s_atan-fma \
s_ceil-sse4_1 \
new file mode 100644
@@ -0,0 +1,7 @@
+#define __asinh __asinh_fma
+#define __ieee754_log __ieee754_log_fma
+#define __log1p __log1p_fma
+
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/dbl-64/s_asinh.c>
new file mode 100644
@@ -0,0 +1,31 @@
+/* Multiple versions of asinh.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+
+extern double __redirect_asinh (double);
+
+# define SYMBOL_NAME asinh
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_asinh, __asinh, IFUNC_SELECTOR ());
+
+# define __asinh __asinh_sse2
+#endif
+#include <sysdeps/ieee754/dbl-64/s_asinh.c>