[15/15] x86_64: Add asinpif with FMA
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
Commit Message
The CORE-MATH asinpif implementation showed slight worse performance
when using x86_64 baseline ABI due its usage of fma on fast path.
This patch adds a ifunc variant with similar performance for x86_64-v3.
---
sysdeps/ieee754/flt-32/s_asinpif.c | 2 ++
sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c | 4 +++
sysdeps/x86_64/fpu/multiarch/s_asinpif.c | 33 ++++++++++++++++++++
4 files changed, 41 insertions(+)
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinpif.c
Comments
On Fri, Jan 31, 2025 at 1:22 PM Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
> The CORE-MATH asinpif implementation showed slight worse performance
> when using x86_64 baseline ABI due its usage of fma on fast path.
> This patch adds a ifunc variant with similar performance for x86_64-v3.
> ---
> sysdeps/ieee754/flt-32/s_asinpif.c | 2 ++
> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c | 4 +++
> sysdeps/x86_64/fpu/multiarch/s_asinpif.c | 33 ++++++++++++++++++++
> 4 files changed, 41 insertions(+)
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
> create mode 100644 sysdeps/x86_64/fpu/multiarch/s_asinpif.c
>
> diff --git a/sysdeps/ieee754/flt-32/s_asinpif.c b/sysdeps/ieee754/flt-32/s_asinpif.c
> index 585dc3f06e..42dbafdf97 100644
> --- a/sysdeps/ieee754/flt-32/s_asinpif.c
> +++ b/sysdeps/ieee754/flt-32/s_asinpif.c
> @@ -133,4 +133,6 @@ __asinpif (float x)
> return r;
> }
> }
> +#ifndef __asinpif
> libm_alias_float (__asinpi, asinpi)
> +#endif
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> index e823d2fcc6..e00d76eb81 100644
> --- a/sysdeps/x86_64/fpu/multiarch/Makefile
> +++ b/sysdeps/x86_64/fpu/multiarch/Makefile
> @@ -13,6 +13,7 @@ CFLAGS-s_tan-fma.c = -mfma -mavx2
> CFLAGS-s_sincos-fma.c = -mfma -mavx2
> CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
> CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
> +CFLAGS-s_asinpif-fma.c = -mfma -mavx2
>
> CFLAGS-e_exp2f-fma.c = -mfma -mavx2
> CFLAGS-e_expf-fma.c = -mfma -mavx2
> @@ -68,6 +69,7 @@ libm-sysdep_routines += \
> e_logf-fma \
> e_pow-fma \
> e_powf-fma \
> + s_asinpif-fma \
> s_atan-avx \
> s_atan-fma \
> s_ceil-sse4_1 \
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
> new file mode 100644
> index 0000000000..3c85bec1c9
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
> @@ -0,0 +1,4 @@
> +#define __asinpif __asinpif_fma
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif.c
> new file mode 100644
> index 0000000000..98add04b7f
> --- /dev/null
> +++ b/sysdeps/x86_64/fpu/multiarch/s_asinpif.c
> @@ -0,0 +1,33 @@
> +/* Multiple versions of asinpif.
> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +# include <libm-alias-float.h>
> +
> +extern float __redirect_asinpif (float);
> +
> +# define SYMBOL_NAME asinpif
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_asinpif, __asinpif, IFUNC_SELECTOR ());
> +libm_alias_float (__asinpi, asinpi)
> +
> +# define __asinpif __asinpif_sse2
> +#endif
> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
> --
> 2.43.0
>
LGTM
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
LGTM
Reviewed-by: DJ Delorie <dj@redhat.com>
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
> The CORE-MATH asinpif implementation showed slight worse performance
> when using x86_64 baseline ABI due its usage of fma on fast path.
> This patch adds a ifunc variant with similar performance for x86_64-v3.
Do you have the performance numbers to report?
> ---
> sysdeps/ieee754/flt-32/s_asinpif.c | 2 ++
> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
> sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c | 4 +++
> sysdeps/x86_64/fpu/multiarch/s_asinpif.c | 33 ++++++++++++++++++++
So this is an ifunc of asinpf() itself, not fma...
> diff --git a/sysdeps/ieee754/flt-32/s_asinpif.c b/sysdeps/ieee754/flt-32/s_asinpif.c
> return r;
> }
> }
> +#ifndef __asinpif
> libm_alias_float (__asinpi, asinpi)
> +#endif
Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
> CFLAGS-s_sincos-fma.c = -mfma -mavx2
> CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
> CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
> +CFLAGS-s_asinpif-fma.c = -mfma -mavx2
>
> CFLAGS-e_exp2f-fma.c = -mfma -mavx2
> CFLAGS-e_expf-fma.c = -mfma -mavx2
> @@ -68,6 +69,7 @@ libm-sysdep_routines += \
> e_logf-fma \
> e_pow-fma \
> e_powf-fma \
> + s_asinpif-fma \
> s_atan-avx \
> s_atan-fma \
> s_ceil-sse4_1 \
Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
> +#define __asinpif __asinpif_fma
> +#define SECTION __attribute__ ((section (".text.fma")))
> +
> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
So same code, but without the alias and built with different options, to
make a variant. Ok.
> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif.c
> +/* Multiple versions of asinpif.
> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
Ok.
> +#include <sysdeps/x86/isa-level.h>
> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
> +# include <libm-alias-float.h>
> +
> +extern float __redirect_asinpif (float);
> +
> +# define SYMBOL_NAME asinpif
> +# include "ifunc-fma.h"
> +
> +libc_ifunc_redirected (__redirect_asinpif, __asinpif, IFUNC_SELECTOR ());
> +libm_alias_float (__asinpi, asinpi)
> +
> +# define __asinpif __asinpif_sse2
> +#endif
> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
So if the build picks this *.c file, it adds the ifunc code but also
includes the default asinpf() as __asinpif_sse2(). Ok.
On 10/02/25 22:31, DJ Delorie wrote:
>
> LGTM
>
> Reviewed-by: DJ Delorie <dj@redhat.com>
>
> Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>> The CORE-MATH asinpif implementation showed slight worse performance
>> when using x86_64 baseline ABI due its usage of fma on fast path.
>> This patch adds a ifunc variant with similar performance for x86_64-v3.
>
> Do you have the performance numbers to report?
It turns out that Paul has suggested to just remove the fma operation [1]
on the fast-path; which makes the code faster on x86_64 and x86_64-v2
and the iFUNC variant not required anymore. I will update the asinpif
with the new performance numbers.
[1] https://gitlab.inria.fr/core-math/core-math/-/commit/ccdd955b6fa614c8d88bd1d6fd323abaf43cfb3f
>
>> ---
>> sysdeps/ieee754/flt-32/s_asinpif.c | 2 ++
>> sysdeps/x86_64/fpu/multiarch/Makefile | 2 ++
>> sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c | 4 +++
>> sysdeps/x86_64/fpu/multiarch/s_asinpif.c | 33 ++++++++++++++++++++
>
> So this is an ifunc of asinpf() itself, not fma...
>
>> diff --git a/sysdeps/ieee754/flt-32/s_asinpif.c b/sysdeps/ieee754/flt-32/s_asinpif.c
>> return r;
>> }
>> }
>> +#ifndef __asinpif
>> libm_alias_float (__asinpi, asinpi)
>> +#endif
>
> Ok.
>
>> diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
>> CFLAGS-s_sincos-fma.c = -mfma -mavx2
>> CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
>> CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
>> +CFLAGS-s_asinpif-fma.c = -mfma -mavx2
>>
>> CFLAGS-e_exp2f-fma.c = -mfma -mavx2
>> CFLAGS-e_expf-fma.c = -mfma -mavx2
>> @@ -68,6 +69,7 @@ libm-sysdep_routines += \
>> e_logf-fma \
>> e_pow-fma \
>> e_powf-fma \
>> + s_asinpif-fma \
>> s_atan-avx \
>> s_atan-fma \
>> s_ceil-sse4_1 \
>
> Ok.
>
>> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif-fma.c
>> +#define __asinpif __asinpif_fma
>> +#define SECTION __attribute__ ((section (".text.fma")))
>> +
>> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
>
> So same code, but without the alias and built with different options, to
> make a variant. Ok.
>
>> diff --git a/sysdeps/x86_64/fpu/multiarch/s_asinpif.c b/sysdeps/x86_64/fpu/multiarch/s_asinpif.c
>> +/* Multiple versions of asinpif.
>> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>
> Ok.
>
>> +#include <sysdeps/x86/isa-level.h>
>> +#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
>> +# include <libm-alias-float.h>
>> +
>> +extern float __redirect_asinpif (float);
>> +
>> +# define SYMBOL_NAME asinpif
>> +# include "ifunc-fma.h"
>> +
>> +libc_ifunc_redirected (__redirect_asinpif, __asinpif, IFUNC_SELECTOR ());
>> +libm_alias_float (__asinpi, asinpi)
>> +
>> +# define __asinpif __asinpif_sse2
>> +#endif
>> +#include <sysdeps/ieee754/flt-32/s_asinpif.c>
>
> So if the build picks this *.c file, it adds the ifunc code but also
> includes the default asinpf() as __asinpif_sse2(). Ok.
>
@@ -133,4 +133,6 @@ __asinpif (float x)
return r;
}
}
+#ifndef __asinpif
libm_alias_float (__asinpi, asinpi)
+#endif
@@ -13,6 +13,7 @@ CFLAGS-s_tan-fma.c = -mfma -mavx2
CFLAGS-s_sincos-fma.c = -mfma -mavx2
CFLAGS-s_exp10m1f-fma.c = -mfma -mavx2
CFLAGS-s_exp2m1f-fma.c = -mfma -mavx2
+CFLAGS-s_asinpif-fma.c = -mfma -mavx2
CFLAGS-e_exp2f-fma.c = -mfma -mavx2
CFLAGS-e_expf-fma.c = -mfma -mavx2
@@ -68,6 +69,7 @@ libm-sysdep_routines += \
e_logf-fma \
e_pow-fma \
e_powf-fma \
+ s_asinpif-fma \
s_atan-avx \
s_atan-fma \
s_ceil-sse4_1 \
new file mode 100644
@@ -0,0 +1,4 @@
+#define __asinpif __asinpif_fma
+#define SECTION __attribute__ ((section (".text.fma")))
+
+#include <sysdeps/ieee754/flt-32/s_asinpif.c>
new file mode 100644
@@ -0,0 +1,33 @@
+/* Multiple versions of asinpif.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL
+# include <libm-alias-float.h>
+
+extern float __redirect_asinpif (float);
+
+# define SYMBOL_NAME asinpif
+# include "ifunc-fma.h"
+
+libc_ifunc_redirected (__redirect_asinpif, __asinpif, IFUNC_SELECTOR ());
+libm_alias_float (__asinpi, asinpi)
+
+# define __asinpif __asinpif_sse2
+#endif
+#include <sysdeps/ieee754/flt-32/s_asinpif.c>