[powerpc] fegetenv_and_set_rn now uses the builtins provided by GCC.
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
On powerpc, SET_RESTORE_ROUND uses inline assembly to optimize the
prologue get/save/set rounding mode operations for POWER9 and
later by using 'mffscrn' where possible; this was introduced by
commit f1c56cdff09f650ad721fae026eb6a3651631f3d.
From GCC version 14 onwards, the __builtin_set_fpscr_rn builtin
returns the FPSCR fields in a double. This feature is
available on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro
is defined along with __builtin_set_fpscr_rn enabled.
GCC commit ef3bbc69d15707e4db6e2f198c621effb636cc26 adds
this feature.
Changes are done to use __builtin_set_fpscr_rn instead of mffscrn
or mffscrni in __fe_mffscrn(rn).
Suggested-by: Carl Love <cel@us.ibm.com>
---
sysdeps/powerpc/fpu/fenv_libc.h | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
Comments
On 12/09/23 05:27, Manjunath Matti wrote:
> On powerpc, SET_RESTORE_ROUND uses inline assembly to optimize the
> prologue get/save/set rounding mode operations for POWER9 and
> later by using 'mffscrn' where possible, this was introduced by
> commit f1c56cdff09f650ad721fae026eb6a3651631f3d.
>
> GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn
> which now returns the FPSCR fields in a double. This feature is
> available on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro
> is defined along with __builtin_set_fpscr_rn enabled.
> GCC commit ef3bbc69d15707e4db6e2f198c621effb636cc26 adds
> this feature.
>
> Changes are done to use __builtin_set_fpscr_rn instead of mffscrn
> or mffscrni in __fe_mffscrn(rn).
>
> Suggested-by: Carl Love <cel@us.ibm.com>
> ---
> sysdeps/powerpc/fpu/fenv_libc.h | 23 ++++++++++++++++++++---
> 1 file changed, 20 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
> index fa5e1c697e..55484eb229 100644
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -84,8 +84,15 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
> __fr.fenv; \
> })
>
> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
> + now returns the FPSCR fields in a double. This support is available
> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
> + To retain backward compatibility with older GCC, we still retain the
> + old inline assembly implementation. */
> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
> +#define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
> +#elif defined _ARCH_PWR9
> /* Like fegetenv_control, but also sets the rounding mode. */
> -#ifdef _ARCH_PWR9
> #define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
> #else
> /* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
I think the macro would be better defined as:
#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
# define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
#else
# define __fe_mffscrn(rn) [...]
#endif
Then there is no need to redefine fegetenv_and_set_rn nor __fesetround_inline.
> @@ -148,7 +155,12 @@ typedef union
> static inline int
> __fesetround_inline (int round)
> {
> -#ifdef _ARCH_PWR9
> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
> + now returns the FPSCR fields in a double. This support is available
> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
> + __builtin_set_fpscr_rn (round);
> +#elif defined _ARCH_PWR9
> __fe_mffscrn (round);
> #else
> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
> @@ -178,7 +190,12 @@ __fesetround_inline (int round)
> static inline void
> __fesetround_inline_nocheck (const int round)
> {
> -#ifdef _ARCH_PWR9
> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
> + now returns the FPSCR fields in a double. This support is available
> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
> + __builtin_set_fpscr_rn (round);
> +#elif defined _ARCH_PWR9
> __fe_mffscrn (round);
> #else
> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
On 12/09/23 6:57 pm, Adhemerval Zanella Netto wrote:
>
> On 12/09/23 05:27, Manjunath Matti wrote:
>> On powerpc, SET_RESTORE_ROUND uses inline assembly to optimize the
>> prologue get/save/set rounding mode operations for POWER9 and
>> later by using 'mffscrn' where possible, this was introduced by
>> commit f1c56cdff09f650ad721fae026eb6a3651631f3d.
>>
>> GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn
>> which now returns the FPSCR fields in a double. This feature is
>> available on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro
>> is defined along with __builtin_set_fpscr_rn enabled.
>> GCC commit ef3bbc69d15707e4db6e2f198c621effb636cc26 adds
>> this feature.
>>
>> Changes are done to use __builtin_set_fpscr_rn instead of mffscrn
>> or mffscrni in __fe_mffscrn(rn).
>>
>> Suggested-by: Carl Love <cel@us.ibm.com>
>> ---
>> sysdeps/powerpc/fpu/fenv_libc.h | 23 ++++++++++++++++++++---
>> 1 file changed, 20 insertions(+), 3 deletions(-)
>>
>> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
>> index fa5e1c697e..55484eb229 100644
>> --- a/sysdeps/powerpc/fpu/fenv_libc.h
>> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
>> @@ -84,8 +84,15 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>> __fr.fenv; \
>> })
>>
>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>> + now returns the FPSCR fields in a double. This support is available
>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
>> + To retain backward compatibility with older GCC, we still retain the
>> + old inline assembly implementation. */
>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>> +#define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
>> +#elif defined _ARCH_PWR9
>> /* Like fegetenv_control, but also sets the rounding mode. */
>> -#ifdef _ARCH_PWR9
>> #define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
>> #else
>> /* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
> I think the macro would be better defined as:
>
> #ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
> # define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
> #else
> # define __fe_mffscrn(rn) [...]
> #endif
>
> Then there is no need to redefine fegetenv_and_set_rn nor __fesetround_inline.
So this is what you are asking me to do, right?
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -89,22 +89,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
To retain backward compatibility with older GCC, we still retain the
old inline assembly implementation. */
-#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
+#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
#define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
-#elif defined _ARCH_PWR9
+#else
/* Like fegetenv_control, but also sets the rounding mode. */
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
-#else
-/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
- but not sufficient, because it does not set the rounding mode.
- Explicitly set the rounding mode when 'mffscrn' actually doesn't. */
-#define fegetenv_and_set_rn(rn) \
- ({register fenv_union_t __fr; \
- __fr.fenv = __fe_mffscrn (rn); \
- if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
- __fesetround_inline (rn); \
- __fr.fenv; \
- })
#endif
I was under the impression that the redefinition of fegetenv_and_set_rn and
__fesetround_inline was needed for architectures below POWER9, i.e. POWER8, 7, etc.
Please correct me if I am wrong.
>> @@ -148,7 +155,12 @@ typedef union
>> static inline int
>> __fesetround_inline (int round)
>> {
>> -#ifdef _ARCH_PWR9
>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>> + now returns the FPSCR fields in a double. This support is available
>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>> + __builtin_set_fpscr_rn (round);
>> +#elif defined _ARCH_PWR9
>> __fe_mffscrn (round);
>> #else
>> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
>> @@ -178,7 +190,12 @@ __fesetround_inline (int round)
>> static inline void
>> __fesetround_inline_nocheck (const int round)
>> {
>> -#ifdef _ARCH_PWR9
>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>> + now returns the FPSCR fields in a double. This support is available
>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>> + __builtin_set_fpscr_rn (round);
>> +#elif defined _ARCH_PWR9
>> __fe_mffscrn (round);
>> #else
>> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
So the rest is OK? I will update the patch with the suggested changes.
On 20/09/23 08:28, Manjunath S Matti wrote:
>
> On 12/09/23 6:57 pm, Adhemerval Zanella Netto wrote:
>>
>> On 12/09/23 05:27, Manjunath Matti wrote:
>>> On powerpc, SET_RESTORE_ROUND uses inline assembly to optimize the
>>> prologue get/save/set rounding mode operations for POWER9 and
>>> later by using 'mffscrn' where possible, this was introduced by
>>> commit f1c56cdff09f650ad721fae026eb6a3651631f3d.
>>>
>>> GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn
>>> which now returns the FPSCR fields in a double. This feature is
>>> available on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro
>>> is defined along with __builtin_set_fpscr_rn enabled.
>>> GCC commit ef3bbc69d15707e4db6e2f198c621effb636cc26 adds
>>> this feature.
>>>
>>> Changes are done to use __builtin_set_fpscr_rn instead of mffscrn
>>> or mffscrni in __fe_mffscrn(rn).
>>>
>>> Suggested-by: Carl Love <cel@us.ibm.com>
>>> ---
>>> sysdeps/powerpc/fpu/fenv_libc.h | 23 ++++++++++++++++++++---
>>> 1 file changed, 20 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
>>> index fa5e1c697e..55484eb229 100644
>>> --- a/sysdeps/powerpc/fpu/fenv_libc.h
>>> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
>>> @@ -84,8 +84,15 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
>>> __fr.fenv; \
>>> })
>>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>>> + now returns the FPSCR fields in a double. This support is available
>>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
>>> + To retain backward compatibility with older GCC, we still retain the
>>> + old inline assembly implementation. */
>>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>>> +#define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
>>> +#elif defined _ARCH_PWR9
>>> /* Like fegetenv_control, but also sets the rounding mode. */
>>> -#ifdef _ARCH_PWR9
>>> #define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
>>> #else
>>> /* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
>> I think the macro would be better defined as:
>>
>> #ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
>> # define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
>> #else
>> # define __fe_mffscrn(rn) [...]
>> #endif
>>
>> Then there is no need to redefine fegetenv_and_set_rn nor __fesetround_inline.
>
> So this is what you are asking me to do right ?
>
> --- a/sysdeps/powerpc/fpu/fenv_libc.h
> +++ b/sysdeps/powerpc/fpu/fenv_libc.h
> @@ -89,22 +89,11 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
> on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
> To retain backward compatibility with older GCC, we still retain the
> old inline assembly implementation. */
> -#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
> +#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
> #define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
> -#elif defined _ARCH_PWR9
> +#else
> /* Like fegetenv_control, but also sets the rounding mode. */
> #define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
> -#else
> -/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
> - but not sufficient, because it does not set the rounding mode.
> - Explicitly set the rounding mode when 'mffscrn' actually doesn't. */
> -#define fegetenv_and_set_rn(rn) \
> - ({register fenv_union_t __fr; \
> - __fr.fenv = __fe_mffscrn (rn); \
> - if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
> - __fesetround_inline (rn); \
> - __fr.fenv; \
> - })
> #endif
>
>
> I was under the impression that the redefine of fegetenv_and_set_rn and __fesetround_inline
>
> was needed for architectures below POWE9, i.e POWER8, 7, etc.
>
> Please correct me if I am wrong.
Not really, my understanding is it only requires:
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index fa5e1c697e..8cdce1d6e7 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -68,7 +68,12 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
__fr; \
})
-#define __fe_mffscrn(rn) \
+/* Starting with GCC 14 __builtin_set_fpscr_rn can be used to return the
+ FPSCR fields as a double. */
+#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
+# define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
+#else
+# define __fe_mffscrn(rn) \
({register fenv_union_t __fr; \
if (__builtin_constant_p (rn)) \
__asm__ __volatile__ ( \
@@ -83,6 +88,7 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
} \
__fr.fenv; \
})
+#endif
/* Like fegetenv_control, but also sets the rounding mode. */
#ifdef _ARCH_PWR9
That is sufficient, since fegetenv_and_set_rn will call the __fe_mffscrn macro,
and this will be used by the libc_feresetround_ppc macros.
>
>>> @@ -148,7 +155,12 @@ typedef union
>>> static inline int
>>> __fesetround_inline (int round)
>>> {
>>> -#ifdef _ARCH_PWR9
>>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>>> + now returns the FPSCR fields in a double. This support is available
>>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
>>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>>> + __builtin_set_fpscr_rn (round);
>>> +#elif defined _ARCH_PWR9
>>> __fe_mffscrn (round);
>>> #else
>>> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
>>> @@ -178,7 +190,12 @@ __fesetround_inline (int round)
>>> static inline void
>>> __fesetround_inline_nocheck (const int round)
>>> {
>>> -#ifdef _ARCH_PWR9
>>> +/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
>>> + now returns the FPSCR fields in a double. This support is available
>>> + on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
>>> +#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
>>> + __builtin_set_fpscr_rn (round);
>>> +#elif defined _ARCH_PWR9
>>> __fe_mffscrn (round);
>>> #else
>>> if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
>
>
> So the rest it OK ? I will update the patch with suggested changes.
>
On 9/20/23 6:28 AM, Manjunath S Matti wrote:
>>> GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn
>>> which now returns the FPSCR fields in a double. This feature is
>>> available on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro
>>> is defined along with __builtin_set_fpscr_rn enabled.
>>> GCC commit ef3bbc69d15707e4db6e2f198c621effb636cc26 adds
>>> this feature.
[snip]
> I was under the impression that the redefine of fegetenv_and_set_rn and __fesetround_inline
>
> was needed for architectures below POWE9, i.e POWER8, 7, etc.
>
> Please correct me if I am wrong.
The __builtin_set_fpscr_rn(rn) built-in was changed so that it now
always returns the FPSCR as a return value. It is cpu agnostic, so
that means it works on all Power cpus, including Power8 and earlier.
The only difference you'll see on Power9/Power10 versus the earlier cpus,
is that we'll use the new-to-Power9 mffscrn and mffscrni instructions
which automatically return the updated FPSCR value. On the older cpus,
we'll emit some extra code to copy the FPSCR value as the return value.
The optimizer will eliminate that if you don't want/need the return value.
The __SET_FPSCR_RN_RETURNS_FPSCR__ macro allows you to tell whether you're
compiling the old built-in with a void return value, or the new one which
has a double return value. It doesn't care which cpu you use it on.
So short answer is, Adhemerval's suggested change looks correct to me.
Peter
@@ -84,8 +84,15 @@ extern const fenv_t *__fe_mask_env (void) attribute_hidden;
__fr.fenv; \
})
+/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
+ now returns the FPSCR fields in a double. This support is available
+ on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.
+ To retain backward compatibility with older GCC, we still retain the
+ old inline assembly implementation. */
+#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
+#define fegetenv_and_set_rn(rn) __builtin_set_fpscr_rn (rn)
+#elif defined _ARCH_PWR9
/* Like fegetenv_control, but also sets the rounding mode. */
-#ifdef _ARCH_PWR9
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
@@ -148,7 +155,12 @@ typedef union
static inline int
__fesetround_inline (int round)
{
-#ifdef _ARCH_PWR9
+/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
+ now returns the FPSCR fields in a double. This support is available
+ on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
+#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
+ __builtin_set_fpscr_rn (round);
+#elif defined _ARCH_PWR9
__fe_mffscrn (round);
#else
if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
@@ -178,7 +190,12 @@ __fesetround_inline (int round)
static inline void
__fesetround_inline_nocheck (const int round)
{
-#ifdef _ARCH_PWR9
+/* GCC version 14 onwards supports builtins as __builtin_set_fpscr_rn and
+ now returns the FPSCR fields in a double. This support is available
+ on Power9 when the __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined. */
+#if defined _ARCH_PWR9 && defined __SET_FPSCR_RN_RETURNS_FPSCR__
+ __builtin_set_fpscr_rn (round);
+#elif defined _ARCH_PWR9
__fe_mffscrn (round);
#else
if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))