x86-64: Don't use SSE resolvers for ISA level 3 or above
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
fail
|
Patch failed to apply
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
fail
|
Patch failed to apply
|
Commit Message
When glibc is built with ISA level 3 or above enabled, SSE resolvers
aren't available and glibc fails to build:
ld: .../elf/librtld.os: in function `init_cpu_features':
.../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
/usr/local/bin/ld: final link failed: bad value
For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
_dl_tlsdesc_dynamic_fxsave and also exclude _dl_tlsdesc_dynamic_fxsave.
This fixes BZ #31429.
---
sysdeps/x86/cpu-features.c | 17 +++++++++++------
sysdeps/x86_64/dl-tlsdesc.S | 15 +++++++++------
2 files changed, 20 insertions(+), 12 deletions(-)
Comments
On Wed, Feb 28, 2024 at 11:51 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> When glibc is built with ISA level 3 or above enabled, SSE resolvers
> aren't available and glibc fails to build:
>
> ld: .../elf/librtld.os: in function `init_cpu_features':
> .../elf/../sysdeps/x86/cpu-features.c:1200:(.text+0x1445f): undefined reference to `_dl_runtime_resolve_fxsave'
> ld: .../elf/librtld.os: relocation R_X86_64_PC32 against undefined hidden symbol `_dl_runtime_resolve_fxsave' can not be used when making a shared object
> /usr/local/bin/ld: final link failed: bad value
>
> For ISA level 3 or above, don't use _dl_runtime_resolve_fxsave nor
> _dl_tlsdesc_dynamic_fxsave and also exclude _dl_tlsdesc_dynamic_fxsave.
>
> This fixes BZ #31429.
> ---
> sysdeps/x86/cpu-features.c | 17 +++++++++++------
> sysdeps/x86_64/dl-tlsdesc.S | 15 +++++++++------
> 2 files changed, 20 insertions(+), 12 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index d71e8d3d2e..e7c7ece462 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -18,6 +18,7 @@
>
> #include <dl-hwcap.h>
> #include <libc-pointer-arith.h>
> +#include <isa-level.h>
> #include <get-isa-level.h>
> #include <cacheinfo.h>
> #include <dl-cacheinfo.h>
> @@ -1195,7 +1196,9 @@ no_cpuid:
> TUNABLE_CALLBACK (set_x86_shstk));
> #endif
>
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> +#endif
> {
> if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> {
> @@ -1216,22 +1219,24 @@ no_cpuid:
> #endif
> }
> }
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> else
> {
> -#ifdef __x86_64__
> +# ifdef __x86_64__
> GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
> -# ifdef SHARED
> +# ifdef SHARED
> GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
> -# endif
> -#else
> -# ifdef SHARED
> +# endif
> +# else
> +# ifdef SHARED
> if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
> GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
> else
> GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
> +# endif
> # endif
> -#endif
> }
> +#endif
>
> #ifdef SHARED
> # ifdef __x86_64__
> diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
> index ea69f5223a..057a10862a 100644
> --- a/sysdeps/x86_64/dl-tlsdesc.S
> +++ b/sysdeps/x86_64/dl-tlsdesc.S
> @@ -20,6 +20,7 @@
> #include <tls.h>
> #include <cpu-features-offsets.h>
> #include <features-offsets.h>
> +#include <isa-level.h>
> #include "tlsdesc.h"
> #include "dl-trampoline-save.h"
>
> @@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak:
> .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
>
> #ifdef SHARED
> -# define USE_FXSAVE
> -# define STATE_SAVE_ALIGNMENT 16
> -# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
> -# include "dl-tlsdesc-dynamic.h"
> -# undef _dl_tlsdesc_dynamic
> -# undef USE_FXSAVE
> +# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> +# define USE_FXSAVE
> +# define STATE_SAVE_ALIGNMENT 16
> +# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
> +# include "dl-tlsdesc-dynamic.h"
> +# undef _dl_tlsdesc_dynamic
> +# undef USE_FXSAVE
> +# endif
>
> # define USE_XSAVE
> # define STATE_SAVE_ALIGNMENT 64
> --
> 2.43.2
>
LGTM.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
On 2/28/24 07:51, H.J. Lu wrote:
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -18,6 +18,7 @@
>
> #include <dl-hwcap.h>
> #include <libc-pointer-arith.h>
> +#include <isa-level.h>
> #include <get-isa-level.h>
> #include <cacheinfo.h>
> #include <dl-cacheinfo.h>
> @@ -1195,7 +1196,9 @@ no_cpuid:
> TUNABLE_CALLBACK (set_x86_shstk));
> #endif
>
> +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> +#endif
> {
> if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> {
Surely this can be written with IF and not IFDEF:
if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
|| GLRO(dl_x86_cpu_features).xsave_state_size != 0)
r~
On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <
richard.henderson@linaro.org> wrote:
> On 2/28/24 07:51, H.J. Lu wrote:
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -18,6 +18,7 @@
> >
> > #include <dl-hwcap.h>
> > #include <libc-pointer-arith.h>
> > +#include <isa-level.h>
> > #include <get-isa-level.h>
> > #include <cacheinfo.h>
> > #include <dl-cacheinfo.h>
> > @@ -1195,7 +1196,9 @@ no_cpuid:
> > TUNABLE_CALLBACK (set_x86_shstk));
> > #endif
> >
> > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > +#endif
> > {
> > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> > {
>
> Surely this can be written with IF and not IFDEF:
>
> if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
> || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
>
For sure it can be written like IF, but using IFDEF has the following perf
and code size advantage.
For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case
it will remove conditional
if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
and else block code of this conditional during preprocessing.
--Sunil
On 2/29/24 16:40, Sunil Pandey wrote:
>
>
> On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <richard.henderson@linaro.org
> <mailto:richard.henderson@linaro.org>> wrote:
>
> On 2/28/24 07:51, H.J. Lu wrote:
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -18,6 +18,7 @@
> >
> > #include <dl-hwcap.h>
> > #include <libc-pointer-arith.h>
> > +#include <isa-level.h>
> > #include <get-isa-level.h>
> > #include <cacheinfo.h>
> > #include <dl-cacheinfo.h>
> > @@ -1195,7 +1196,9 @@ no_cpuid:
> > TUNABLE_CALLBACK (set_x86_shstk));
> > #endif
> >
> > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > +#endif
> > {
> > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> > {
>
> Surely this can be written with IF and not IFDEF:
>
> if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
> || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
>
>
> For sure it can be written like IF, but using IFDEF has the following perf and code size
> advantage.
>
> For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case
>
> it will remove conditional
>
> if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
>
> and else block code of this conditional during preprocessing.
You miss the point -- both of these are constants and will be folded by the compiler.
There is no perf or code size advantage for cpp.
r~
On Fri, Mar 1, 2024 at 2:23 PM Richard Henderson <
richard.henderson@linaro.org> wrote:
> On 2/29/24 16:40, Sunil Pandey wrote:
> >
> >
> > On Thu, Feb 29, 2024 at 12:55 PM Richard Henderson <
> richard.henderson@linaro.org
> > <mailto:richard.henderson@linaro.org>> wrote:
> >
> > On 2/28/24 07:51, H.J. Lu wrote:
> > > --- a/sysdeps/x86/cpu-features.c
> > > +++ b/sysdeps/x86/cpu-features.c
> > > @@ -18,6 +18,7 @@
> > >
> > > #include <dl-hwcap.h>
> > > #include <libc-pointer-arith.h>
> > > +#include <isa-level.h>
> > > #include <get-isa-level.h>
> > > #include <cacheinfo.h>
> > > #include <dl-cacheinfo.h>
> > > @@ -1195,7 +1196,9 @@ no_cpuid:
> > > TUNABLE_CALLBACK (set_x86_shstk));
> > > #endif
> > >
> > > +#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
> > > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> > > +#endif
> > > {
> > > if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
> > > {
> >
> > Surely this can be written with IF and not IFDEF:
> >
> > if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
> > || GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> >
> >
> > For sure it can be written like IF, but using IFDEF has the following
> perf and code size
> > advantage.
> >
> > For MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL case
> >
> > it will remove conditional
> >
> > if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
> >
> > and else block code of this conditional during preprocessing.
>
> You miss the point -- both of these are constants and will be folded by
> the compiler.
> There is no perf or code size advantage for cpp.
>
>
Got it. Will submit a patch to fix it.
--Sunil
@@ -18,6 +18,7 @@
#include <dl-hwcap.h>
#include <libc-pointer-arith.h>
+#include <isa-level.h>
#include <get-isa-level.h>
#include <cacheinfo.h>
#include <dl-cacheinfo.h>
@@ -1195,7 +1196,9 @@ no_cpuid:
TUNABLE_CALLBACK (set_x86_shstk));
#endif
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
+#endif
{
if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC))
{
@@ -1216,22 +1219,24 @@ no_cpuid:
#endif
}
}
+#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
else
{
-#ifdef __x86_64__
+# ifdef __x86_64__
GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave;
-# ifdef SHARED
+# ifdef SHARED
GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
-# endif
-#else
-# ifdef SHARED
+# endif
+# else
+# ifdef SHARED
if (CPU_FEATURE_USABLE_P (cpu_features, FXSR))
GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave;
else
GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave;
+# endif
# endif
-#endif
}
+#endif
#ifdef SHARED
# ifdef __x86_64__
@@ -20,6 +20,7 @@
#include <tls.h>
#include <cpu-features-offsets.h>
#include <features-offsets.h>
+#include <isa-level.h>
#include "tlsdesc.h"
#include "dl-trampoline-save.h"
@@ -79,12 +80,14 @@ _dl_tlsdesc_undefweak:
.size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
#ifdef SHARED
-# define USE_FXSAVE
-# define STATE_SAVE_ALIGNMENT 16
-# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
-# include "dl-tlsdesc-dynamic.h"
-# undef _dl_tlsdesc_dynamic
-# undef USE_FXSAVE
+# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
+# define USE_FXSAVE
+# define STATE_SAVE_ALIGNMENT 16
+# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave
+# include "dl-tlsdesc-dynamic.h"
+# undef _dl_tlsdesc_dynamic
+# undef USE_FXSAVE
+# endif
# define USE_XSAVE
# define STATE_SAVE_ALIGNMENT 64