[v8,1/2] x86-64: Save APX registers in ld.so trampoline
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
ld.so trampoline. This fixes BZ #31371.
Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
be used by i386 _dl_tlsdesc_dynamic.
---
sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 6 deletions(-)
Comments
On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> ld.so trampoline. This fixes BZ #31371.
>
> Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> be used by i386 _dl_tlsdesc_dynamic.
> ---
> sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 46 insertions(+), 6 deletions(-)
>
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index 85d0a8c943..837fd28734 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -21,14 +21,54 @@
>
> #include <sysdeps/generic/sysdep.h>
>
> +/* The extended state feature IDs in the state component bitmap. */
> +#define X86_XSTATE_X87_ID 0
> +#define X86_XSTATE_SSE_ID 1
> +#define X86_XSTATE_AVX_ID 2
> +#define X86_XSTATE_BNDREGS_ID 3
> +#define X86_XSTATE_BNDCFG_ID 4
> +#define X86_XSTATE_K_ID 5
> +#define X86_XSTATE_ZMM_H_ID 6
> +#define X86_XSTATE_ZMM_ID 7
> +#define X86_XSTATE_PKRU_ID 9
> +#define X86_XSTATE_TILECFG_ID 17
> +#define X86_XSTATE_TILEDATA_ID 18
> +#define X86_XSTATE_APX_F_ID 19
> +
> +#ifdef __x86_64__
> /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
> space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
> - aligned to 16 bytes for fxsave and 64 bytes for xsave. */
> -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> -
> -/* Save SSE, AVX, AVX512, mask and bound registers. */
> -#define STATE_SAVE_MASK \
> - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> + aligned to 16 bytes for fxsave and 64 bytes for xsave.
> +
> + NB: It is non-zero because of the 128-byte red-zone. Some registers
> + are saved on stack without adjusting stack pointer first. When we
> + update stack pointer to allocate more space, we need to take the
> + red-zone into account. */
> +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> +
> +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
> + registers are mutually exclusive. */
> +# define STATE_SAVE_MASK \
> + ((1 << X86_XSTATE_SSE_ID) \
> + | (1 << X86_XSTATE_AVX_ID) \
> + | (1 << X86_XSTATE_BNDREGS_ID) \
> + | (1 << X86_XSTATE_K_ID) \
> + | (1 << X86_XSTATE_ZMM_H_ID) \
> + | (1 << X86_XSTATE_ZMM_ID) \
> + | (1 << X86_XSTATE_APX_F_ID))
> +#else
> +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
> + doesn't have red-zone, use 0 here. */
> +# define STATE_SAVE_OFFSET 0
> +
> +/* Save SSE, AVX, AVX512, mask and bound registers. */
> +# define STATE_SAVE_MASK \
> + ((1 << X86_XSTATE_SSE_ID) \
> + | (1 << X86_XSTATE_AVX_ID) \
> + | (1 << X86_XSTATE_BNDREGS_ID) \
> + | (1 << X86_XSTATE_K_ID) \
> + | (1 << X86_XSTATE_ZMM_H_ID))
> +#endif
>
> /* Constants for bits in __x86_string_control: */
>
> --
> 2.43.0
>
LGTM.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> > ld.so trampoline. This fixes BZ #31371.
> >
> > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> > be used by i386 _dl_tlsdesc_dynamic.
> > ---
> > sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> > 1 file changed, 46 insertions(+), 6 deletions(-)
> >
> > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > index 85d0a8c943..837fd28734 100644
> > --- a/sysdeps/x86/sysdep.h
> > +++ b/sysdeps/x86/sysdep.h
> > @@ -21,14 +21,54 @@
> >
> > #include <sysdeps/generic/sysdep.h>
> >
> > +/* The extended state feature IDs in the state component bitmap. */
> > +#define X86_XSTATE_X87_ID 0
> > +#define X86_XSTATE_SSE_ID 1
> > +#define X86_XSTATE_AVX_ID 2
> > +#define X86_XSTATE_BNDREGS_ID 3
> > +#define X86_XSTATE_BNDCFG_ID 4
> > +#define X86_XSTATE_K_ID 5
> > +#define X86_XSTATE_ZMM_H_ID 6
> > +#define X86_XSTATE_ZMM_ID 7
> > +#define X86_XSTATE_PKRU_ID 9
> > +#define X86_XSTATE_TILECFG_ID 17
> > +#define X86_XSTATE_TILEDATA_ID 18
> > +#define X86_XSTATE_APX_F_ID 19
> > +
> > +#ifdef __x86_64__
> > /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
> > space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
> > - aligned to 16 bytes for fxsave and 64 bytes for xsave. */
> > -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> > -
> > -/* Save SSE, AVX, AVX512, mask and bound registers. */
> > -#define STATE_SAVE_MASK \
> > - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> > + aligned to 16 bytes for fxsave and 64 bytes for xsave.
> > +
> > + NB: It is non-zero because of the 128-byte red-zone. Some registers
> > + are saved on stack without adjusting stack pointer first. When we
> > + update stack pointer to allocate more space, we need to take the
> > + red-zone into account. */
> > +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> > +
> > +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
> > + registers are mutually exclusive. */
> > +# define STATE_SAVE_MASK \
> > + ((1 << X86_XSTATE_SSE_ID) \
> > + | (1 << X86_XSTATE_AVX_ID) \
> > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > + | (1 << X86_XSTATE_K_ID) \
> > + | (1 << X86_XSTATE_ZMM_H_ID) \
> > + | (1 << X86_XSTATE_ZMM_ID) \
> > + | (1 << X86_XSTATE_APX_F_ID))
> > +#else
> > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
> > + doesn't have red-zone, use 0 here. */
> > +# define STATE_SAVE_OFFSET 0
> > +
> > +/* Save SSE, AVX, AVX512, mask and bound registers. */
> > +# define STATE_SAVE_MASK \
> > + ((1 << X86_XSTATE_SSE_ID) \
> > + | (1 << X86_XSTATE_AVX_ID) \
> > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > + | (1 << X86_XSTATE_K_ID) \
> > + | (1 << X86_XSTATE_ZMM_H_ID))
> > +#endif
> >
> > /* Constants for bits in __x86_string_control: */
> >
> > --
> > 2.43.0
> >
>
>
> LGTM.
> Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
OK to backport it to release branches?
Thanks.
On Fri, Mar 8, 2024 at 2:09 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Sat, Feb 24, 2024 at 11:01 AM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > On Fri, Feb 16, 2024 at 9:17 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Add APX registers to STATE_SAVE_MASK so that APX registers are saved in
> > > ld.so trampoline. This fixes BZ #31371.
> > >
> > > Also update STATE_SAVE_OFFSET and STATE_SAVE_MASK for i386 which will
> > > be used by i386 _dl_tlsdesc_dynamic.
> > > ---
> > > sysdeps/x86/sysdep.h | 52 +++++++++++++++++++++++++++++++++++++++-----
> > > 1 file changed, 46 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> > > index 85d0a8c943..837fd28734 100644
> > > --- a/sysdeps/x86/sysdep.h
> > > +++ b/sysdeps/x86/sysdep.h
> > > @@ -21,14 +21,54 @@
> > >
> > > #include <sysdeps/generic/sysdep.h>
> > >
> > > +/* The extended state feature IDs in the state component bitmap. */
> > > +#define X86_XSTATE_X87_ID 0
> > > +#define X86_XSTATE_SSE_ID 1
> > > +#define X86_XSTATE_AVX_ID 2
> > > +#define X86_XSTATE_BNDREGS_ID 3
> > > +#define X86_XSTATE_BNDCFG_ID 4
> > > +#define X86_XSTATE_K_ID 5
> > > +#define X86_XSTATE_ZMM_H_ID 6
> > > +#define X86_XSTATE_ZMM_ID 7
> > > +#define X86_XSTATE_PKRU_ID 9
> > > +#define X86_XSTATE_TILECFG_ID 17
> > > +#define X86_XSTATE_TILEDATA_ID 18
> > > +#define X86_XSTATE_APX_F_ID 19
> > > +
> > > +#ifdef __x86_64__
> > > /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
> > > space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
> > > - aligned to 16 bytes for fxsave and 64 bytes for xsave. */
> > > -#define STATE_SAVE_OFFSET (8 * 7 + 8)
> > > -
> > > -/* Save SSE, AVX, AVX512, mask and bound registers. */
> > > -#define STATE_SAVE_MASK \
> > > - ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
> > > + aligned to 16 bytes for fxsave and 64 bytes for xsave.
> > > +
> > > + NB: It is non-zero because of the 128-byte red-zone. Some registers
> > > + are saved on stack without adjusting stack pointer first. When we
> > > + update stack pointer to allocate more space, we need to take the
> > > + red-zone into account. */
> > > +# define STATE_SAVE_OFFSET (8 * 7 + 8)
> > > +
> > > +/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
> > > + registers are mutually exclusive. */
> > > +# define STATE_SAVE_MASK \
> > > + ((1 << X86_XSTATE_SSE_ID) \
> > > + | (1 << X86_XSTATE_AVX_ID) \
> > > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > > + | (1 << X86_XSTATE_K_ID) \
> > > + | (1 << X86_XSTATE_ZMM_H_ID) \
> > > + | (1 << X86_XSTATE_ZMM_ID) \
> > > + | (1 << X86_XSTATE_APX_F_ID))
> > > +#else
> > > +/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
> > > + doesn't have red-zone, use 0 here. */
> > > +# define STATE_SAVE_OFFSET 0
> > > +
> > > +/* Save SSE, AVX, AVX512, mask and bound registers. */
> > > +# define STATE_SAVE_MASK \
> > > + ((1 << X86_XSTATE_SSE_ID) \
> > > + | (1 << X86_XSTATE_AVX_ID) \
> > > + | (1 << X86_XSTATE_BNDREGS_ID) \
> > > + | (1 << X86_XSTATE_K_ID) \
> > > + | (1 << X86_XSTATE_ZMM_H_ID))
> > > +#endif
> > >
> > > /* Constants for bits in __x86_string_control: */
> > >
> > > --
> > > 2.43.0
> > >
> >
> >
> > LGTM.
> > Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
>
> OK to backport it to release branches?
>
> Thanks.
Okay with it, but IMO this type of system-level change should be
tested in the real world a bit longer before being backported.
Maybe give it another week first?
>
> --
> H.J.
@@ -21,14 +21,54 @@
#include <sysdeps/generic/sysdep.h>
+/* The extended state feature IDs in the state component bitmap. */
+#define X86_XSTATE_X87_ID 0
+#define X86_XSTATE_SSE_ID 1
+#define X86_XSTATE_AVX_ID 2
+#define X86_XSTATE_BNDREGS_ID 3
+#define X86_XSTATE_BNDCFG_ID 4
+#define X86_XSTATE_K_ID 5
+#define X86_XSTATE_ZMM_H_ID 6
+#define X86_XSTATE_ZMM_ID 7
+#define X86_XSTATE_PKRU_ID 9
+#define X86_XSTATE_TILECFG_ID 17
+#define X86_XSTATE_TILEDATA_ID 18
+#define X86_XSTATE_APX_F_ID 19
+
+#ifdef __x86_64__
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
- aligned to 16 bytes for fxsave and 64 bytes for xsave. */
-#define STATE_SAVE_OFFSET (8 * 7 + 8)
-
-/* Save SSE, AVX, AVX512, mask and bound registers. */
-#define STATE_SAVE_MASK \
- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7))
+ aligned to 16 bytes for fxsave and 64 bytes for xsave.
+
+ NB: It is non-zero because of the 128-byte red-zone. Some registers
+ are saved on stack without adjusting stack pointer first. When we
+ update stack pointer to allocate more space, we need to take the
+ red-zone into account. */
+# define STATE_SAVE_OFFSET (8 * 7 + 8)
+
+/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
+ registers are mutually exclusive. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID) \
+ | (1 << X86_XSTATE_ZMM_ID) \
+ | (1 << X86_XSTATE_APX_F_ID))
+#else
+/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
+ doesn't have red-zone, use 0 here. */
+# define STATE_SAVE_OFFSET 0
+
+/* Save SSE, AVX, AVX512, mask and bound registers. */
+# define STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_SSE_ID) \
+ | (1 << X86_XSTATE_AVX_ID) \
+ | (1 << X86_XSTATE_BNDREGS_ID) \
+ | (1 << X86_XSTATE_K_ID) \
+ | (1 << X86_XSTATE_ZMM_H_ID))
+#endif
/* Constants for bits in __x86_string_control: */