x86-64: Update _dl_tlsdesc_dynamic to preserve AMX registers
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Testing passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Testing passed
|
Commit Message
_dl_tlsdesc_dynamic should also preserve AMX registers which are
caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
and save it in xsave_state_full_size which is only used by
_dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
the AMX part of BZ #31372. Tested on AMX processor.
AMX test is enabled only for compilers with the fix for
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
GCC 14 and GCC 11/12/13 branches have the bug fix.
---
sysdeps/unix/sysv/linux/x86_64/Makefile | 27 ++++++
.../sysv/linux/x86_64/include/asm/prctl.h | 5 ++
.../linux/x86_64/tst-gnu2-tls2-amx-mod0.c | 2 +
.../linux/x86_64/tst-gnu2-tls2-amx-mod1.c | 2 +
.../linux/x86_64/tst-gnu2-tls2-amx-mod2.c | 2 +
.../sysv/linux/x86_64/tst-gnu2-tls2-amx.c | 86 +++++++++++++++++++
.../sysv/linux/x86_64/tst-gnu2-tls2-amx.h | 63 ++++++++++++++
sysdeps/x86/cpu-features-offsets.sym | 1 +
sysdeps/x86/cpu-features.c | 55 +++++++++++-
sysdeps/x86/include/cpu-features.h | 2 +
sysdeps/x86/sysdep.h | 18 +++-
sysdeps/x86_64/configure | 28 ++++++
sysdeps/x86_64/configure.ac | 15 ++++
sysdeps/x86_64/dl-tlsdesc-dynamic.h | 2 +-
14 files changed, 302 insertions(+), 6 deletions(-)
create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
Comments
On Wed, Feb 28, 2024 at 9:50 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> _dl_tlsdesc_dynamic should also preserve AMX registers which are
> caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
> to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
> and save it in xsave_state_full_size which is only used by
> _dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
> the AMX part of BZ #31372. Tested on AMX processor.
>
> AMX test is enabled only for compilers with the fix for
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
>
> GCC 14 and GCC 11/12/13 branches have the bug fix.
> ---
> sysdeps/unix/sysv/linux/x86_64/Makefile | 27 ++++++
> .../sysv/linux/x86_64/include/asm/prctl.h | 5 ++
> .../linux/x86_64/tst-gnu2-tls2-amx-mod0.c | 2 +
> .../linux/x86_64/tst-gnu2-tls2-amx-mod1.c | 2 +
> .../linux/x86_64/tst-gnu2-tls2-amx-mod2.c | 2 +
> .../sysv/linux/x86_64/tst-gnu2-tls2-amx.c | 86 +++++++++++++++++++
> .../sysv/linux/x86_64/tst-gnu2-tls2-amx.h | 63 ++++++++++++++
> sysdeps/x86/cpu-features-offsets.sym | 1 +
> sysdeps/x86/cpu-features.c | 55 +++++++++++-
> sysdeps/x86/include/cpu-features.h | 2 +
> sysdeps/x86/sysdep.h | 18 +++-
> sysdeps/x86_64/configure | 28 ++++++
> sysdeps/x86_64/configure.ac | 15 ++++
> sysdeps/x86_64/dl-tlsdesc-dynamic.h | 2 +-
> 14 files changed, 302 insertions(+), 6 deletions(-)
> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile
> b/sysdeps/unix/sysv/linux/x86_64/Makefile
> index 7d1d205fa0..fcbffd81cb 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/Makefile
> +++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
> @@ -66,6 +66,33 @@ $(objpfx)libx86-64-isa-level%.os:
> $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
> $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
> cp $< $@
> endif
> +
> +ifeq (yes,$(have-mamx-tile))
> +tests += \
> + tst-gnu2-tls2-amx \
> +# tests
> +
> +modules-names += \
> + tst-gnu2-tls2-amx-mod0 \
> + tst-gnu2-tls2-amx-mod1 \
> + tst-gnu2-tls2-amx-mod2 \
> +# modules-names
> +
> +$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
> +$(objpfx)tst-gnu2-tls2-amx.out: \
> + $(objpfx)tst-gnu2-tls2-amx-mod0.so \
> + $(objpfx)tst-gnu2-tls2-amx-mod1.so \
> + $(objpfx)tst-gnu2-tls2-amx-mod2.so
> +$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
> +$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
> +$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
> +
> +CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
> +CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
> +CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
> +CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
> +endif
> +
> endif # $(subdir) == elf
>
> ifneq ($(enable-cet),no)
> diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
> b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
> index 2f511321ad..ef4631bf4b 100644
> --- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
> +++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
> @@ -20,3 +20,8 @@
> # define ARCH_SHSTK_SHSTK 0x1
> # define ARCH_SHSTK_WRSS 0x2
> #endif
> +
> +#ifndef ARCH_GET_XCOMP_PERM
> +# define ARCH_GET_XCOMP_PERM 0x1022
> +# define ARCH_REQ_XCOMP_PERM 0x1023
> +#endif
> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
> b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
> new file mode 100644
> index 0000000000..2e0c7b91b7
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
> @@ -0,0 +1,2 @@
> +#include "tst-gnu2-tls2-amx.h"
> +#include <tst-gnu2-tls2mod0.c>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
> b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
> new file mode 100644
> index 0000000000..b8a8ccf1c1
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
> @@ -0,0 +1,2 @@
> +#include "tst-gnu2-tls2-amx.h"
> +#include <tst-gnu2-tls2mod1.c>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
> b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
> new file mode 100644
> index 0000000000..cdf4a8f363
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
> @@ -0,0 +1,2 @@
> +#include "tst-gnu2-tls2-amx.h"
> +#include <tst-gnu2-tls2mod2.c>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
> b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
> new file mode 100644
> index 0000000000..9efa2f3270
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
> @@ -0,0 +1,86 @@
> +/* Test TLSDESC relocation with AMX.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <stdbool.h>
> +#include <asm/prctl.h>
> +#include <support/check.h>
> +#include "tst-gnu2-tls2-amx.h"
> +
> +extern int arch_prctl (int, ...);
> +
> +#define X86_XSTATE_TILECFG_ID 17
> +#define X86_XSTATE_TILEDATA_ID 18
> +
> +/* Initialize tile config. */
> +__attribute__ ((noinline, noclone))
> +static void
> +init_tile_config (__tilecfg *tileinfo)
> +{
> + int i;
> + tileinfo->palette_id = 1;
> + tileinfo->start_row = 0;
> +
> + for (i = 0; i < 1; ++i)
> + {
> + tileinfo->colsb[i] = MAX_ROWS;
> + tileinfo->rows[i] = MAX_ROWS;
> + }
> +
> + for (i = 1; i < 4; ++i)
> + {
> + tileinfo->colsb[i] = MAX_COLS;
> + tileinfo->rows[i] = MAX_ROWS;
> + }
>
Can it be written as
for (i = 0; i < 4; ++i)
{
...
}
If not, please add some comments on the above 2 loops.
> +
> + _tile_loadconfig (tileinfo);
> +}
> +
> +static bool
> +enable_amx (void)
> +{
> + uint64_t bitmask;
> + if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
> + return false;
> +
> + if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
> + return false;
> +
> + if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
> + return false;
> +
> + /* Load tile configuration. */
> + __tilecfg tile_data = { 0 };
> + init_tile_config (&tile_data);
> +
> + return true;
> +}
> +
> +/* An architecture can define it to clobber caller-saved registers in
> + malloc below to verify that the implicit TLSDESC call won't change
> + caller-saved registers. */
> +static void
> +clear_tile_register (void)
> +{
> + _tile_zero (2);
> +}
> +
> +#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
> +#define IS_SUPPORTED() enable_amx ()
> +#define PREPARE_MALLOC() clear_tile_register ()
> +
> +#include <elf/tst-gnu2-tls2.c>
> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
> b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
> new file mode 100644
> index 0000000000..1845a3caba
> --- /dev/null
> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
> @@ -0,0 +1,63 @@
> +/* Test TLSDESC relocation with AMX.
> + Copyright (C) 2024 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#include <stdint.h>
> +#include <string.h>
> +#include <x86intrin.h>
> +#include <support/check.h>
> +
> +#define MAX_ROWS 16
> +#define MAX_COLS 64
> +#define MAX 1024
> +#define STRIDE 64
> +
> +typedef struct __tile_config
> +{
> + uint8_t palette_id;
> + uint8_t start_row;
> + uint8_t reserved_0[14];
> + uint16_t colsb[16];
> + uint8_t rows[16];
> +} __tilecfg __attribute__ ((aligned (64)));
> +
> +/* Initialize int8_t buffer */
> +static inline void
> +init_buffer (int8_t *buf, int8_t value)
> +{
> + int rows, colsb, i, j;
> + rows = MAX_ROWS;
> + colsb = MAX_COLS;
> +
> + for (i = 0; i < rows; i++)
> + for (j = 0; j < colsb; j++)
> + buf[i * colsb + j] = value;
> +}
> +
> +#define BEFORE_TLSDESC_CALL() \
> + int8_t src[MAX]; \
> + int8_t res[MAX]; \
> + /* Initialize src with data */ \
> + init_buffer (src, 2); \
> + /* Load tile rows from memory. */ \
> + _tile_loadd (2, src, STRIDE);
> +
> +#define AFTER_TLSDESC_CALL() \
> + /* Store the tile data to memory. */ \
> + _tile_stored (2, res, STRIDE); \
> + _tile_release (); \
> + TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
> diff --git a/sysdeps/x86/cpu-features-offsets.sym
> b/sysdeps/x86/cpu-features-offsets.sym
> index 6a8fd29813..21fc88d651 100644
> --- a/sysdeps/x86/cpu-features-offsets.sym
> +++ b/sysdeps/x86/cpu-features-offsets.sym
> @@ -3,3 +3,4 @@
> #include <ldsodefs.h>
>
> XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features,
> xsave_state_size)
> +XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features,
> xsave_state_full_size)
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 835113b42f..d71e8d3d2e 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
> __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
> if (ebx != 0)
> {
> + /* NB: On AMX capable processors, ebx always includes AMX
> + states. */
> unsigned int xsave_state_full_size
> = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
>
> @@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
> {
> unsigned int xstate_comp_offsets[32];
> unsigned int xstate_comp_sizes[32];
> +#ifdef __x86_64__
> + unsigned int xstate_amx_comp_offsets[32];
> + unsigned int xstate_amx_comp_sizes[32];
> + unsigned int amx_ecx;
> +#endif
> unsigned int i;
>
> xstate_comp_offsets[0] = 0;
> @@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
> xstate_comp_offsets[2] = 576;
> xstate_comp_sizes[0] = 160;
> xstate_comp_sizes[1] = 256;
> +#ifdef __x86_64__
> + xstate_amx_comp_offsets[0] = 0;
> + xstate_amx_comp_offsets[1] = 160;
> + xstate_amx_comp_offsets[2] = 576;
> + xstate_amx_comp_sizes[0] = 160;
> + xstate_amx_comp_sizes[1] = 256;
> +#endif
>
> for (i = 2; i < 32; i++)
> {
> - if ((STATE_SAVE_MASK & (1 << i)) != 0)
> + if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
> {
> __cpuid_count (0xd, i, eax, ebx, ecx, edx);
> - xstate_comp_sizes[i] = eax;
> +#ifdef __x86_64__
> + /* Include this in xsave_state_full_size. */
> + amx_ecx = ecx;
> + xstate_amx_comp_sizes[i] = eax;
> + if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
> + {
> + /* Exclude this from xsave_state_size. */
> + ecx = 0;
> + xstate_comp_sizes[i] = 0;
> + }
> + else
> +#endif
> + xstate_comp_sizes[i] = eax;
> }
> else
> {
> +#ifdef __x86_64__
> + amx_ecx = 0;
> + xstate_amx_comp_sizes[i] = 0;
> +#endif
> ecx = 0;
> xstate_comp_sizes[i] = 0;
> }
> @@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
> if ((ecx & (1 << 1)) != 0)
> xstate_comp_offsets[i]
> = ALIGN_UP (xstate_comp_offsets[i], 64);
> +#ifdef __x86_64__
> + xstate_amx_comp_offsets[i]
> + = (xstate_amx_comp_offsets[i - 1]
> + + xstate_amx_comp_sizes[i - 1]);
> + if ((amx_ecx & (1 << 1)) != 0)
> + xstate_amx_comp_offsets[i]
> + = ALIGN_UP (xstate_amx_comp_offsets[i],
> + 64);
> +#endif
> }
> }
>
> @@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
> = xstate_comp_offsets[31] + xstate_comp_sizes[31];
> if (size)
> {
> +#ifdef __x86_64__
> + unsigned int amx_size
> + = (xstate_amx_comp_offsets[31]
> + + xstate_amx_comp_sizes[31]);
> + amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
> + 64);
> + /* Set xsave_state_full_size to the compact AMX
> + state size for XSAVEC. NB: xsave_state_full_size
> + is only used in _dl_tlsdesc_dynamic_xsave and
> + _dl_tlsdesc_dynamic_xsavec. */
> + cpu_features->xsave_state_full_size = amx_size;
> +#endif
> cpu_features->xsave_state_size
> = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
> CPU_FEATURE_SET (cpu_features, XSAVEC);
> diff --git a/sysdeps/x86/include/cpu-features.h
> b/sysdeps/x86/include/cpu-features.h
> index b9bf3115b6..cd7bd27cf3 100644
> --- a/sysdeps/x86/include/cpu-features.h
> +++ b/sysdeps/x86/include/cpu-features.h
> @@ -934,6 +934,8 @@ struct cpu_features
> /* The full state size for XSAVE when XSAVEC is disabled by
>
> GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
> +
> + and the AMX state size when XSAVEC is available.
> */
> unsigned int xsave_state_full_size;
> /* Data cache size for use in memory and string routines, typically
> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
> index 485cad9c02..db8e576e91 100644
> --- a/sysdeps/x86/sysdep.h
> +++ b/sysdeps/x86/sysdep.h
> @@ -56,6 +56,14 @@
> | (1 << X86_XSTATE_ZMM_H_ID) \
> | (1 << X86_XSTATE_ZMM_ID) \
> | (1 << X86_XSTATE_APX_F_ID))
> +
> +/* AMX state mask. */
> +# define AMX_STATE_SAVE_MASK \
> + ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
> +
> +/* States to be included in xsave_state_full_size. */
> +# define FULL_STATE_SAVE_MASK \
> + (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
> #else
> /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
> doesn't have red-zone, use 0 here. */
> @@ -68,13 +76,17 @@
> | (1 << X86_XSTATE_BNDREGS_ID) \
> | (1 << X86_XSTATE_K_ID) \
> | (1 << X86_XSTATE_ZMM_H_ID))
> +
> +/* States to be included in xsave_state_size. */
> +# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
> #endif
>
> /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
> - Compiler assumes that all registers, including x87 FPU stack registers,
> - are unchanged after CALL, except for EFLAGS and RAX/EAX. */
> + Compiler assumes that all registers, including AMX and x87 FPU
> + stack registers, are unchanged after CALL, except for EFLAGS and
> + RAX/EAX. */
> #define TLSDESC_CALL_STATE_SAVE_MASK \
> - (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
> + (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
>
> /* Constants for bits in __x86_string_control: */
>
> diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
> index 418cc4a9b8..04a534fa12 100755
> --- a/sysdeps/x86_64/configure
> +++ b/sysdeps/x86_64/configure
> @@ -134,6 +134,34 @@ fi
> config_vars="$config_vars
> enable-cet = $enable_cet"
>
> +# Check if -mamx-tile works properly.
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile
> works properly" >&5
> +printf %s "checking whether -mamx-tile works properly... " >&6; }
> +if test ${libc_cv_x86_have_amx_tile+y}
> +then :
> + printf %s "(cached) " >&6
> +else $as_nop
> + cat > conftest.c <<EOF
> +#include <x86intrin.h>
> +EOF
> + libc_cv_x86_have_amx_tile=no
> + if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c >
> conftest.i'
> + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> + (eval $ac_try) 2>&5
> + ac_status=$?
> + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> + test $ac_status = 0; }; }; then
> + if grep -q __builtin_ia32_ldtilecfg conftest.i; then
> + libc_cv_x86_have_amx_tile=yes
> + fi
> + fi
> + rm -rf conftest*
> +fi
> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result:
> $libc_cv_x86_have_amx_tile" >&5
> +printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
> +config_vars="$config_vars
> +have-mamx-tile = $libc_cv_x86_have_amx_tile"
> +
> test -n "$critic_missing" && as_fn_error $? "
> *** $critic_missing" "$LINENO" 5
>
> diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
> index d1f803c02e..c714c47351 100644
> --- a/sysdeps/x86_64/configure.ac
> +++ b/sysdeps/x86_64/configure.ac
> @@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
> fi
> LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
>
> +# Check if -mamx-tile works properly.
> +AC_CACHE_CHECK(whether -mamx-tile works properly,
> + libc_cv_x86_have_amx_tile, [dnl
> +cat > conftest.c <<EOF
> +#include <x86intrin.h>
> +EOF
> + libc_cv_x86_have_amx_tile=no
> + if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c
> > conftest.i); then
> + if grep -q __builtin_ia32_ldtilecfg conftest.i; then
> + libc_cv_x86_have_amx_tile=yes
> + fi
> + fi
> + rm -rf conftest*])
> +LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
> +
> test -n "$critic_missing" && AC_MSG_ERROR([
> *** $critic_missing])
> diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> index 0c2e8d5320..9f02cfc3eb 100644
> --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
> @@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
> # endif
> #else
> /* Allocate stack space of the required size to save the state. */
> - sub
> _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip),
> %RSP_LP
> + sub
> _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip),
> %RSP_LP
> #endif
> /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
> r10 and r11. */
> --
> 2.43.2
>
>
On Wed, Feb 28, 2024 at 11:27 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Wed, Feb 28, 2024 at 9:50 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> _dl_tlsdesc_dynamic should also preserve AMX registers which are
>> caller-saved. Add X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID
>> to x86-64 TLSDESC_CALL_STATE_SAVE_MASK. Compute the AMX state size
>> and save it in xsave_state_full_size which is only used by
>> _dl_tlsdesc_dynamic_xsave and _dl_tlsdesc_dynamic_xsavec. This fixes
>> the AMX part of BZ #31372. Tested on AMX processor.
>>
>> AMX test is enabled only for compilers with the fix for
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114098
>>
>> GCC 14 and GCC 11/12/13 branches have the bug fix.
>> ---
>> sysdeps/unix/sysv/linux/x86_64/Makefile | 27 ++++++
>> .../sysv/linux/x86_64/include/asm/prctl.h | 5 ++
>> .../linux/x86_64/tst-gnu2-tls2-amx-mod0.c | 2 +
>> .../linux/x86_64/tst-gnu2-tls2-amx-mod1.c | 2 +
>> .../linux/x86_64/tst-gnu2-tls2-amx-mod2.c | 2 +
>> .../sysv/linux/x86_64/tst-gnu2-tls2-amx.c | 86 +++++++++++++++++++
>> .../sysv/linux/x86_64/tst-gnu2-tls2-amx.h | 63 ++++++++++++++
>> sysdeps/x86/cpu-features-offsets.sym | 1 +
>> sysdeps/x86/cpu-features.c | 55 +++++++++++-
>> sysdeps/x86/include/cpu-features.h | 2 +
>> sysdeps/x86/sysdep.h | 18 +++-
>> sysdeps/x86_64/configure | 28 ++++++
>> sysdeps/x86_64/configure.ac | 15 ++++
>> sysdeps/x86_64/dl-tlsdesc-dynamic.h | 2 +-
>> 14 files changed, 302 insertions(+), 6 deletions(-)
>> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
>> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
>> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
>> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
>> create mode 100644 sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
>>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile
>> index 7d1d205fa0..fcbffd81cb 100644
>> --- a/sysdeps/unix/sysv/linux/x86_64/Makefile
>> +++ b/sysdeps/unix/sysv/linux/x86_64/Makefile
>> @@ -66,6 +66,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
>> $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
>> cp $< $@
>> endif
>> +
>> +ifeq (yes,$(have-mamx-tile))
>> +tests += \
>> + tst-gnu2-tls2-amx \
>> +# tests
>> +
>> +modules-names += \
>> + tst-gnu2-tls2-amx-mod0 \
>> + tst-gnu2-tls2-amx-mod1 \
>> + tst-gnu2-tls2-amx-mod2 \
>> +# modules-names
>> +
>> +$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
>> +$(objpfx)tst-gnu2-tls2-amx.out: \
>> + $(objpfx)tst-gnu2-tls2-amx-mod0.so \
>> + $(objpfx)tst-gnu2-tls2-amx-mod1.so \
>> + $(objpfx)tst-gnu2-tls2-amx-mod2.so
>> +$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
>> +$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
>> +$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
>> +
>> +CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
>> +CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
>> +CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
>> +CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
>> +endif
>> +
>> endif # $(subdir) == elf
>>
>> ifneq ($(enable-cet),no)
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
>> index 2f511321ad..ef4631bf4b 100644
>> --- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
>> +++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h
>> @@ -20,3 +20,8 @@
>> # define ARCH_SHSTK_SHSTK 0x1
>> # define ARCH_SHSTK_WRSS 0x2
>> #endif
>> +
>> +#ifndef ARCH_GET_XCOMP_PERM
>> +# define ARCH_GET_XCOMP_PERM 0x1022
>> +# define ARCH_REQ_XCOMP_PERM 0x1023
>> +#endif
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
>> new file mode 100644
>> index 0000000000..2e0c7b91b7
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c
>> @@ -0,0 +1,2 @@
>> +#include "tst-gnu2-tls2-amx.h"
>> +#include <tst-gnu2-tls2mod0.c>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
>> new file mode 100644
>> index 0000000000..b8a8ccf1c1
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c
>> @@ -0,0 +1,2 @@
>> +#include "tst-gnu2-tls2-amx.h"
>> +#include <tst-gnu2-tls2mod1.c>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
>> new file mode 100644
>> index 0000000000..cdf4a8f363
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c
>> @@ -0,0 +1,2 @@
>> +#include "tst-gnu2-tls2-amx.h"
>> +#include <tst-gnu2-tls2mod2.c>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
>> new file mode 100644
>> index 0000000000..9efa2f3270
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c
>> @@ -0,0 +1,86 @@
>> +/* Test TLSDESC relocation with AMX.
>> + Copyright (C) 2024 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <http://www.gnu.org/licenses/>. */
>> +
>> +#include <stdbool.h>
>> +#include <asm/prctl.h>
>> +#include <support/check.h>
>> +#include "tst-gnu2-tls2-amx.h"
>> +
>> +extern int arch_prctl (int, ...);
>> +
>> +#define X86_XSTATE_TILECFG_ID 17
>> +#define X86_XSTATE_TILEDATA_ID 18
>> +
>> +/* Initialize tile config. */
>> +__attribute__ ((noinline, noclone))
>> +static void
>> +init_tile_config (__tilecfg *tileinfo)
>> +{
>> + int i;
>> + tileinfo->palette_id = 1;
>> + tileinfo->start_row = 0;
>> +
>> + for (i = 0; i < 1; ++i)
>> + {
>> + tileinfo->colsb[i] = MAX_ROWS;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> + tileinfo->rows[i] = MAX_ROWS;
>> + }
>> +
>> + for (i = 1; i < 4; ++i)
>> + {
>> + tileinfo->colsb[i] = MAX_COLS;
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>> + tileinfo->rows[i] = MAX_ROWS;
>> + }
>
>
> Can it be written as
> for (i = 0; i < 4; ++i)
> {
> ...
> }
>
> If not, please add some comments on the above 2 loops.
It is MAX_ROWS vs MAX_COLS. But the first loop can
be unrolled. Will send v2.
>>
>> +
>> + _tile_loadconfig (tileinfo);
>> +}
>> +
>> +static bool
>> +enable_amx (void)
>> +{
>> + uint64_t bitmask;
>> + if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
>> + return false;
>> +
>> + if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
>> + return false;
>> +
>> + if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
>> + return false;
>> +
>> + /* Load tile configuration. */
>> + __tilecfg tile_data = { 0 };
>> + init_tile_config (&tile_data);
>> +
>> + return true;
>> +}
>> +
>> +/* An architecture can define it to clobber caller-saved registers in
>> + malloc below to verify that the implicit TLSDESC call won't change
>> + caller-saved registers. */
>> +static void
>> +clear_tile_register (void)
>> +{
>> + _tile_zero (2);
>> +}
>> +
>> +#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
>> +#define IS_SUPPORTED() enable_amx ()
>> +#define PREPARE_MALLOC() clear_tile_register ()
>> +
>> +#include <elf/tst-gnu2-tls2.c>
>> diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
>> new file mode 100644
>> index 0000000000..1845a3caba
>> --- /dev/null
>> +++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h
>> @@ -0,0 +1,63 @@
>> +/* Test TLSDESC relocation with AMX.
>> + Copyright (C) 2024 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <http://www.gnu.org/licenses/>. */
>> +
>> +#include <stdint.h>
>> +#include <string.h>
>> +#include <x86intrin.h>
>> +#include <support/check.h>
>> +
>> +#define MAX_ROWS 16
>> +#define MAX_COLS 64
>> +#define MAX 1024
>> +#define STRIDE 64
>> +
>> +typedef struct __tile_config
>> +{
>> + uint8_t palette_id;
>> + uint8_t start_row;
>> + uint8_t reserved_0[14];
>> + uint16_t colsb[16];
>> + uint8_t rows[16];
>> +} __tilecfg __attribute__ ((aligned (64)));
>> +
>> +/* Initialize int8_t buffer */
>> +static inline void
>> +init_buffer (int8_t *buf, int8_t value)
>> +{
>> + int rows, colsb, i, j;
>> + rows = MAX_ROWS;
>> + colsb = MAX_COLS;
>> +
>> + for (i = 0; i < rows; i++)
>> + for (j = 0; j < colsb; j++)
>> + buf[i * colsb + j] = value;
>> +}
>> +
>> +#define BEFORE_TLSDESC_CALL() \
>> + int8_t src[MAX]; \
>> + int8_t res[MAX]; \
>> + /* Initialize src with data */ \
>> + init_buffer (src, 2); \
>> + /* Load tile rows from memory. */ \
>> + _tile_loadd (2, src, STRIDE);
>> +
>> +#define AFTER_TLSDESC_CALL() \
>> + /* Store the tile data to memory. */ \
>> + _tile_stored (2, res, STRIDE); \
>> + _tile_release (); \
>> + TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
>> diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym
>> index 6a8fd29813..21fc88d651 100644
>> --- a/sysdeps/x86/cpu-features-offsets.sym
>> +++ b/sysdeps/x86/cpu-features-offsets.sym
>> @@ -3,3 +3,4 @@
>> #include <ldsodefs.h>
>>
>> XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
>> +XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
>> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
>> index 835113b42f..d71e8d3d2e 100644
>> --- a/sysdeps/x86/cpu-features.c
>> +++ b/sysdeps/x86/cpu-features.c
>> @@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
>> __cpuid_count (0xd, 0, eax, ebx, ecx, edx);
>> if (ebx != 0)
>> {
>> + /* NB: On AMX capable processors, ebx always includes AMX
>> + states. */
>> unsigned int xsave_state_full_size
>> = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
>>
>> @@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
>> {
>> unsigned int xstate_comp_offsets[32];
>> unsigned int xstate_comp_sizes[32];
>> +#ifdef __x86_64__
>> + unsigned int xstate_amx_comp_offsets[32];
>> + unsigned int xstate_amx_comp_sizes[32];
>> + unsigned int amx_ecx;
>> +#endif
>> unsigned int i;
>>
>> xstate_comp_offsets[0] = 0;
>> @@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
>> xstate_comp_offsets[2] = 576;
>> xstate_comp_sizes[0] = 160;
>> xstate_comp_sizes[1] = 256;
>> +#ifdef __x86_64__
>> + xstate_amx_comp_offsets[0] = 0;
>> + xstate_amx_comp_offsets[1] = 160;
>> + xstate_amx_comp_offsets[2] = 576;
>> + xstate_amx_comp_sizes[0] = 160;
>> + xstate_amx_comp_sizes[1] = 256;
>> +#endif
>>
>> for (i = 2; i < 32; i++)
>> {
>> - if ((STATE_SAVE_MASK & (1 << i)) != 0)
>> + if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
>> {
>> __cpuid_count (0xd, i, eax, ebx, ecx, edx);
>> - xstate_comp_sizes[i] = eax;
>> +#ifdef __x86_64__
>> + /* Include this in xsave_state_full_size. */
>> + amx_ecx = ecx;
>> + xstate_amx_comp_sizes[i] = eax;
>> + if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
>> + {
>> + /* Exclude this from xsave_state_size. */
>> + ecx = 0;
>> + xstate_comp_sizes[i] = 0;
>> + }
>> + else
>> +#endif
>> + xstate_comp_sizes[i] = eax;
>> }
>> else
>> {
>> +#ifdef __x86_64__
>> + amx_ecx = 0;
>> + xstate_amx_comp_sizes[i] = 0;
>> +#endif
>> ecx = 0;
>> xstate_comp_sizes[i] = 0;
>> }
>> @@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
>> if ((ecx & (1 << 1)) != 0)
>> xstate_comp_offsets[i]
>> = ALIGN_UP (xstate_comp_offsets[i], 64);
>> +#ifdef __x86_64__
>> + xstate_amx_comp_offsets[i]
>> + = (xstate_amx_comp_offsets[i - 1]
>> + + xstate_amx_comp_sizes[i - 1]);
>> + if ((amx_ecx & (1 << 1)) != 0)
>> + xstate_amx_comp_offsets[i]
>> + = ALIGN_UP (xstate_amx_comp_offsets[i],
>> + 64);
>> +#endif
>> }
>> }
>>
>> @@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
>> = xstate_comp_offsets[31] + xstate_comp_sizes[31];
>> if (size)
>> {
>> +#ifdef __x86_64__
>> + unsigned int amx_size
>> + = (xstate_amx_comp_offsets[31]
>> + + xstate_amx_comp_sizes[31]);
>> + amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
>> + 64);
>> + /* Set xsave_state_full_size to the compact AMX
>> + state size for XSAVEC. NB: xsave_state_full_size
>> + is only used in _dl_tlsdesc_dynamic_xsave and
>> + _dl_tlsdesc_dynamic_xsavec. */
>> + cpu_features->xsave_state_full_size = amx_size;
>> +#endif
>> cpu_features->xsave_state_size
>> = ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
>> CPU_FEATURE_SET (cpu_features, XSAVEC);
>> diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
>> index b9bf3115b6..cd7bd27cf3 100644
>> --- a/sysdeps/x86/include/cpu-features.h
>> +++ b/sysdeps/x86/include/cpu-features.h
>> @@ -934,6 +934,8 @@ struct cpu_features
>> /* The full state size for XSAVE when XSAVEC is disabled by
>>
>> GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
>> +
>> + and the AMX state size when XSAVEC is available.
>> */
>> unsigned int xsave_state_full_size;
>> /* Data cache size for use in memory and string routines, typically
>> diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h
>> index 485cad9c02..db8e576e91 100644
>> --- a/sysdeps/x86/sysdep.h
>> +++ b/sysdeps/x86/sysdep.h
>> @@ -56,6 +56,14 @@
>> | (1 << X86_XSTATE_ZMM_H_ID) \
>> | (1 << X86_XSTATE_ZMM_ID) \
>> | (1 << X86_XSTATE_APX_F_ID))
>> +
>> +/* AMX state mask. */
>> +# define AMX_STATE_SAVE_MASK \
>> + ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
>> +
>> +/* States to be included in xsave_state_full_size. */
>> +# define FULL_STATE_SAVE_MASK \
>> + (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
>> #else
>> /* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
>> doesn't have red-zone, use 0 here. */
>> @@ -68,13 +76,17 @@
>> | (1 << X86_XSTATE_BNDREGS_ID) \
>> | (1 << X86_XSTATE_K_ID) \
>> | (1 << X86_XSTATE_ZMM_H_ID))
>> +
>> +/* States to be included in xsave_state_size. */
>> +# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
>> #endif
>>
>> /* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
>> - Compiler assumes that all registers, including x87 FPU stack registers,
>> - are unchanged after CALL, except for EFLAGS and RAX/EAX. */
>> + Compiler assumes that all registers, including AMX and x87 FPU
>> + stack registers, are unchanged after CALL, except for EFLAGS and
>> + RAX/EAX. */
>> #define TLSDESC_CALL_STATE_SAVE_MASK \
>> - (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
>> + (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
>>
>> /* Constants for bits in __x86_string_control: */
>>
>> diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
>> index 418cc4a9b8..04a534fa12 100755
>> --- a/sysdeps/x86_64/configure
>> +++ b/sysdeps/x86_64/configure
>> @@ -134,6 +134,34 @@ fi
>> config_vars="$config_vars
>> enable-cet = $enable_cet"
>>
>> +# Check if -mamx-tile works properly.
>> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
>> +printf %s "checking whether -mamx-tile works properly... " >&6; }
>> +if test ${libc_cv_x86_have_amx_tile+y}
>> +then :
>> + printf %s "(cached) " >&6
>> +else $as_nop
>> + cat > conftest.c <<EOF
>> +#include <x86intrin.h>
>> +EOF
>> + libc_cv_x86_have_amx_tile=no
>> + if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
>> + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
>> + (eval $ac_try) 2>&5
>> + ac_status=$?
>> + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
>> + test $ac_status = 0; }; }; then
>> + if grep -q __builtin_ia32_ldtilecfg conftest.i; then
>> + libc_cv_x86_have_amx_tile=yes
>> + fi
>> + fi
>> + rm -rf conftest*
>> +fi
>> +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
>> +printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
>> +config_vars="$config_vars
>> +have-mamx-tile = $libc_cv_x86_have_amx_tile"
>> +
>> test -n "$critic_missing" && as_fn_error $? "
>> *** $critic_missing" "$LINENO" 5
>>
>> diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
>> index d1f803c02e..c714c47351 100644
>> --- a/sysdeps/x86_64/configure.ac
>> +++ b/sysdeps/x86_64/configure.ac
>> @@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
>> fi
>> LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
>>
>> +# Check if -mamx-tile works properly.
>> +AC_CACHE_CHECK(whether -mamx-tile works properly,
>> + libc_cv_x86_have_amx_tile, [dnl
>> +cat > conftest.c <<EOF
>> +#include <x86intrin.h>
>> +EOF
>> + libc_cv_x86_have_amx_tile=no
>> + if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
>> + if grep -q __builtin_ia32_ldtilecfg conftest.i; then
>> + libc_cv_x86_have_amx_tile=yes
>> + fi
>> + fi
>> + rm -rf conftest*])
>> +LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
>> +
>> test -n "$critic_missing" && AC_MSG_ERROR([
>> *** $critic_missing])
>> diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
>> index 0c2e8d5320..9f02cfc3eb 100644
>> --- a/sysdeps/x86_64/dl-tlsdesc-dynamic.h
>> +++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h
>> @@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
>> # endif
>> #else
>> /* Allocate stack space of the required size to save the state. */
>> - sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
>> + sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
>> #endif
>> /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
>> r10 and r11. */
>> --
>> 2.43.2
>>
@@ -66,6 +66,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is
$(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so
cp $< $@
endif
+
+ifeq (yes,$(have-mamx-tile))
+tests += \
+ tst-gnu2-tls2-amx \
+# tests
+
+modules-names += \
+ tst-gnu2-tls2-amx-mod0 \
+ tst-gnu2-tls2-amx-mod1 \
+ tst-gnu2-tls2-amx-mod2 \
+# modules-names
+
+$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library)
+$(objpfx)tst-gnu2-tls2-amx.out: \
+ $(objpfx)tst-gnu2-tls2-amx-mod0.so \
+ $(objpfx)tst-gnu2-tls2-amx-mod1.so \
+ $(objpfx)tst-gnu2-tls2-amx-mod2.so
+$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport)
+$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport)
+
+CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile
+CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2
+CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2
+endif
+
endif # $(subdir) == elf
ifneq ($(enable-cet),no)
@@ -20,3 +20,8 @@
# define ARCH_SHSTK_SHSTK 0x1
# define ARCH_SHSTK_WRSS 0x2
#endif
+
+#ifndef ARCH_GET_XCOMP_PERM
+# define ARCH_GET_XCOMP_PERM 0x1022
+# define ARCH_REQ_XCOMP_PERM 0x1023
+#endif
new file mode 100644
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod0.c>
new file mode 100644
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod1.c>
new file mode 100644
@@ -0,0 +1,2 @@
+#include "tst-gnu2-tls2-amx.h"
+#include <tst-gnu2-tls2mod2.c>
new file mode 100644
@@ -0,0 +1,86 @@
+/* Test TLSDESC relocation with AMX.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+#include <asm/prctl.h>
+#include <support/check.h>
+#include "tst-gnu2-tls2-amx.h"
+
+extern int arch_prctl (int, ...);
+
+#define X86_XSTATE_TILECFG_ID 17
+#define X86_XSTATE_TILEDATA_ID 18
+
+/* Initialize tile config. */
+__attribute__ ((noinline, noclone))
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+ int i;
+ tileinfo->palette_id = 1;
+ tileinfo->start_row = 0;
+
+ for (i = 0; i < 1; ++i)
+ {
+ tileinfo->colsb[i] = MAX_ROWS;
+ tileinfo->rows[i] = MAX_ROWS;
+ }
+
+ for (i = 1; i < 4; ++i)
+ {
+ tileinfo->colsb[i] = MAX_COLS;
+ tileinfo->rows[i] = MAX_ROWS;
+ }
+
+ _tile_loadconfig (tileinfo);
+}
+
+static bool
+enable_amx (void)
+{
+ uint64_t bitmask;
+ if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0)
+ return false;
+
+ if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0)
+ return false;
+
+ if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0)
+ return false;
+
+ /* Load tile configuration. */
+ __tilecfg tile_data = { 0 };
+ init_tile_config (&tile_data);
+
+ return true;
+}
+
+/* An architecture can define it to clobber caller-saved registers in
+ malloc below to verify that the implicit TLSDESC call won't change
+ caller-saved registers. */
+static void
+clear_tile_register (void)
+{
+ _tile_zero (2);
+}
+
+#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so"
+#define IS_SUPPORTED() enable_amx ()
+#define PREPARE_MALLOC() clear_tile_register ()
+
+#include <elf/tst-gnu2-tls2.c>
new file mode 100644
@@ -0,0 +1,63 @@
+/* Test TLSDESC relocation with AMX.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdint.h>
+#include <string.h>
+#include <x86intrin.h>
+#include <support/check.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+
+typedef struct __tile_config
+{
+ uint8_t palette_id;
+ uint8_t start_row;
+ uint8_t reserved_0[14];
+ uint16_t colsb[16];
+ uint8_t rows[16];
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Initialize int8_t buffer */
+static inline void
+init_buffer (int8_t *buf, int8_t value)
+{
+ int rows, colsb, i, j;
+ rows = MAX_ROWS;
+ colsb = MAX_COLS;
+
+ for (i = 0; i < rows; i++)
+ for (j = 0; j < colsb; j++)
+ buf[i * colsb + j] = value;
+}
+
+#define BEFORE_TLSDESC_CALL() \
+ int8_t src[MAX]; \
+ int8_t res[MAX]; \
+ /* Initialize src with data */ \
+ init_buffer (src, 2); \
+ /* Load tile rows from memory. */ \
+ _tile_loadd (2, src, STRIDE);
+
+#define AFTER_TLSDESC_CALL() \
+ /* Store the tile data to memory. */ \
+ _tile_stored (2, res, STRIDE); \
+ _tile_release (); \
+ TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0);
@@ -3,3 +3,4 @@
#include <ldsodefs.h>
XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size)
+XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size)
@@ -307,6 +307,8 @@ update_active (struct cpu_features *cpu_features)
__cpuid_count (0xd, 0, eax, ebx, ecx, edx);
if (ebx != 0)
{
+ /* NB: On AMX capable processors, ebx always includes AMX
+ states. */
unsigned int xsave_state_full_size
= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
@@ -320,6 +322,11 @@ update_active (struct cpu_features *cpu_features)
{
unsigned int xstate_comp_offsets[32];
unsigned int xstate_comp_sizes[32];
+#ifdef __x86_64__
+ unsigned int xstate_amx_comp_offsets[32];
+ unsigned int xstate_amx_comp_sizes[32];
+ unsigned int amx_ecx;
+#endif
unsigned int i;
xstate_comp_offsets[0] = 0;
@@ -327,16 +334,39 @@ update_active (struct cpu_features *cpu_features)
xstate_comp_offsets[2] = 576;
xstate_comp_sizes[0] = 160;
xstate_comp_sizes[1] = 256;
+#ifdef __x86_64__
+ xstate_amx_comp_offsets[0] = 0;
+ xstate_amx_comp_offsets[1] = 160;
+ xstate_amx_comp_offsets[2] = 576;
+ xstate_amx_comp_sizes[0] = 160;
+ xstate_amx_comp_sizes[1] = 256;
+#endif
for (i = 2; i < 32; i++)
{
- if ((STATE_SAVE_MASK & (1 << i)) != 0)
+ if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
{
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
- xstate_comp_sizes[i] = eax;
+#ifdef __x86_64__
+ /* Include this in xsave_state_full_size. */
+ amx_ecx = ecx;
+ xstate_amx_comp_sizes[i] = eax;
+ if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
+ {
+ /* Exclude this from xsave_state_size. */
+ ecx = 0;
+ xstate_comp_sizes[i] = 0;
+ }
+ else
+#endif
+ xstate_comp_sizes[i] = eax;
}
else
{
+#ifdef __x86_64__
+ amx_ecx = 0;
+ xstate_amx_comp_sizes[i] = 0;
+#endif
ecx = 0;
xstate_comp_sizes[i] = 0;
}
@@ -349,6 +379,15 @@ update_active (struct cpu_features *cpu_features)
if ((ecx & (1 << 1)) != 0)
xstate_comp_offsets[i]
= ALIGN_UP (xstate_comp_offsets[i], 64);
+#ifdef __x86_64__
+ xstate_amx_comp_offsets[i]
+ = (xstate_amx_comp_offsets[i - 1]
+ + xstate_amx_comp_sizes[i - 1]);
+ if ((amx_ecx & (1 << 1)) != 0)
+ xstate_amx_comp_offsets[i]
+ = ALIGN_UP (xstate_amx_comp_offsets[i],
+ 64);
+#endif
}
}
@@ -357,6 +396,18 @@ update_active (struct cpu_features *cpu_features)
= xstate_comp_offsets[31] + xstate_comp_sizes[31];
if (size)
{
+#ifdef __x86_64__
+ unsigned int amx_size
+ = (xstate_amx_comp_offsets[31]
+ + xstate_amx_comp_sizes[31]);
+ amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
+ 64);
+ /* Set xsave_state_full_size to the compact AMX
+ state size for XSAVEC. NB: xsave_state_full_size
+ is only used in _dl_tlsdesc_dynamic_xsave and
+ _dl_tlsdesc_dynamic_xsavec. */
+ cpu_features->xsave_state_full_size = amx_size;
+#endif
cpu_features->xsave_state_size
= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
CPU_FEATURE_SET (cpu_features, XSAVEC);
@@ -934,6 +934,8 @@ struct cpu_features
/* The full state size for XSAVE when XSAVEC is disabled by
GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC
+
+ and the AMX state size when XSAVEC is available.
*/
unsigned int xsave_state_full_size;
/* Data cache size for use in memory and string routines, typically
@@ -56,6 +56,14 @@
| (1 << X86_XSTATE_ZMM_H_ID) \
| (1 << X86_XSTATE_ZMM_ID) \
| (1 << X86_XSTATE_APX_F_ID))
+
+/* AMX state mask. */
+# define AMX_STATE_SAVE_MASK \
+ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))
+
+/* States to be included in xsave_state_full_size. */
+# define FULL_STATE_SAVE_MASK \
+ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
#else
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
doesn't have red-zone, use 0 here. */
@@ -68,13 +76,17 @@
| (1 << X86_XSTATE_BNDREGS_ID) \
| (1 << X86_XSTATE_K_ID) \
| (1 << X86_XSTATE_ZMM_H_ID))
+
+/* States to be included in xsave_state_size. */
+# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK
#endif
/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
- Compiler assumes that all registers, including x87 FPU stack registers,
- are unchanged after CALL, except for EFLAGS and RAX/EAX. */
+ Compiler assumes that all registers, including AMX and x87 FPU
+ stack registers, are unchanged after CALL, except for EFLAGS and
+ RAX/EAX. */
#define TLSDESC_CALL_STATE_SAVE_MASK \
- (STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
+ (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
/* Constants for bits in __x86_string_control: */
@@ -134,6 +134,34 @@ fi
config_vars="$config_vars
enable-cet = $enable_cet"
+# Check if -mamx-tile works properly.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5
+printf %s "checking whether -mamx-tile works properly... " >&6; }
+if test ${libc_cv_x86_have_amx_tile+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+ cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+ libc_cv_x86_have_amx_tile=no
+ if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+ libc_cv_x86_have_amx_tile=yes
+ fi
+ fi
+ rm -rf conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5
+printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; }
+config_vars="$config_vars
+have-mamx-tile = $libc_cv_x86_have_amx_tile"
+
test -n "$critic_missing" && as_fn_error $? "
*** $critic_missing" "$LINENO" 5
@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then
fi
LIBC_CONFIG_VAR([enable-cet], [$enable_cet])
+# Check if -mamx-tile works properly.
+AC_CACHE_CHECK(whether -mamx-tile works properly,
+ libc_cv_x86_have_amx_tile, [dnl
+cat > conftest.c <<EOF
+#include <x86intrin.h>
+EOF
+ libc_cv_x86_have_amx_tile=no
+ if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then
+ if grep -q __builtin_ia32_ldtilecfg conftest.i; then
+ libc_cv_x86_have_amx_tile=yes
+ fi
+ fi
+ rm -rf conftest*])
+LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile])
+
test -n "$critic_missing" && AC_MSG_ERROR([
*** $critic_missing])
@@ -99,7 +99,7 @@ _dl_tlsdesc_dynamic:
# endif
#else
/* Allocate stack space of the required size to save the state. */
- sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
+ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP
#endif
/* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9,
r10 and r11. */