x86: Add support for AVX10 preset and vec size in cpu-features
Checks
Context | Check | Description
redhat-pt-bot/TryBot-apply_patch | success | Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit | success | Build for i686
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 | success | Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm | success | Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 | success | Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm | success | Testing passed
Commit Message
This commit adds support for the new AVX10 CPU features:
https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf
We add checks for:
- `AVX10`: Check if AVX10 is present.
- `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.
`make check` passes and cpuid output was checked against GNR/DMR on an
emulator.
---
manual/platform.texi | 12 ++++++++++++
sysdeps/x86/bits/platform/x86.h | 14 ++++++++++++--
sysdeps/x86/cpu-features.c | 25 +++++++++++++++++++++++++
sysdeps/x86/include/cpu-features.h | 27 ++++++++++++++++++++++++++-
sysdeps/x86/tst-get-cpu-features.c | 8 ++++++++
5 files changed, 83 insertions(+), 3 deletions(-)
Comments
On Wed, Sep 20, 2023 at 1:44 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This commit adds support for the new AVX10 CPU features:
> https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf
>
> We add checks for:
> - `AVX10`: Check if AVX10 is present.
> - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.
>
> `make check` passes and cpuid output was checked against GNR/DMR on an
> emulator.
> ---
> manual/platform.texi | 12 ++++++++++++
> sysdeps/x86/bits/platform/x86.h | 14 ++++++++++++--
> sysdeps/x86/cpu-features.c | 25 +++++++++++++++++++++++++
> sysdeps/x86/include/cpu-features.h | 27 ++++++++++++++++++++++++++-
> sysdeps/x86/tst-get-cpu-features.c | 8 ++++++++
> 5 files changed, 83 insertions(+), 3 deletions(-)
>
> diff --git a/manual/platform.texi b/manual/platform.texi
> index 2a2d557067..478b6fdcdf 100644
> --- a/manual/platform.texi
> +++ b/manual/platform.texi
> @@ -222,6 +222,18 @@ Leaf (EAX = 23H).
> @item
> @code{AVX} -- The AVX instruction extensions.
>
> +@item
> +@code{AVX10} -- The AVX10 instruction extensions.
> +
> +@item
> +@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
> +
> +@item
> +@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
> +
> +@item
> +@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
> +
> @item
> @code{AVX2} -- The AVX2 instruction extensions.
>
> diff --git a/sysdeps/x86/bits/platform/x86.h b/sysdeps/x86/bits/platform/x86.h
> index 88ca071aa7..1e23d53ba2 100644
> --- a/sysdeps/x86/bits/platform/x86.h
> +++ b/sysdeps/x86/bits/platform/x86.h
> @@ -30,7 +30,8 @@ enum
> CPUID_INDEX_80000008,
> CPUID_INDEX_7_ECX_1,
> CPUID_INDEX_19,
> - CPUID_INDEX_14_ECX_0
> + CPUID_INDEX_14_ECX_0,
> + CPUID_INDEX_24_ECX_0
> };
>
> struct cpuid_feature
> @@ -312,6 +313,7 @@ enum
> x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
> x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
> x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
> + x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
> x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
>
> x86_cpu_index_19_ebx
> @@ -325,5 +327,13 @@ enum
> = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
> + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
>
> - x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4
> + x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
> +
> + x86_cpu_index_24_ecx_0_ebx
> + = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
> + + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
> +
> + x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
> + x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
> + x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
> };
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index badf088874..0bf923d48b 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
> CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
> #endif
>
> + enum
> + {
> + os_xmm = 1,
> + os_ymm = 2,
> + os_zmm = 4
> + } os_vector_size = os_xmm;
> /* Can we call xgetbv? */
> if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
> {
> unsigned int xcrlow;
> unsigned int xcrhigh;
> + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
> asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
> /* Is YMM and XMM state usable? */
> if ((xcrlow & (bit_YMM_state | bit_XMM_state))
> @@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
> /* Determine if AVX is usable. */
> if (CPU_FEATURES_CPU_P (cpu_features, AVX))
> {
> + os_vector_size |= os_ymm;
> CPU_FEATURE_SET (cpu_features, AVX);
> /* The following features depend on AVX being usable. */
> /* Determine if AVX2 is usable. */
> @@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
> | bit_ZMM16_31_state))
> == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
> {
> + os_vector_size |= os_zmm;
> /* Determine if AVX512F is usable. */
> if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
> {
> @@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
> }
> }
>
> + if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
> + && cpu_features->basic.max_cpuid >= 0x24)
> + {
> + __cpuid_count (
> + 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
> + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
> + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
> + cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
> + if (os_vector_size & os_xmm)
> + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
> + if (os_vector_size & os_ymm)
> + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
> + if (os_vector_size & os_zmm)
> + CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
> + }
> +
> /* Are XTILECFG and XTILEDATA states usable? */
> if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
> == (bit_XTILECFG_state | bit_XTILEDATA_state))
> diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
> index eb30d342a6..2d7427a6c0 100644
> --- a/sysdeps/x86/include/cpu-features.h
> +++ b/sysdeps/x86/include/cpu-features.h
> @@ -29,7 +29,7 @@
>
> enum
> {
> - CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
> + CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
> };
>
> enum
> @@ -319,6 +319,7 @@ enum
> #define bit_cpu_AVX_NE_CONVERT (1u << 5)
> #define bit_cpu_AMX_COMPLEX (1u << 8)
> #define bit_cpu_PREFETCHI (1u << 14)
> +#define bit_cpu_AVX10 (1u << 19)
> #define bit_cpu_APX_F (1u << 21)
>
> /* CPUID_INDEX_19. */
> @@ -332,6 +333,13 @@ enum
> /* EBX. */
> #define bit_cpu_PTWRITE (1u << 4)
>
> +/* CPUID_INDEX_24_ECX_0. */
> +
> +/* EBX. */
> +#define bit_cpu_AVX10_XMM (1u << 16)
> +#define bit_cpu_AVX10_YMM (1u << 17)
> +#define bit_cpu_AVX10_ZMM (1u << 18)
> +
> /* CPUID_INDEX_1. */
>
> /* ECX. */
> @@ -563,6 +571,7 @@ enum
> #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
> #define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
> #define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
> +#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
> #define index_cpu_APX_F CPUID_INDEX_7_ECX_1
>
> /* CPUID_INDEX_19. */
> @@ -576,6 +585,13 @@ enum
> /* EBX. */
> #define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
>
> +/* CPUID_INDEX_24_ECX_0. */
> +
> +/* EBX. */
> +#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0
> +#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0
> +#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0
> +
> /* CPUID_INDEX_1. */
>
> /* ECX. */
> @@ -809,6 +825,7 @@ enum
> #define reg_AVX_NE_CONVERT edx
> #define reg_AMX_COMPLEX edx
> #define reg_PREFETCHI edx
> +#define reg_AVX10 edx
> #define reg_APX_F edx
>
> /* CPUID_INDEX_19. */
> @@ -822,6 +839,14 @@ enum
> /* EBX. */
> #define reg_PTWRITE ebx
>
> +/* CPUID_INDEX_24_ECX_0. */
> +
> +/* EBX. */
> +#define reg_AVX10_XMM ebx
> +#define reg_AVX10_YMM ebx
> +#define reg_AVX10_ZMM ebx
> +
> +
> /* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
> sequentially, then define the bit_arch* and index_arch_* lookup
> constants. */
> diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
> index b27fa7324a..44edd18df2 100644
> --- a/sysdeps/x86/tst-get-cpu-features.c
> +++ b/sysdeps/x86/tst-get-cpu-features.c
> @@ -219,6 +219,7 @@ do_test (void)
> CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
> CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
> CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
> + CHECK_CPU_FEATURE_PRESENT (AVX10);
> CHECK_CPU_FEATURE_PRESENT (APX_F);
> CHECK_CPU_FEATURE_PRESENT (AESKLE);
> CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
> @@ -391,11 +392,18 @@ do_test (void)
> CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
> CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
> CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
> + CHECK_CPU_FEATURE_ACTIVE (AVX10);
> CHECK_CPU_FEATURE_ACTIVE (APX_F);
> CHECK_CPU_FEATURE_ACTIVE (AESKLE);
> CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
> CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
>
> + if (CPU_FEATURE_ACTIVE (AVX10))
> + {
> + CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
> + CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
> + CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
> + }
> return 0;
> }
>
> --
> 2.34.1
>
LGTM.
Thanks.
@@ -222,6 +222,18 @@ Leaf (EAX = 23H).
@item
@code{AVX} -- The AVX instruction extensions.
+@item
+@code{AVX10} -- The AVX10 instruction extensions.
+
+@item
+@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
+
+@item
+@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
+
+@item
+@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
+
@item
@code{AVX2} -- The AVX2 instruction extensions.
@@ -30,7 +30,8 @@ enum
CPUID_INDEX_80000008,
CPUID_INDEX_7_ECX_1,
CPUID_INDEX_19,
- CPUID_INDEX_14_ECX_0
+ CPUID_INDEX_14_ECX_0,
+ CPUID_INDEX_24_ECX_0
};
struct cpuid_feature
@@ -312,6 +313,7 @@ enum
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
+ x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
x86_cpu_index_19_ebx
@@ -325,5 +327,13 @@ enum
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
- x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4
+ x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
+
+ x86_cpu_index_24_ecx_0_ebx
+ = (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
+
+ x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
+ x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
+ x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
};
@@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif
+ enum
+ {
+ os_xmm = 1,
+ os_ymm = 2,
+ os_zmm = 4
+ } os_vector_size = os_xmm;
/* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
{
unsigned int xcrlow;
unsigned int xcrhigh;
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
/* Is YMM and XMM state usable? */
if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
/* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
{
+ os_vector_size |= os_ymm;
CPU_FEATURE_SET (cpu_features, AVX);
/* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */
@@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
| bit_ZMM16_31_state))
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
{
+ os_vector_size |= os_zmm;
/* Determine if AVX512F is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
{
@@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
}
}
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
+ && cpu_features->basic.max_cpuid >= 0x24)
+ {
+ __cpuid_count (
+ 0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
+ cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
+ if (os_vector_size & os_xmm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
+ if (os_vector_size & os_ymm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
+ if (os_vector_size & os_zmm)
+ CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
+ }
+
/* Are XTILECFG and XTILEDATA states usable? */
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
== (bit_XTILECFG_state | bit_XTILEDATA_state))
@@ -29,7 +29,7 @@
enum
{
- CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
+ CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
};
enum
@@ -319,6 +319,7 @@ enum
#define bit_cpu_AVX_NE_CONVERT (1u << 5)
#define bit_cpu_AMX_COMPLEX (1u << 8)
#define bit_cpu_PREFETCHI (1u << 14)
+#define bit_cpu_AVX10 (1u << 19)
#define bit_cpu_APX_F (1u << 21)
/* CPUID_INDEX_19. */
@@ -332,6 +333,13 @@ enum
/* EBX. */
#define bit_cpu_PTWRITE (1u << 4)
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define bit_cpu_AVX10_XMM (1u << 16)
+#define bit_cpu_AVX10_YMM (1u << 17)
+#define bit_cpu_AVX10_ZMM (1u << 18)
+
/* CPUID_INDEX_1. */
/* ECX. */
@@ -563,6 +571,7 @@ enum
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
+#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1
/* CPUID_INDEX_19. */
@@ -576,6 +585,13 @@ enum
/* EBX. */
#define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0
+#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0
+
/* CPUID_INDEX_1. */
/* ECX. */
@@ -809,6 +825,7 @@ enum
#define reg_AVX_NE_CONVERT edx
#define reg_AMX_COMPLEX edx
#define reg_PREFETCHI edx
+#define reg_AVX10 edx
#define reg_APX_F edx
/* CPUID_INDEX_19. */
@@ -822,6 +839,14 @@ enum
/* EBX. */
#define reg_PTWRITE ebx
+/* CPUID_INDEX_24_ECX_0. */
+
+/* EBX. */
+#define reg_AVX10_XMM ebx
+#define reg_AVX10_YMM ebx
+#define reg_AVX10_ZMM ebx
+
+
/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
sequentially, then define the bit_arch* and index_arch_* lookup
constants. */
@@ -219,6 +219,7 @@ do_test (void)
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
+ CHECK_CPU_FEATURE_PRESENT (AVX10);
CHECK_CPU_FEATURE_PRESENT (APX_F);
CHECK_CPU_FEATURE_PRESENT (AESKLE);
CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@@ -391,11 +392,18 @@ do_test (void)
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10);
CHECK_CPU_FEATURE_ACTIVE (APX_F);
CHECK_CPU_FEATURE_ACTIVE (AESKLE);
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
+ if (CPU_FEATURE_ACTIVE (AVX10))
+ {
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
+ CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
+ }
return 0;
}