x86: Update CPU feature detection
Commit Message
1. Add COMMON_CPUID_INDEX_7_ECX_1 for AVX512_BF16.
2. Detect ENQCMD, PKS, AVX512_VP2INTERSECT, MD_CLEAR, SERIALIZE, HYBRID,
TSXLDTRK, L1D_FLUSH, CORE_CAPABILITIES and AVX512_BF16.
3. Rename CAPABILITIES to ARCH_CAPABILITIES.
4. Check if AVX512_VP2INTERSECT and AVX512_BF16 are usable.
5. Update CPU feature detection test.
---
sysdeps/x86/cpu-features.c | 26 ++++++++++---
sysdeps/x86/cpu-features.h | 59 ++++++++++++++++++++++++++++--
sysdeps/x86/tst-get-cpu-features.c | 14 ++++++-
3 files changed, 90 insertions(+), 9 deletions(-)
Comments
* H. J. Lu via Libc-alpha:
> 1. Add COMMON_CPUID_INDEX_7_ECX_1 for AVX512_BF16.
Do you expect similar changes in the future? This changes the layout of
struct cpu_features.
If __x86_get_cpu_features is exported, such a change will require a new
symbol version for __x86_get_cpu_features and can therefore only be made
at a release boundary. Even then, statically linked libraries will
keep using the old layout of struct cpu_features and silently give
incorrect results if linked against a newer glibc version.
Thanks,
Florian
@@ -90,11 +90,18 @@ get_common_indices (struct cpu_features *cpu_features,
}
if (cpu_features->basic.max_cpuid >= 7)
- __cpuid_count (7, 0,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
- cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+ {
+ __cpuid_count (7, 0,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+ __cpuid_count (7, 1,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].eax,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ebx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ecx,
+ cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].edx);
+ }
if (cpu_features->basic.max_cpuid >= 0xd)
__cpuid_count (0xd, 1,
@@ -215,6 +222,15 @@ get_common_indices (struct cpu_features *cpu_features,
if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
|= bit_arch_AVX512_VPOPCNTDQ_Usable;
+ /* Determine if AVX512_VP2INTERSECT is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features,
+ AVX512_VP2INTERSECT))
+ cpu_features->feature[index_arch_AVX512_VP2INTERSECT_Usable]
+ |= bit_arch_AVX512_VP2INTERSECT_Usable;
+ /* Determine if AVX512_BF16 is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BF16))
+ cpu_features->feature[index_arch_AVX512_BF16_Usable]
+ |= bit_arch_AVX512_BF16_Usable;
}
}
}
@@ -36,6 +36,7 @@ enum
COMMON_CPUID_INDEX_D_ECX_1,
COMMON_CPUID_INDEX_80000007,
COMMON_CPUID_INDEX_80000008,
+ COMMON_CPUID_INDEX_7_ECX_1,
/* Keep the following line at the end. */
COMMON_CPUID_INDEX_MAX
};
@@ -142,6 +143,8 @@ extern const struct cpu_features *__get_cpu_features (void)
#define bit_arch_VPCLMULQDQ_Usable (1u << 20)
#define bit_arch_XOP_Usable (1u << 21)
#define bit_arch_XSAVEC_Usable (1u << 22)
+#define bit_arch_AVX512_VP2INTERSECT_Usable (1u << 23)
+#define bit_arch_AVX512_BF16_Usable (1u << 24)
#define index_arch_AVX_Usable FEATURE_INDEX_1
#define index_arch_AVX2_Usable FEATURE_INDEX_1
@@ -166,6 +169,8 @@ extern const struct cpu_features *__get_cpu_features (void)
#define index_arch_VPCLMULQDQ_Usable FEATURE_INDEX_1
#define index_arch_XOP_Usable FEATURE_INDEX_1
#define index_arch_XSAVEC_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_VP2INTERSECT_Usable FEATURE_INDEX_1
+#define index_arch_AVX512_BF16_Usable FEATURE_INDEX_1
/* Unused. Compiler will optimize them out. */
#define bit_arch_SSE3_Usable (1u << 0)
@@ -355,6 +360,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define need_arch_feature_AVX512_4VNNIW 1
#define need_arch_feature_AVX512_4FMAPS 1
#define need_arch_feature_FSRM 0
+#define need_arch_feature_AVX512_VP2INTERSECT 1
/* COMMON_CPUID_INDEX_80000001. */
@@ -375,6 +381,11 @@ extern const struct cpu_features *__get_cpu_features (void)
#define need_arch_feature_INVARIANT_TSC 0
#define need_arch_feature_WBNOINVD 0
+/* COMMON_CPUID_INDEX_7_ECX_1. */
+
+/* EAX. */
+#define need_arch_feature_AVX512_BF16 1
+
/* CPU features. */
/* COMMON_CPUID_INDEX_1. */
@@ -494,17 +505,26 @@ extern const struct cpu_features *__get_cpu_features (void)
#define bit_cpu_CLDEMOTE (1u << 25)
#define bit_cpu_MOVDIRI (1u << 27)
#define bit_cpu_MOVDIR64B (1u << 28)
+#define bit_cpu_ENQCMD (1u << 29)
#define bit_cpu_SGX_LC (1u << 30)
+#define bit_cpu_PKS (1u << 31)
/* EDX. */
#define bit_cpu_AVX512_4VNNIW (1u << 2)
#define bit_cpu_AVX512_4FMAPS (1u << 3)
#define bit_cpu_FSRM (1u << 4)
+#define bit_cpu_AVX512_VP2INTERSECT (1u << 8)
+#define bit_cpu_MD_CLEAR (1u << 10)
+#define bit_cpu_SERIALIZE (1u << 14)
+#define bit_cpu_HYBRID (1u << 15)
+#define bit_cpu_TSXLDTRK (1u << 16)
#define bit_cpu_PCONFIG (1u << 18)
#define bit_cpu_IBT (1u << 20)
#define bit_cpu_IBRS_IBPB (1u << 26)
#define bit_cpu_STIBP (1u << 27)
-#define bit_cpu_CAPABILITIES (1u << 29)
+#define bit_cpu_L1D_FLUSH (1u << 28)
+#define bit_cpu_ARCH_CAPABILITIES (1u << 29)
+#define bit_cpu_CORE_CAPABILITIES (1u << 30)
#define bit_cpu_SSBD (1u << 31)
/* COMMON_CPUID_INDEX_80000001. */
@@ -545,6 +565,11 @@ extern const struct cpu_features *__get_cpu_features (void)
/* EBX. */
#define bit_cpu_WBNOINVD (1u << 9)
+/* COMMON_CPUID_INDEX_7_ECX_1. */
+
+/* EAX. */
+#define bit_cpu_AVX512_BF16 (1u << 5)
+
/* COMMON_CPUID_INDEX_1. */
/* ECX. */
@@ -662,17 +687,26 @@ extern const struct cpu_features *__get_cpu_features (void)
#define index_cpu_CLDEMOTE COMMON_CPUID_INDEX_7
#define index_cpu_MOVDIRI COMMON_CPUID_INDEX_7
#define index_cpu_MOVDIR64B COMMON_CPUID_INDEX_7
+#define index_cpu_ENQCMD COMMON_CPUID_INDEX_7
#define index_cpu_SGX_LC COMMON_CPUID_INDEX_7
+#define index_cpu_PKS COMMON_CPUID_INDEX_7
/* EDX. */
#define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
#define index_cpu_AVX512_4FMAPS COMMON_CPUID_INDEX_7
#define index_cpu_FSRM COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7
+#define index_cpu_MD_CLEAR COMMON_CPUID_INDEX_7
+#define index_cpu_SERIALIZE COMMON_CPUID_INDEX_7
+#define index_cpu_HYBRID COMMON_CPUID_INDEX_7
+#define index_cpu_TSXLDTRK COMMON_CPUID_INDEX_7
#define index_cpu_PCONFIG COMMON_CPUID_INDEX_7
#define index_cpu_IBT COMMON_CPUID_INDEX_7
#define index_cpu_IBRS_IBPB COMMON_CPUID_INDEX_7
#define index_cpu_STIBP COMMON_CPUID_INDEX_7
-#define index_cpu_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_L1D_FLUSH COMMON_CPUID_INDEX_7
+#define index_cpu_ARCH_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_CORE_CAPABILITIES COMMON_CPUID_INDEX_7
#define index_cpu_SSBD COMMON_CPUID_INDEX_7
/* COMMON_CPUID_INDEX_80000001. */
@@ -713,6 +747,11 @@ extern const struct cpu_features *__get_cpu_features (void)
/* EBX. */
#define index_cpu_WBNOINVD COMMON_CPUID_INDEX_80000008
+/* COMMON_CPUID_INDEX_7_ECX_1. */
+
+/* EAX. */
+#define index_cpu_AVX512_BF16 COMMON_CPUID_INDEX_7_ECX_1
+
/* COMMON_CPUID_INDEX_1. */
/* ECX. */
@@ -830,17 +869,26 @@ extern const struct cpu_features *__get_cpu_features (void)
#define reg_CLDEMOTE ecx
#define reg_MOVDIRI ecx
#define reg_MOVDIR64B ecx
+#define reg_ENQCMD ecx
#define reg_SGX_LC ecx
+#define reg_PKS ecx
/* EDX. */
#define reg_AVX512_4VNNIW edx
#define reg_AVX512_4FMAPS edx
#define reg_FSRM edx
+#define reg_AVX512_VP2INTERSECT edx
+#define reg_MD_CLEAR edx
+#define reg_SERIALIZE edx
+#define reg_HYBRID edx
+#define reg_TSXLDTRK edx
#define reg_PCONFIG edx
#define reg_IBT edx
#define reg_IBRS_IBPB edx
#define reg_STIBP edx
-#define reg_CAPABILITIES edx
+#define reg_L1D_FLUSH edx
+#define reg_ARCH_CAPABILITIES edx
+#define reg_CORE_CAPABILITIES edx
#define reg_SSBD edx
/* COMMON_CPUID_INDEX_80000001. */
@@ -881,6 +929,11 @@ extern const struct cpu_features *__get_cpu_features (void)
/* EBX. */
#define reg_WBNOINVD ebx
+/* COMMON_CPUID_INDEX_7_ECX_1. */
+
+/* EAX. */
+#define reg_AVX512_BF16 eax
+
/* FEATURE_INDEX_2. */
#define bit_arch_I586 (1u << 0)
#define bit_arch_I686 (1u << 1)
@@ -174,15 +174,24 @@ do_test (void)
CHECK_CPU_FEATURE (CLDEMOTE);
CHECK_CPU_FEATURE (MOVDIRI);
CHECK_CPU_FEATURE (MOVDIR64B);
+ CHECK_CPU_FEATURE (ENQCMD);
CHECK_CPU_FEATURE (SGX_LC);
+ CHECK_CPU_FEATURE (PKS);
CHECK_CPU_FEATURE (AVX512_4VNNIW);
CHECK_CPU_FEATURE (AVX512_4FMAPS);
CHECK_CPU_FEATURE (FSRM);
+ CHECK_CPU_FEATURE (AVX512_VP2INTERSECT);
+ CHECK_CPU_FEATURE (MD_CLEAR);
+ CHECK_CPU_FEATURE (SERIALIZE);
+ CHECK_CPU_FEATURE (HYBRID);
+ CHECK_CPU_FEATURE (TSXLDTRK);
CHECK_CPU_FEATURE (PCONFIG);
CHECK_CPU_FEATURE (IBT);
CHECK_CPU_FEATURE (IBRS_IBPB);
CHECK_CPU_FEATURE (STIBP);
- CHECK_CPU_FEATURE (CAPABILITIES);
+ CHECK_CPU_FEATURE (L1D_FLUSH);
+ CHECK_CPU_FEATURE (ARCH_CAPABILITIES);
+ CHECK_CPU_FEATURE (CORE_CAPABILITIES);
CHECK_CPU_FEATURE (SSBD);
CHECK_CPU_FEATURE (LAHF64_SAHF64);
CHECK_CPU_FEATURE (SVM);
@@ -204,6 +213,7 @@ do_test (void)
CHECK_CPU_FEATURE (XSAVES);
CHECK_CPU_FEATURE (INVARIANT_TSC);
CHECK_CPU_FEATURE (WBNOINVD);
+ CHECK_CPU_FEATURE (AVX512_BF16);
printf ("Usable CPU features:\n");
CHECK_CPU_FEATURE_USABLE (SSE3);
@@ -267,6 +277,7 @@ do_test (void)
CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
CHECK_CPU_FEATURE_USABLE (FSRM);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
CHECK_CPU_FEATURE_USABLE (LAHF64_SAHF64);
CHECK_CPU_FEATURE_USABLE (LZCNT);
CHECK_CPU_FEATURE_USABLE (SSE4A);
@@ -282,6 +293,7 @@ do_test (void)
CHECK_CPU_FEATURE_USABLE (XSAVES);
CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
CHECK_CPU_FEATURE_USABLE (WBNOINVD);
+ CHECK_CPU_FEATURE_USABLE (AVX512_BF16);
return 0;
}