x86: Update CPU feature detection

Message ID 20200617144348.301851-1-hjl.tools@gmail.com
State Superseded
Headers
Series x86: Update CPU feature detection |

Commit Message

H.J. Lu June 17, 2020, 2:43 p.m. UTC
1. Add COMMON_CPUID_INDEX_7_ECX_1 for AVX512_BF16.
2. Detect ENQCMD, PKS, AVX512_VP2INTERSECT, MD_CLEAR, SERIALIZE, HYBRID,
TSXLDTRK, L1D_FLUSH, CORE_CAPABILITIES and AVX512_BF16.
3. Rename CAPABILITIES to ARCH_CAPABILITIES.
4. Check if AVX512_VP2INTERSECT and AVX512_BF16 are usable.
5. Update CPU feature detection test.
---
 sysdeps/x86/cpu-features.c         | 26 ++++++++++---
 sysdeps/x86/cpu-features.h         | 59 ++++++++++++++++++++++++++++--
 sysdeps/x86/tst-get-cpu-features.c | 14 ++++++-
 3 files changed, 90 insertions(+), 9 deletions(-)
  

Comments

Florian Weimer June 18, 2020, 8:09 a.m. UTC | #1
* H. J. Lu via Libc-alpha:

> 1. Add COMMON_CPUID_INDEX_7_ECX_1 for AVX512_BF16.

Do you expect similar changes in the future?  This changes the layout of
struct cpu_features.

If __x86_get_cpu_features is exported, such a change will require a new
symbol version for __x86_get_cpu_features and can therefore only be made
at a release boundary.  Even then, statically linked libraries will
silently use the old layout of struct cpu_features, silently giving
incorrect results if linked against a newer glibc version.

Thanks,
Florian
  

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 916bbf5242..489c370348 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -90,11 +90,18 @@  get_common_indices (struct cpu_features *cpu_features,
     }
 
   if (cpu_features->basic.max_cpuid >= 7)
-    __cpuid_count (7, 0,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
-		   cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+    {
+      __cpuid_count (7, 0,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7].eax,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7].ebx,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7].ecx,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7].edx);
+      __cpuid_count (7, 1,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].eax,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ebx,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].ecx,
+		     cpu_features->cpuid[COMMON_CPUID_INDEX_7_ECX_1].edx);
+    }
 
   if (cpu_features->basic.max_cpuid >= 0xd)
     __cpuid_count (0xd, 1,
@@ -215,6 +222,15 @@  get_common_indices (struct cpu_features *cpu_features,
 		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_VPOPCNTDQ))
 		    cpu_features->feature[index_arch_AVX512_VPOPCNTDQ_Usable]
 		      |= bit_arch_AVX512_VPOPCNTDQ_Usable;
+		  /* Determine if AVX512_VP2INTERSECT is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features,
+					  AVX512_VP2INTERSECT))
+		    cpu_features->feature[index_arch_AVX512_VP2INTERSECT_Usable]
+		      |= bit_arch_AVX512_VP2INTERSECT_Usable;
+		  /* Determine if AVX512_BF16 is usable.  */
+		  if (CPU_FEATURES_CPU_P (cpu_features, AVX512_BF16))
+		    cpu_features->feature[index_arch_AVX512_BF16_Usable]
+		      |= bit_arch_AVX512_BF16_Usable;
 		}
 	    }
 	}
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index a5cc55d8b6..de20044417 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -36,6 +36,7 @@  enum
   COMMON_CPUID_INDEX_D_ECX_1,
   COMMON_CPUID_INDEX_80000007,
   COMMON_CPUID_INDEX_80000008,
+  COMMON_CPUID_INDEX_7_ECX_1,
   /* Keep the following line at the end.  */
   COMMON_CPUID_INDEX_MAX
 };
@@ -142,6 +143,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define bit_arch_VPCLMULQDQ_Usable		(1u << 20)
 #define bit_arch_XOP_Usable			(1u << 21)
 #define bit_arch_XSAVEC_Usable			(1u << 22)
+#define bit_arch_AVX512_VP2INTERSECT_Usable	(1u << 23)
+#define bit_arch_AVX512_BF16_Usable		(1u << 24)
 
 #define index_arch_AVX_Usable			FEATURE_INDEX_1
 #define index_arch_AVX2_Usable			FEATURE_INDEX_1
@@ -166,6 +169,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define index_arch_VPCLMULQDQ_Usable		FEATURE_INDEX_1
 #define index_arch_XOP_Usable			FEATURE_INDEX_1
 #define index_arch_XSAVEC_Usable		FEATURE_INDEX_1
+#define index_arch_AVX512_VP2INTERSECT_Usable	FEATURE_INDEX_1
+#define index_arch_AVX512_BF16_Usable		FEATURE_INDEX_1
 
 /* Unused.  Compiler will optimize them out.  */
 #define bit_arch_SSE3_Usable			(1u << 0)
@@ -355,6 +360,7 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define need_arch_feature_AVX512_4VNNIW		1
 #define need_arch_feature_AVX512_4FMAPS		1
 #define need_arch_feature_FSRM			0
+#define need_arch_feature_AVX512_VP2INTERSECT	1
 
 /* COMMON_CPUID_INDEX_80000001.  */
 
@@ -375,6 +381,11 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define need_arch_feature_INVARIANT_TSC		0
 #define need_arch_feature_WBNOINVD		0
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define need_arch_feature_AVX512_BF16		1
+
 /* CPU features.  */
 
 /* COMMON_CPUID_INDEX_1.  */
@@ -494,17 +505,26 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define bit_cpu_CLDEMOTE	(1u << 25)
 #define bit_cpu_MOVDIRI		(1u << 27)
 #define bit_cpu_MOVDIR64B	(1u << 28)
+#define bit_cpu_ENQCMD		(1u << 29)
 #define bit_cpu_SGX_LC		(1u << 30)
+#define bit_cpu_PKS		(1u << 31)
 
 /* EDX.  */
 #define bit_cpu_AVX512_4VNNIW	(1u << 2)
 #define bit_cpu_AVX512_4FMAPS	(1u << 3)
 #define bit_cpu_FSRM		(1u << 4)
+#define bit_cpu_AVX512_VP2INTERSECT (1u << 8)
+#define bit_cpu_MD_CLEAR	(1u << 10)
+#define bit_cpu_SERIALIZE	(1u << 14)
+#define bit_cpu_HYBRID		(1u << 15)
+#define bit_cpu_TSXLDTRK	(1u << 16)
 #define bit_cpu_PCONFIG		(1u << 18)
 #define bit_cpu_IBT		(1u << 20)
 #define bit_cpu_IBRS_IBPB	(1u << 26)
 #define bit_cpu_STIBP		(1u << 27)
-#define bit_cpu_CAPABILITIES	(1u << 29)
+#define bit_cpu_L1D_FLUSH	(1u << 28)
+#define bit_cpu_ARCH_CAPABILITIES (1u << 29)
+#define bit_cpu_CORE_CAPABILITIES (1u << 30)
 #define bit_cpu_SSBD		(1u << 31)
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -545,6 +565,11 @@  extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define bit_cpu_WBNOINVD	(1u << 9)
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define bit_cpu_AVX512_BF16	(1u << 5)
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -662,17 +687,26 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define index_cpu_CLDEMOTE	COMMON_CPUID_INDEX_7
 #define index_cpu_MOVDIRI	COMMON_CPUID_INDEX_7
 #define index_cpu_MOVDIR64B	COMMON_CPUID_INDEX_7
+#define index_cpu_ENQCMD	COMMON_CPUID_INDEX_7
 #define index_cpu_SGX_LC	COMMON_CPUID_INDEX_7
+#define index_cpu_PKS		COMMON_CPUID_INDEX_7
 
 /* EDX.  */
 #define index_cpu_AVX512_4VNNIW COMMON_CPUID_INDEX_7
 #define index_cpu_AVX512_4FMAPS	COMMON_CPUID_INDEX_7
 #define index_cpu_FSRM		COMMON_CPUID_INDEX_7
+#define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7
+#define index_cpu_MD_CLEAR	COMMON_CPUID_INDEX_7
+#define index_cpu_SERIALIZE	COMMON_CPUID_INDEX_7
+#define index_cpu_HYBRID	COMMON_CPUID_INDEX_7
+#define index_cpu_TSXLDTRK	COMMON_CPUID_INDEX_7
 #define index_cpu_PCONFIG	COMMON_CPUID_INDEX_7
 #define index_cpu_IBT		COMMON_CPUID_INDEX_7
 #define index_cpu_IBRS_IBPB	COMMON_CPUID_INDEX_7
 #define index_cpu_STIBP		COMMON_CPUID_INDEX_7
-#define index_cpu_CAPABILITIES	COMMON_CPUID_INDEX_7
+#define index_cpu_L1D_FLUSH	COMMON_CPUID_INDEX_7
+#define index_cpu_ARCH_CAPABILITIES COMMON_CPUID_INDEX_7
+#define index_cpu_CORE_CAPABILITIES COMMON_CPUID_INDEX_7
 #define index_cpu_SSBD		COMMON_CPUID_INDEX_7
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -713,6 +747,11 @@  extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define index_cpu_WBNOINVD	COMMON_CPUID_INDEX_80000008
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define index_cpu_AVX512_BF16	COMMON_CPUID_INDEX_7_ECX_1
+
 /* COMMON_CPUID_INDEX_1.  */
 
 /* ECX.  */
@@ -830,17 +869,26 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define reg_CLDEMOTE		ecx
 #define reg_MOVDIRI		ecx
 #define reg_MOVDIR64B		ecx
+#define reg_ENQCMD		ecx
 #define reg_SGX_LC		ecx
+#define reg_PKS			ecx
 
 /* EDX.  */
 #define reg_AVX512_4VNNIW	edx
 #define reg_AVX512_4FMAPS	edx
 #define reg_FSRM		edx
+#define reg_AVX512_VP2INTERSECT	edx
+#define reg_MD_CLEAR		edx
+#define reg_SERIALIZE		edx
+#define reg_HYBRID		edx
+#define reg_TSXLDTRK		edx
 #define reg_PCONFIG		edx
 #define reg_IBT			edx
 #define reg_IBRS_IBPB		edx
 #define reg_STIBP		edx
-#define reg_CAPABILITIES	edx
+#define reg_L1D_FLUSH		edx
+#define reg_ARCH_CAPABILITIES	edx
+#define reg_CORE_CAPABILITIES	edx
 #define reg_SSBD		edx
 
 /* COMMON_CPUID_INDEX_80000001.  */
@@ -881,6 +929,11 @@  extern const struct cpu_features *__get_cpu_features (void)
 /* EBX.  */
 #define reg_WBNOINVD		ebx
 
+/* COMMON_CPUID_INDEX_7_ECX_1.  */
+
+/* EAX.  */
+#define reg_AVX512_BF16		eax
+
 /* FEATURE_INDEX_2.  */
 #define bit_arch_I586				(1u << 0)
 #define bit_arch_I686				(1u << 1)
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index 0dcb906a86..815fdb664a 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -174,15 +174,24 @@  do_test (void)
   CHECK_CPU_FEATURE (CLDEMOTE);
   CHECK_CPU_FEATURE (MOVDIRI);
   CHECK_CPU_FEATURE (MOVDIR64B);
+  CHECK_CPU_FEATURE (ENQCMD);
   CHECK_CPU_FEATURE (SGX_LC);
+  CHECK_CPU_FEATURE (PKS);
   CHECK_CPU_FEATURE (AVX512_4VNNIW);
   CHECK_CPU_FEATURE (AVX512_4FMAPS);
   CHECK_CPU_FEATURE (FSRM);
+  CHECK_CPU_FEATURE (AVX512_VP2INTERSECT);
+  CHECK_CPU_FEATURE (MD_CLEAR);
+  CHECK_CPU_FEATURE (SERIALIZE);
+  CHECK_CPU_FEATURE (HYBRID);
+  CHECK_CPU_FEATURE (TSXLDTRK);
   CHECK_CPU_FEATURE (PCONFIG);
   CHECK_CPU_FEATURE (IBT);
   CHECK_CPU_FEATURE (IBRS_IBPB);
   CHECK_CPU_FEATURE (STIBP);
-  CHECK_CPU_FEATURE (CAPABILITIES);
+  CHECK_CPU_FEATURE (L1D_FLUSH);
+  CHECK_CPU_FEATURE (ARCH_CAPABILITIES);
+  CHECK_CPU_FEATURE (CORE_CAPABILITIES);
   CHECK_CPU_FEATURE (SSBD);
   CHECK_CPU_FEATURE (LAHF64_SAHF64);
   CHECK_CPU_FEATURE (SVM);
@@ -204,6 +213,7 @@  do_test (void)
   CHECK_CPU_FEATURE (XSAVES);
   CHECK_CPU_FEATURE (INVARIANT_TSC);
   CHECK_CPU_FEATURE (WBNOINVD);
+  CHECK_CPU_FEATURE (AVX512_BF16);
 
   printf ("Usable CPU features:\n");
   CHECK_CPU_FEATURE_USABLE (SSE3);
@@ -267,6 +277,7 @@  do_test (void)
   CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
   CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
   CHECK_CPU_FEATURE_USABLE (FSRM);
+  CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
   CHECK_CPU_FEATURE_USABLE (LAHF64_SAHF64);
   CHECK_CPU_FEATURE_USABLE (LZCNT);
   CHECK_CPU_FEATURE_USABLE (SSE4A);
@@ -282,6 +293,7 @@  do_test (void)
   CHECK_CPU_FEATURE_USABLE (XSAVES);
   CHECK_CPU_FEATURE_USABLE (INVARIANT_TSC);
   CHECK_CPU_FEATURE_USABLE (WBNOINVD);
+  CHECK_CPU_FEATURE_USABLE (AVX512_BF16);
 
   return 0;
 }