[RFC] X86_64 Avx2 Detection
Commit Message
From: Sihai Yao <sihai.ysh@alibaba-inc.com>
This patch sets bit_AVX2_Usable in __cpu_features.feature by checking
COMMON_CPUID_INDEX_7 for Haswell. Architecture-specific assembler files
can use this bit to select the calling path.
---
ChangeLog | 9 +++++++++
sysdeps/x86_64/multiarch/ifunc-defines.sym | 2 ++
sysdeps/x86_64/multiarch/init-arch.c | 3 +++
sysdeps/x86_64/multiarch/init-arch.h | 9 +++++++++
4 files changed, 23 insertions(+)
Comments
On Fri, Apr 4, 2014 at 12:16 AM, <ling.ma.program@gmail.com> wrote:
> From: Sihai Yao <sihai.ysh@alibaba-inc.com>
>
> This patch sets bit_AVX2_Usable of __cpu_features.feature by checking
> COMMON_CPUID_INDEX_7 for Haswell. Architecture related assembler file
> can use this bit to determine calling path.
>
> ---
> ChangeLog | 9 +++++++++
> sysdeps/x86_64/multiarch/ifunc-defines.sym | 2 ++
> sysdeps/x86_64/multiarch/init-arch.c | 3 +++
> sysdeps/x86_64/multiarch/init-arch.h | 9 +++++++++
> 4 files changed, 23 insertions(+)
>
> diff --git a/ChangeLog b/ChangeLog
> index da8ea6d..ab23a3a 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,12 @@
> +2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com>
> +
> + * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and
> + FEATURE_INDEX_7.
> + * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu
> + features word of COMMON_CPUID_INDEX_7.
> + * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable for memset.S
> + to determine calling path.
> +
> 2014-04-03 David Svoboda <svoboda@cert.org>
>
> [BZ #5666]
> diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
> index eb1538a..448b8c4 100644
> --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
> +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
> @@ -17,4 +17,6 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature)
> FEATURE_SIZE sizeof (unsigned int)
>
> COMMON_CPUID_INDEX_1
> +COMMON_CPUID_INDEX_7
> FEATURE_INDEX_1
> +FEATURE_INDEX_7
> diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
> index db74d97..2bbc5eb 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.c
> +++ b/sysdeps/x86_64/multiarch/init-arch.c
> @@ -106,6 +106,7 @@ __init_cpu_features (void)
> case 0x2c:
> case 0x2e:
> case 0x2f:
> + case 0x3c:
This change isn't mentioned in the ChangeLog. The Intel optimization reference
manual shows that models 0x45 and 0x46 are also Haswell. This should be in a
separate patch.
> /* Rep string instructions, copy backward, unaligned loads
> and pminub are fast on Intel Core i3, i5 and i7. */
> #if index_Fast_Rep_String != index_Fast_Copy_Backward
> @@ -153,6 +154,8 @@ __init_cpu_features (void)
> __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
> __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
> __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
> + if (CPUID_AVX2)
> + __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
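The bit_AVX2_Usable assignment above should be inside if (CPUID_OSXSAVE),
similar to bit_AVX_Usable, so that AVX2 is only marked usable after xgetbv
has confirmed that the OS enables YMM state.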
> /* Can we call xgetbv? */
> if (CPUID_OSXSAVE)
> diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
> index 793707a..e453ccc 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.h
> +++ b/sysdeps/x86_64/multiarch/init-arch.h
> @@ -24,6 +24,7 @@
> #define bit_FMA_Usable (1 << 7)
> #define bit_FMA4_Usable (1 << 8)
> #define bit_Slow_SSE4_2 (1 << 9)
> +#define bit_AVX2_Usable (1 << 10)
>
> /* CPUID Feature flags. */
>
> @@ -40,6 +41,7 @@
>
> /* COMMON_CPUID_INDEX_7. */
> #define bit_RTM (1 << 11)
> +#define bit_AVX2 (1 << 5)
>
> /* XCR0 Feature flags. */
> #define bit_XMM_state (1 << 1)
> @@ -54,6 +56,7 @@
> # define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
> # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
> # define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
> +# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
>
> # define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
> # define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
> @@ -64,6 +67,7 @@
> # define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
> # define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
> # define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
> +# define index_AVX2_Usable FEATURE_INDEX_7*FEATURE_SIZE
>
> #else /* __ASSEMBLER__ */
>
> @@ -81,6 +85,7 @@ enum
> enum
> {
> FEATURE_INDEX_1 = 0,
> + FEATURE_INDEX_7,
> /* Keep the following line at the end. */
> FEATURE_INDEX_MAX
> };
> @@ -145,6 +150,8 @@ extern const struct cpu_features *__get_cpu_features (void)
> HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
> # define CPUID_RTM \
> HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
> +# define CPUID_AVX2 \
> + HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
>
> /* HAS_* evaluates to true if we may use the feature at runtime. */
> # define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
> @@ -153,6 +160,7 @@ extern const struct cpu_features *__get_cpu_features (void)
> # define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
> # define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
> # define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
> +# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
>
> # define index_Fast_Rep_String FEATURE_INDEX_1
> # define index_Fast_Copy_Backward FEATURE_INDEX_1
> @@ -163,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void)
> # define index_FMA_Usable FEATURE_INDEX_1
> # define index_FMA4_Usable FEATURE_INDEX_1
> # define index_Slow_SSE4_2 FEATURE_INDEX_1
> +# define index_AVX2_Usable FEATURE_INDEX_7
>
> # define HAS_ARCH_FEATURE(name) \
> ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
> --
> 1.8.1.4
>
@@ -1,3 +1,12 @@
+2014-04-04 Sihai Yao <sihai.ysh@alibaba-inc.com>
+
+ * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPU_INDEX_7 and
+ FEATURE_INDEX_7.
+ * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu
+ features word of COMMON_CPUID_INDEX_7.
+ * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable for memset.S
+ to determine calling path.
+
2014-04-03 David Svoboda <svoboda@cert.org>
[BZ #5666]
@@ -17,4 +17,6 @@ FEATURE_OFFSET offsetof (struct cpu_features, feature)
FEATURE_SIZE sizeof (unsigned int)
COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
FEATURE_INDEX_1
+FEATURE_INDEX_7
@@ -106,6 +106,7 @@ __init_cpu_features (void)
case 0x2c:
case 0x2e:
case 0x2f:
+ case 0x3c:
/* Rep string instructions, copy backward, unaligned loads
and pminub are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward
@@ -153,6 +154,8 @@ __init_cpu_features (void)
__cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
__cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
__cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
+ if (CPUID_AVX2)
+ __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
/* Can we call xgetbv? */
if (CPUID_OSXSAVE)
@@ -24,6 +24,7 @@
#define bit_FMA_Usable (1 << 7)
#define bit_FMA4_Usable (1 << 8)
#define bit_Slow_SSE4_2 (1 << 9)
+#define bit_AVX2_Usable (1 << 10)
/* CPUID Feature flags. */
@@ -40,6 +41,7 @@
/* COMMON_CPUID_INDEX_7. */
#define bit_RTM (1 << 11)
+#define bit_AVX2 (1 << 5)
/* XCR0 Feature flags. */
#define bit_XMM_state (1 << 1)
@@ -54,6 +56,7 @@
# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
# define index_AVX COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2 COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
@@ -64,6 +67,7 @@
# define index_FMA_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_FMA4_Usable FEATURE_INDEX_1*FEATURE_SIZE
# define index_Slow_SSE4_2 FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable FEATURE_INDEX_7*FEATURE_SIZE
#else /* __ASSEMBLER__ */
@@ -81,6 +85,7 @@ enum
enum
{
FEATURE_INDEX_1 = 0,
+ FEATURE_INDEX_7,
/* Keep the following line at the end. */
FEATURE_INDEX_MAX
};
@@ -145,6 +150,8 @@ extern const struct cpu_features *__get_cpu_features (void)
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
# define CPUID_RTM \
HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define CPUID_AVX2 \
+ HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
/* HAS_* evaluates to true if we may use the feature at runtime. */
# define HAS_SSE2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -153,6 +160,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define HAS_SSE4_1 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
# define HAS_RTM HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define HAS_AVX2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
# define index_Fast_Rep_String FEATURE_INDEX_1
# define index_Fast_Copy_Backward FEATURE_INDEX_1
@@ -163,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void)
# define index_FMA_Usable FEATURE_INDEX_1
# define index_FMA4_Usable FEATURE_INDEX_1
# define index_Slow_SSE4_2 FEATURE_INDEX_1
+# define index_AVX2_Usable FEATURE_INDEX_7
# define HAS_ARCH_FEATURE(name) \
((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)