diff mbox

[RFC] X86_64 Avx2 Detection

Message ID CAMe9rOr_6A9M0jmK4F8m2w9URMQkC38Oy7u_U4LBnZf5y0JTLg@mail.gmail.com
State Committed
Headers show

Commit Message

H.J. Lu April 9, 2014, 4:28 p.m. UTC
On Wed, Apr 9, 2014 at 9:12 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Apr 4, 2014 at 12:16 AM,  <ling.ma.program@gmail.com> wrote:
>> From: Sihai Yao  <sihai.ysh@alibaba-inc.com>
>>
>> This patch sets bit_AVX2_Usable in __cpu_features.feature by checking
>> COMMON_CPUID_INDEX_7 for Haswell. Architecture-related assembler files
>> can use this bit to determine the calling path.
>>
>> ---
>>  ChangeLog                                  | 9 +++++++++
>>  sysdeps/x86_64/multiarch/ifunc-defines.sym | 2 ++
>>  sysdeps/x86_64/multiarch/init-arch.c       | 3 +++
>>  sysdeps/x86_64/multiarch/init-arch.h       | 9 +++++++++
>>  4 files changed, 23 insertions(+)
>>
>> diff --git a/ChangeLog b/ChangeLog
>> index da8ea6d..ab23a3a 100644
>> --- a/ChangeLog
>> +++ b/ChangeLog
>> @@ -1,3 +1,12 @@
>> +2014-04-04  Sihai Yao  <sihai.ysh@alibaba-inc.com>
>> +
>> +       * sysdeps/x86_64/multiarch/ifunc-defines.sym: Add COMMON_CPUID_INDEX_7 and
>> +       FEATURE_INDEX_7.
>> +       * sysdeps/x86_64/multiarch/init-arch.c: Add AVX2 detection from cpu
>> +       features word of COMMON_CPUID_INDEX_7.
>> +       * sysdeps/x86_64/multiarch/init-arch.h: Add bit_AVX2_Usable for memset.S
>> +       to determine calling path.
>> +
>>  2014-04-03  David Svoboda  <svoboda@cert.org>
>>
>>         [BZ #5666]
>> diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
>> index eb1538a..448b8c4 100644
>> --- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
>> +++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
>> @@ -17,4 +17,6 @@ FEATURE_OFFSET                offsetof (struct cpu_features, feature)
>>  FEATURE_SIZE           sizeof (unsigned int)
>>
>>  COMMON_CPUID_INDEX_1
>> +COMMON_CPUID_INDEX_7
>>  FEATURE_INDEX_1
>> +FEATURE_INDEX_7
>> diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
>> index db74d97..2bbc5eb 100644
>> --- a/sysdeps/x86_64/multiarch/init-arch.c
>> +++ b/sysdeps/x86_64/multiarch/init-arch.c
>> @@ -106,6 +106,7 @@ __init_cpu_features (void)
>>             case 0x2c:
>>             case 0x2e:
>>             case 0x2f:
>> +           case 0x3c:
>
> This isn't mentioned in the ChangeLog.  The Intel Architecture Optimization
> Reference Manual shows that models 0x45 and 0x46 are also Haswell.  This
> should be in a separate patch.
>
>>               /* Rep string instructions, copy backward, unaligned loads
>>                  and pminub are fast on Intel Core i3, i5 and i7.  */
>>  #if index_Fast_Rep_String != index_Fast_Copy_Backward
>> @@ -153,6 +154,8 @@ __init_cpu_features (void)
>>                    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ebx,
>>                    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].ecx,
>>                    __cpu_features.cpuid[COMMON_CPUID_INDEX_7].edx);
>> +  if (CPUID_AVX2)
>> +    __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
> ^^^^^^^^^^
>
> This should be inside if (CPUID_OSXSAVE), similar to bit_AVX_Usable.
>
>>    /* Can we call xgetbv?  */
>>    if (CPUID_OSXSAVE)
>> diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
>> index 793707a..e453ccc 100644
>> --- a/sysdeps/x86_64/multiarch/init-arch.h
>> +++ b/sysdeps/x86_64/multiarch/init-arch.h
>> @@ -24,6 +24,7 @@
>>  #define bit_FMA_Usable                 (1 << 7)
>>  #define bit_FMA4_Usable                        (1 << 8)
>>  #define bit_Slow_SSE4_2                        (1 << 9)
>> +#define bit_AVX2_Usable                        (1 << 10)
>>
>>  /* CPUID Feature flags.  */
>>
>> @@ -40,6 +41,7 @@
>>
>>  /* COMMON_CPUID_INDEX_7.  */
>>  #define bit_RTM                (1 << 11)
>> +#define bit_AVX2       (1 << 5)
>>
>>  /* XCR0 Feature flags.  */
>>  #define bit_XMM_state  (1 << 1)
>> @@ -54,6 +56,7 @@
>>  # define index_SSE4_1  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
>>  # define index_SSE4_2  COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
>>  # define index_AVX     COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
>> +# define index_AVX2    COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
>>
>>  # define index_Fast_Rep_String         FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Fast_Copy_Backward      FEATURE_INDEX_1*FEATURE_SIZE
>> @@ -64,6 +67,7 @@
>>  # define index_FMA_Usable              FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_FMA4_Usable             FEATURE_INDEX_1*FEATURE_SIZE
>>  # define index_Slow_SSE4_2             FEATURE_INDEX_1*FEATURE_SIZE
>> +# define index_AVX2_Usable             FEATURE_INDEX_7*FEATURE_SIZE
>>
>>  #else  /* __ASSEMBLER__ */
>>
>> @@ -81,6 +85,7 @@ enum
>>  enum
>>    {
>>      FEATURE_INDEX_1 = 0,
>> +    FEATURE_INDEX_7,
>>      /* Keep the following line at the end.  */
>>      FEATURE_INDEX_MAX
>>    };
>> @@ -145,6 +150,8 @@ extern const struct cpu_features *__get_cpu_features (void)
>>    HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
>>  # define CPUID_RTM \
>>    HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
>> +# define CPUID_AVX2 \
>> +  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
>>
>>  /* HAS_* evaluates to true if we may use the feature at runtime.  */
>>  # define HAS_SSE2      HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
>> @@ -153,6 +160,7 @@ extern const struct cpu_features *__get_cpu_features (void)
>>  # define HAS_SSE4_1    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
>>  # define HAS_SSE4_2    HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
>>  # define HAS_RTM       HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
>> +# define HAS_AVX2      HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
>>
>>  # define index_Fast_Rep_String         FEATURE_INDEX_1
>>  # define index_Fast_Copy_Backward      FEATURE_INDEX_1
>> @@ -163,6 +171,7 @@ extern const struct cpu_features *__get_cpu_features (void)
>>  # define index_FMA_Usable              FEATURE_INDEX_1
>>  # define index_FMA4_Usable             FEATURE_INDEX_1
>>  # define index_Slow_SSE4_2             FEATURE_INDEX_1
>> +# define index_AVX2_Usable             FEATURE_INDEX_7
>>
>>  # define HAS_ARCH_FEATURE(name) \
>>    ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
>> --
>> 1.8.1.4

Here is the updated patch.  We don't need FEATURE_INDEX_7: bit_AVX2_Usable (1 << 10) fits in the existing FEATURE_INDEX_1 feature word.
diff mbox

Patch

diff --git a/sysdeps/x86_64/multiarch/ifunc-defines.sym b/sysdeps/x86_64/multiarch/ifunc-defines.sym
index eb1538a..a410d88 100644
--- a/sysdeps/x86_64/multiarch/ifunc-defines.sym
+++ b/sysdeps/x86_64/multiarch/ifunc-defines.sym
@@ -17,4 +17,5 @@  FEATURE_OFFSET		offsetof (struct cpu_features, feature)
 FEATURE_SIZE		sizeof (unsigned int)
 
 COMMON_CPUID_INDEX_1
+COMMON_CPUID_INDEX_7
 FEATURE_INDEX_1
diff --git a/sysdeps/x86_64/multiarch/init-arch.c b/sysdeps/x86_64/multiarch/init-arch.c
index db74d97..2a6dcb7 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -167,6 +167,9 @@  __init_cpu_features (void)
 	  /* Determine if AVX is usable.  */
 	  if (CPUID_AVX)
 	    __cpu_features.feature[index_AVX_Usable] |= bit_AVX_Usable;
+	  /* Determine if AVX2 is usable.  */
+	  if (CPUID_AVX2)
+	    __cpu_features.feature[index_AVX2_Usable] |= bit_AVX2_Usable;
 	  /* Determine if FMA is usable.  */
 	  if (CPUID_FMA)
 	    __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h b/sysdeps/x86_64/multiarch/init-arch.h
index 793707a..813b6de 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -24,6 +24,7 @@ 
 #define bit_FMA_Usable			(1 << 7)
 #define bit_FMA4_Usable			(1 << 8)
 #define bit_Slow_SSE4_2			(1 << 9)
+#define bit_AVX2_Usable			(1 << 10)
 
 /* CPUID Feature flags.  */
 
@@ -40,6 +41,7 @@ 
 
 /* COMMON_CPUID_INDEX_7.  */
 #define bit_RTM		(1 << 11)
+#define bit_AVX2	(1 << 5)
 
 /* XCR0 Feature flags.  */
 #define bit_XMM_state  (1 << 1)
@@ -54,6 +56,7 @@ 
 # define index_SSE4_1	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_SSE4_2	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
 # define index_AVX	COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
+# define index_AVX2	COMMON_CPUID_INDEX_7*CPUID_SIZE+CPUID_EBX_OFFSET
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1*FEATURE_SIZE
@@ -64,6 +67,7 @@ 
 # define index_FMA_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_FMA4_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 # define index_Slow_SSE4_2		FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX2_Usable		FEATURE_INDEX_1*FEATURE_SIZE
 
 #else	/* __ASSEMBLER__ */
 
@@ -145,6 +149,8 @@  extern const struct cpu_features *__get_cpu_features (void)
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_80000001, ecx, bit_FMA4)
 # define CPUID_RTM \
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define CPUID_AVX2 \
+  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
 
 /* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_SSE2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -153,6 +159,7 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_SSE4_1	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_1)
 # define HAS_SSE4_2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, bit_SSE4_2)
 # define HAS_RTM	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
+# define HAS_AVX2	HAS_CPU_FEATURE (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
 
 # define index_Fast_Rep_String		FEATURE_INDEX_1
 # define index_Fast_Copy_Backward	FEATURE_INDEX_1
@@ -163,6 +170,7 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_FMA_Usable		FEATURE_INDEX_1
 # define index_FMA4_Usable		FEATURE_INDEX_1
 # define index_Slow_SSE4_2		FEATURE_INDEX_1
+# define index_AVX2_Usable		FEATURE_INDEX_1
 
 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)