diff mbox

[x86_64] Detection of availability of AVX512F and AVX512DQ ISAs

Message ID CAMXFM3tt+9NG7vdZowDdMrmWWKtKGWhBW2Q3NL2mddXO+pkjGg@mail.gmail.com
State Superseded
Headers show

Commit Message

Andrew Senkevich April 29, 2015, 3:10 p.m. UTC
Hi,

this patch adds detection of availability of AVX512F and AVX512DQ ISAs.

2015-04-29  Andrew Senkevich  <andrew.senkevich@intel.com>

        * sysdeps/x86_64/multiarch/init-arch.h (bit_AVX512F_Usable,
        bit_AVX512DQ_Usable): New macros.
        * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
        Check and set bit_AVX512F_Usable, bit_AVX512DQ_Usable.

(AVX_Fast_Unaligned_Load)

Ok for trunk?


--
WBR,
Andrew

Comments

H.J. Lu April 29, 2015, 3:17 p.m. UTC | #1
On Wed, Apr 29, 2015 at 8:10 AM, Andrew Senkevich
<andrew.n.senkevich@gmail.com> wrote:
> Hi,
>
> this patch adds detection of availability of AVX512F and AVX512DQ ISAs.
>
> 2015-04-29  Andrew Senkevich  <andrew.senkevich@intel.com>
>
>         * sysdeps/x86_64/multiarch/init-arch.h (bit_AVX512F_Usable,
>         bit_AVX512DQ_Usable): New macro.
>         * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
>         Check and set bit_AVX512F_Usable, bit_AVX512DQ_Usable.
>
> diff --git a/sysdeps/x86_64/multiarch/init-arch.c
> b/sysdeps/x86_64/multiarch/init-arch.c
> index 7dec218..6a81d48 100644
> --- a/sysdeps/x86_64/multiarch/init-arch.c
> +++ b/sysdeps/x86_64/multiarch/init-arch.c
> @@ -179,6 +179,19 @@ __init_cpu_features (void)
>           if (CPUID_AVX2)
>             __cpu_features.feature[index_AVX2_Usable]
>               |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
> +         /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
> +            ZMM16-ZMM31 state are enabled.  */
> +         if ((xcrlow & 0xe0) == 0xe0 )

Please add and use bit_ZMM0_15_state and bit_ZMM16_31_state here instead
of the literal 0xe0 mask.
This file is used by both 32-bit and 64-bit builds.  Does this check work
for both?
diff mbox

Patch

diff --git a/sysdeps/x86_64/multiarch/init-arch.c
b/sysdeps/x86_64/multiarch/init-arch.c
index 7dec218..6a81d48 100644
--- a/sysdeps/x86_64/multiarch/init-arch.c
+++ b/sysdeps/x86_64/multiarch/init-arch.c
@@ -179,6 +179,19 @@  __init_cpu_features (void)
          if (CPUID_AVX2)
            __cpu_features.feature[index_AVX2_Usable]
              |= bit_AVX2_Usable | bit_AVX_Fast_Unaligned_Load;
+         /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
+            ZMM16-ZMM31 state are enabled.  */
+         if ((xcrlow & 0xe0) == 0xe0 )
+           {
+             /* Determine if AVX512F is usable.  */
+             if (CPUID_AVX512F)
+               __cpu_features.feature[index_AVX512F_Usable]
+                 |= bit_AVX512F_Usable;
+             /* Determine if AVX512DQ is usable.  */
+             if (CPUID_AVX512DQ)
+               __cpu_features.feature[index_AVX512DQ_Usable]
+                 |= bit_AVX512DQ_Usable;
+           }
          /* Determine if FMA is usable.  */
          if (CPUID_FMA)
            __cpu_features.feature[index_FMA_Usable] |= bit_FMA_Usable;
diff --git a/sysdeps/x86_64/multiarch/init-arch.h
b/sysdeps/x86_64/multiarch/init-arch.h
index e6b5ba5..b4b445e 100644
--- a/sysdeps/x86_64/multiarch/init-arch.h
+++ b/sysdeps/x86_64/multiarch/init-arch.h
@@ -26,6 +26,8 @@ 
 #define bit_Slow_SSE4_2                        (1 << 9)
 #define bit_AVX2_Usable                        (1 << 10)
 #define bit_AVX_Fast_Unaligned_Load    (1 << 11)
+#define bit_AVX512F_Usable             (1 << 12)
+#define bit_AVX512DQ_Usable            (1 << 13)

 /* CPUID Feature flags.  */

@@ -43,6 +45,8 @@ 
 /* COMMON_CPUID_INDEX_7.  */
 #define bit_RTM                (1 << 11)
 #define bit_AVX2       (1 << 5)
+#define bit_AVX512F    (1 << 16)
+#define bit_AVX512DQ   (1 << 17)

 /* XCR0 Feature flags.  */
 #define bit_XMM_state  (1 << 1)
@@ -76,6 +80,8 @@ 
 # define index_Slow_SSE4_2             FEATURE_INDEX_1*FEATURE_SIZE
 # define index_AVX2_Usable             FEATURE_INDEX_1*FEATURE_SIZE
 # define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX512F_Usable          FEATURE_INDEX_1*FEATURE_SIZE
+# define index_AVX512DQ_Usable         FEATURE_INDEX_1*FEATURE_SIZE

 #else  /* __ASSEMBLER__ */

@@ -152,6 +158,10 @@  extern const struct cpu_features *__get_cpu_features (void)
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_RTM)
 # define CPUID_AVX2 \
   HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX2)
+# define CPUID_AVX512F \
+  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512F)
+# define CPUID_AVX512DQ \
+  HAS_CPUID_FLAG (COMMON_CPUID_INDEX_7, ebx, bit_AVX512DQ)

 /* HAS_* evaluates to true if we may use the feature at runtime.  */
 # define HAS_SSE2      HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, edx, bit_SSE2)
@@ -172,6 +182,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define index_Slow_SSE4_2             FEATURE_INDEX_1
 # define index_AVX2_Usable             FEATURE_INDEX_1
 # define index_AVX_Fast_Unaligned_Load FEATURE_INDEX_1
+# define index_AVX512F_Usable          FEATURE_INDEX_1
+# define index_AVX512DQ_Usable         FEATURE_INDEX_1

 # define HAS_ARCH_FEATURE(name) \
   ((__get_cpu_features ()->feature[index_##name] & (bit_##name)) != 0)
@@ -182,6 +194,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 # define HAS_FAST_UNALIGNED_LOAD       HAS_ARCH_FEATURE (Fast_Unaligned_Load)
 # define HAS_AVX                       HAS_ARCH_FEATURE (AVX_Usable)
 # define HAS_AVX2                      HAS_ARCH_FEATURE (AVX2_Usable)
+# define HAS_AVX512F                   HAS_ARCH_FEATURE (AVX512F_Usable)
+# define HAS_AVX512DQ                  HAS_ARCH_FEATURE (AVX512DQ_Usable)
 # define HAS_FMA                       HAS_ARCH_FEATURE (FMA_Usable)
 # define HAS_FMA4                      HAS_ARCH_FEATURE (FMA4_Usable)
 # define HAS_AVX_FAST_UNALIGNED_LOAD   HAS_ARCH_FEATURE