x86: Detect Intel Advanced Matrix Extensions

Message ID 20200625223807.3447984-1-hjl.tools@gmail.com
State Committed
Headers
Series x86: Detect Intel Advanced Matrix Extensions

Commit Message

H.J. Lu June 25, 2020, 10:38 p.m. UTC
Intel Advanced Matrix Extensions (Intel AMX) is a new programming
paradigm consisting of two components: a set of 2-dimensional registers
(tiles) representing sub-arrays from a larger 2-dimensional memory image,
and accelerators able to operate on tiles.  Intel AMX is an extensible
architecture.  New accelerators can be added and the existing accelerator
may be enhanced to provide higher performance.  The initial features are
AMX-BF16, AMX-TILE and AMX-INT8, which are usable only if the operating
system supports both XTILECFG state and XTILEDATA state.

Add AMX-BF16, AMX-TILE and AMX-INT8 support to HAS_CPU_FEATURE and
CPU_FEATURE_USABLE.
---
 sysdeps/x86/cpu-features.c         | 18 ++++++++++++++++++
 sysdeps/x86/cpu-features.h         | 20 ++++++++++++++++++++
 sysdeps/x86/tst-get-cpu-features.c |  6 ++++++
 3 files changed, 44 insertions(+)
  

Comments

H.J. Lu June 26, 2020, 12:40 p.m. UTC | #1
On Thu, Jun 25, 2020 at 3:38 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Intel Advanced Matrix Extensions (Intel AMX) is a new programming
> paradigm consisting of two components: a set of 2-dimensional registers
> (tiles) representing sub-arrays from a larger 2-dimensional memory image,
> and accelerators able to operate on tiles.  Intel AMX is an extensible
> architecture.  New accelerators can be added and the existing accelerator
> may be enhanced to provide higher performance.  The initial features are
> AMX-BF16, AMX-TILE and AMX-INT8, which are usable only if the operating
> system supports both XTILECFG state and XTILEDATA state.
>
> Add AMX-BF16, AMX-TILE and AMX-INT8 support to HAS_CPU_FEATURE and
> CPU_FEATURE_USABLE.
> ---
>  sysdeps/x86/cpu-features.c         | 18 ++++++++++++++++++
>  sysdeps/x86/cpu-features.h         | 20 ++++++++++++++++++++
>  sysdeps/x86/tst-get-cpu-features.c |  6 ++++++
>  3 files changed, 44 insertions(+)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 79bc0d7216..c351bdd54a 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -239,6 +239,24 @@ get_common_indices (struct cpu_features *cpu_features,
>             }
>         }
>
> +      /* Are XTILECFG and XTILEDATA states usable?  */
> +      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
> +         == (bit_XTILECFG_state | bit_XTILEDATA_state))
> +       {
> +         /* Determine if AMX_BF16 is usable.  */
> +         if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
> +           cpu_features->usable[index_arch_AMX_BF16_Usable]
> +             |= bit_arch_AMX_BF16_Usable;
> +         /* Determine if AMX_TILE is usable.  */
> +         if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
> +           cpu_features->usable[index_arch_AMX_TILE_Usable]
> +             |= bit_arch_AMX_TILE_Usable;
> +         /* Determine if AMX_INT8 is usable.  */
> +         if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
> +           cpu_features->usable[index_arch_AMX_INT8_Usable]
> +             |= bit_arch_AMX_INT8_Usable;
> +       }
> +
>        /* For _dl_runtime_resolve, set xsave_state_size to xsave area
>          size + integer register save size and align it to 64 bytes.  */
>        if (cpu_features->basic.max_cpuid >= 0xd)
> diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
> index 574f055e0c..78d0692fab 100644
> --- a/sysdeps/x86/cpu-features.h
> +++ b/sysdeps/x86/cpu-features.h
> @@ -156,6 +156,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define bit_arch_AVX512_VP2INTERSECT_Usable    (1u << 24)
>  #define bit_arch_AVX512_BF16_Usable            (1u << 25)
>  #define bit_arch_PKU_Usable                    (1u << 26)
> +#define bit_arch_AMX_BF16_Usable               (1u << 27)
> +#define bit_arch_AMX_TILE_Usable               (1u << 28)
> +#define bit_arch_AMX_INT8_Usable               (1u << 29)
>
>  #define index_arch_AVX_Usable                  USABLE_FEATURE_INDEX_1
>  #define index_arch_AVX2_Usable                 USABLE_FEATURE_INDEX_1
> @@ -184,6 +187,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define index_arch_AVX512_VP2INTERSECT_Usable  USABLE_FEATURE_INDEX_1
>  #define index_arch_AVX512_BF16_Usable          USABLE_FEATURE_INDEX_1
>  #define index_arch_PKU_Usable                  USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_BF16_Usable             USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_TILE_Usable             USABLE_FEATURE_INDEX_1
> +#define index_arch_AMX_INT8_Usable             USABLE_FEATURE_INDEX_1
>
>  #define feature_AVX_Usable                     usable
>  #define feature_AVX2_Usable                    usable
> @@ -212,6 +218,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define feature_AVX512_VP2INTERSECT_Usable     usable
>  #define feature_AVX512_BF16_Usable             usable
>  #define feature_PKU_Usable                     usable
> +#define feature_AMX_BF16_Usable                        usable
> +#define feature_AMX_TILE_Usable                        usable
> +#define feature_AMX_INT8_Usable                        usable
>
>  /* CPU features.  */
>
> @@ -347,6 +356,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define bit_cpu_TSXLDTRK       (1u << 16)
>  #define bit_cpu_PCONFIG                (1u << 18)
>  #define bit_cpu_IBT            (1u << 20)
> +#define bit_cpu_AMX_BF16       (1u << 22)
> +#define bit_cpu_AMX_TILE       (1u << 24)
> +#define bit_cpu_AMX_INT8       (1u << 25)
>  #define bit_cpu_IBRS_IBPB      (1u << 26)
>  #define bit_cpu_STIBP          (1u << 27)
>  #define bit_cpu_L1D_FLUSH      (1u << 28)
> @@ -527,6 +539,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define index_cpu_SERIALIZE    COMMON_CPUID_INDEX_7
>  #define index_cpu_HYBRID       COMMON_CPUID_INDEX_7
>  #define index_cpu_TSXLDTRK     COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_BF16     COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_TILE     COMMON_CPUID_INDEX_7
> +#define index_cpu_AMX_INT8     COMMON_CPUID_INDEX_7
>  #define index_cpu_PCONFIG      COMMON_CPUID_INDEX_7
>  #define index_cpu_IBT          COMMON_CPUID_INDEX_7
>  #define index_cpu_IBRS_IBPB    COMMON_CPUID_INDEX_7
> @@ -709,6 +724,9 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define reg_SERIALIZE          edx
>  #define reg_HYBRID             edx
>  #define reg_TSXLDTRK           edx
> +#define reg_AMX_BF16           edx
> +#define reg_AMX_TILE           edx
> +#define reg_AMX_INT8           edx
>  #define reg_PCONFIG            edx
>  #define reg_IBT                        edx
>  #define reg_IBRS_IBPB          edx
> @@ -819,6 +837,8 @@ extern const struct cpu_features *__get_cpu_features (void)
>  #define bit_Opmask_state       (1u << 5)
>  #define bit_ZMM0_15_state      (1u << 6)
>  #define bit_ZMM16_31_state     (1u << 7)
> +#define bit_XTILECFG_state     (1u << 17)
> +#define bit_XTILEDATA_state    (1u << 18)
>
>  # if defined (_LIBC) && !IS_IN (nonlib)
>  /* Unused for x86.  */
> diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
> index c60918cf00..3d44af202e 100644
> --- a/sysdeps/x86/tst-get-cpu-features.c
> +++ b/sysdeps/x86/tst-get-cpu-features.c
> @@ -185,6 +185,9 @@ do_test (void)
>    CHECK_CPU_FEATURE (SERIALIZE);
>    CHECK_CPU_FEATURE (HYBRID);
>    CHECK_CPU_FEATURE (TSXLDTRK);
> +  CHECK_CPU_FEATURE (AMX_BF16);
> +  CHECK_CPU_FEATURE (AMX_TILE);
> +  CHECK_CPU_FEATURE (AMX_INT8);
>    CHECK_CPU_FEATURE (PCONFIG);
>    CHECK_CPU_FEATURE (IBT);
>    CHECK_CPU_FEATURE (IBRS_IBPB);
> @@ -239,6 +242,9 @@ do_test (void)
>    CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
>    CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
>    CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
> +  CHECK_CPU_FEATURE_USABLE (AMX_BF16);
> +  CHECK_CPU_FEATURE_USABLE (AMX_TILE);
> +  CHECK_CPU_FEATURE_USABLE (AMX_INT8);
>    CHECK_CPU_FEATURE_USABLE (XOP);
>    CHECK_CPU_FEATURE_USABLE (FMA4);
>    CHECK_CPU_FEATURE_USABLE (XSAVEC);
> --
> 2.26.2
>

I am checking it in.
  

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 79bc0d7216..c351bdd54a 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -239,6 +239,24 @@  get_common_indices (struct cpu_features *cpu_features,
 	    }
 	}
 
+      /* Are XTILECFG and XTILEDATA states usable?  */
+      if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
+	  == (bit_XTILECFG_state | bit_XTILEDATA_state))
+	{
+	  /* Determine if AMX_BF16 is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_BF16))
+	    cpu_features->usable[index_arch_AMX_BF16_Usable]
+	      |= bit_arch_AMX_BF16_Usable;
+	  /* Determine if AMX_TILE is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_TILE))
+	    cpu_features->usable[index_arch_AMX_TILE_Usable]
+	      |= bit_arch_AMX_TILE_Usable;
+	  /* Determine if AMX_INT8 is usable.  */
+	  if (CPU_FEATURES_CPU_P (cpu_features, AMX_INT8))
+	    cpu_features->usable[index_arch_AMX_INT8_Usable]
+	      |= bit_arch_AMX_INT8_Usable;
+	}
+
       /* For _dl_runtime_resolve, set xsave_state_size to xsave area
 	 size + integer register save size and align it to 64 bytes.  */
       if (cpu_features->basic.max_cpuid >= 0xd)
diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
index 574f055e0c..78d0692fab 100644
--- a/sysdeps/x86/cpu-features.h
+++ b/sysdeps/x86/cpu-features.h
@@ -156,6 +156,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define bit_arch_AVX512_VP2INTERSECT_Usable	(1u << 24)
 #define bit_arch_AVX512_BF16_Usable		(1u << 25)
 #define bit_arch_PKU_Usable			(1u << 26)
+#define bit_arch_AMX_BF16_Usable		(1u << 27)
+#define bit_arch_AMX_TILE_Usable		(1u << 28)
+#define bit_arch_AMX_INT8_Usable		(1u << 29)
 
 #define index_arch_AVX_Usable			USABLE_FEATURE_INDEX_1
 #define index_arch_AVX2_Usable			USABLE_FEATURE_INDEX_1
@@ -184,6 +187,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define index_arch_AVX512_VP2INTERSECT_Usable	USABLE_FEATURE_INDEX_1
 #define index_arch_AVX512_BF16_Usable		USABLE_FEATURE_INDEX_1
 #define index_arch_PKU_Usable			USABLE_FEATURE_INDEX_1
+#define index_arch_AMX_BF16_Usable		USABLE_FEATURE_INDEX_1
+#define index_arch_AMX_TILE_Usable		USABLE_FEATURE_INDEX_1
+#define index_arch_AMX_INT8_Usable		USABLE_FEATURE_INDEX_1
 
 #define feature_AVX_Usable			usable
 #define feature_AVX2_Usable			usable
@@ -212,6 +218,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define feature_AVX512_VP2INTERSECT_Usable	usable
 #define feature_AVX512_BF16_Usable		usable
 #define feature_PKU_Usable			usable
+#define feature_AMX_BF16_Usable			usable
+#define feature_AMX_TILE_Usable			usable
+#define feature_AMX_INT8_Usable			usable
 
 /* CPU features.  */
 
@@ -347,6 +356,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define bit_cpu_TSXLDTRK	(1u << 16)
 #define bit_cpu_PCONFIG		(1u << 18)
 #define bit_cpu_IBT		(1u << 20)
+#define bit_cpu_AMX_BF16	(1u << 22)
+#define bit_cpu_AMX_TILE	(1u << 24)
+#define bit_cpu_AMX_INT8	(1u << 25)
 #define bit_cpu_IBRS_IBPB	(1u << 26)
 #define bit_cpu_STIBP		(1u << 27)
 #define bit_cpu_L1D_FLUSH	(1u << 28)
@@ -527,6 +539,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define index_cpu_SERIALIZE	COMMON_CPUID_INDEX_7
 #define index_cpu_HYBRID	COMMON_CPUID_INDEX_7
 #define index_cpu_TSXLDTRK	COMMON_CPUID_INDEX_7
+#define index_cpu_AMX_BF16	COMMON_CPUID_INDEX_7
+#define index_cpu_AMX_TILE	COMMON_CPUID_INDEX_7
+#define index_cpu_AMX_INT8	COMMON_CPUID_INDEX_7
 #define index_cpu_PCONFIG	COMMON_CPUID_INDEX_7
 #define index_cpu_IBT		COMMON_CPUID_INDEX_7
 #define index_cpu_IBRS_IBPB	COMMON_CPUID_INDEX_7
@@ -709,6 +724,9 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define reg_SERIALIZE		edx
 #define reg_HYBRID		edx
 #define reg_TSXLDTRK		edx
+#define reg_AMX_BF16		edx
+#define reg_AMX_TILE		edx
+#define reg_AMX_INT8		edx
 #define reg_PCONFIG		edx
 #define reg_IBT			edx
 #define reg_IBRS_IBPB		edx
@@ -819,6 +837,8 @@  extern const struct cpu_features *__get_cpu_features (void)
 #define bit_Opmask_state	(1u << 5)
 #define bit_ZMM0_15_state	(1u << 6)
 #define bit_ZMM16_31_state	(1u << 7)
+#define bit_XTILECFG_state	(1u << 17)
+#define bit_XTILEDATA_state	(1u << 18)
 
 # if defined (_LIBC) && !IS_IN (nonlib)
 /* Unused for x86.  */
diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
index c60918cf00..3d44af202e 100644
--- a/sysdeps/x86/tst-get-cpu-features.c
+++ b/sysdeps/x86/tst-get-cpu-features.c
@@ -185,6 +185,9 @@  do_test (void)
   CHECK_CPU_FEATURE (SERIALIZE);
   CHECK_CPU_FEATURE (HYBRID);
   CHECK_CPU_FEATURE (TSXLDTRK);
+  CHECK_CPU_FEATURE (AMX_BF16);
+  CHECK_CPU_FEATURE (AMX_TILE);
+  CHECK_CPU_FEATURE (AMX_INT8);
   CHECK_CPU_FEATURE (PCONFIG);
   CHECK_CPU_FEATURE (IBT);
   CHECK_CPU_FEATURE (IBRS_IBPB);
@@ -239,6 +242,9 @@  do_test (void)
   CHECK_CPU_FEATURE_USABLE (AVX512_4VNNIW);
   CHECK_CPU_FEATURE_USABLE (AVX512_4FMAPS);
   CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
+  CHECK_CPU_FEATURE_USABLE (AMX_BF16);
+  CHECK_CPU_FEATURE_USABLE (AMX_TILE);
+  CHECK_CPU_FEATURE_USABLE (AMX_INT8);
   CHECK_CPU_FEATURE_USABLE (XOP);
   CHECK_CPU_FEATURE_USABLE (FMA4);
   CHECK_CPU_FEATURE_USABLE (XSAVEC);