[3/6] Support Intel AMX-TF32

Message ID 20241113084435.1784546-4-haochen.jiang@intel.com
State New
Headers
Series Support Intel Diamond Rapids AMX instructions |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Haochen Jiang Nov. 13, 2024, 8:44 a.m. UTC
  In this patch, we will support AMX-TF32. It is a simple ISA
compared to the previous ones, so no special handling is needed.

gas/ChangeLog:

	* NEWS: Support Intel AMX-TF32.
	* config/tc-i386.c: Add amx_tf32.
	* doc/c-i386.texi: Document .amx_tf32.
	* testsuite/gas/i386/i386.exp: Run AMX-TF32 tests.
	* testsuite/gas/i386/x86-64.exp: Ditto.
	* testsuite/gas/i386/amx-tf32-inval.l: New test.
	* testsuite/gas/i386/amx-tf32-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-amx-tf32-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-amx-tf32-inval.l: Ditto.
	* testsuite/gas/i386/x86-64-amx-tf32-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-amx-tf32.d: Ditto.
	* testsuite/gas/i386/x86-64-amx-tf32.s: Ditto.

opcodes/ChangeLog:

	* i386-dis.c (PREFIX_VEX_0F3848_X86_64_L_0_W_0): New.
	(X86_64_VEX_0F3848): Ditto.
	(VEX_LEN_0F3848_X86_64): Ditto.
	(VEX_W_0F3848_X86_64_L_0): Ditto.
	(prefix_table): Add PREFIX_VEX_0F3848_X86_64_L_0_W_0.
	(x86_64_table): Add X86_64_VEX_0F3848.
	(vex_len_table): Add VEX_LEN_0F3848_X86_64.
	(vex_w_table): Add VEX_W_0F3848_X86_64_L_0.
	* i386-gen.c (isa_dependencies): Add AMX_TF32.
	(cpu_flags): Ditto.
	* i386-init.h: Regenerated.
	* i386-mnem.h: Ditto.
	* i386-opc.h (CpuAMX_TF32): New.
	(i386_cpu_flags): Add cpuamx_tf32.
	* i386-opc.tbl: Add AMX-TF32 instructions.
	* i386-tbl.h: Regenerated.
---
 gas/NEWS                                      |    2 +
 gas/config/tc-i386.c                          |    1 +
 gas/doc/c-i386.texi                           |    3 +-
 gas/testsuite/gas/i386/amx-tf32-inval.l       |    3 +
 gas/testsuite/gas/i386/amx-tf32-inval.s       |    7 +
 gas/testsuite/gas/i386/i386.exp               |    1 +
 .../gas/i386/x86-64-amx-tf32-intel.d          |   15 +
 .../gas/i386/x86-64-amx-tf32-inval.l          |    7 +
 .../gas/i386/x86-64-amx-tf32-inval.s          |   11 +
 gas/testsuite/gas/i386/x86-64-amx-tf32.d      |   13 +
 gas/testsuite/gas/i386/x86-64-amx-tf32.s      |   15 +
 gas/testsuite/gas/i386/x86-64.exp             |    3 +
 opcodes/i386-dis.c                            |   28 +-
 opcodes/i386-gen.c                            |    3 +
 opcodes/i386-init.h                           |  736 +++++-----
 opcodes/i386-mnem.h                           | 1250 +++++++++--------
 opcodes/i386-opc.h                            |    3 +
 opcodes/i386-opc.tbl                          |    4 +
 opcodes/i386-tbl.h                            |  236 ++--
 19 files changed, 1252 insertions(+), 1089 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/amx-tf32-inval.l
 create mode 100644 gas/testsuite/gas/i386/amx-tf32-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-tf32.s
  

Comments

Jan Beulich Nov. 18, 2024, 3:12 p.m. UTC | #1
On 13.11.2024 09:44, Haochen Jiang wrote:
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
> @@ -0,0 +1,7 @@
> +.* Assembler messages:
> +.*:6: Error: all tmm registers must be distinct for `tmmultf32ps'
> +.*:7: Error: all tmm registers must be distinct for `tmmultf32ps'
> +.*:8: Error: all tmm registers must be distinct for `tmmultf32ps'
> +.*:9: Error: all tmm registers must be distinct for `ttmmultf32ps'
> +.*:10: Error: all tmm registers must be distinct for `ttmmultf32ps'
> +.*:11: Error: all tmm registers must be distinct for `ttmmultf32ps'

Okay, this tests the assembler side. Like for others with similar constraints
there also wants to be a disassembler test (thus also demonstrating that no
changes outside the tables are needed in i386-dis.c).

Jan
  

Patch

diff --git a/gas/NEWS b/gas/NEWS
index 9575dcdaaa1..56143b9b27e 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@ 
 -*- text -*-
 
+* Add support for Intel AMX-TF32 instructions.
+
 * Add support for Intel AMX-AVX512 instructions.
 
 * Add support for Intel AMX-TRANSPOSE instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 9e54aae65fa..57c4285cc68 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1184,6 +1184,7 @@  static const arch_entry cpu_arch[] =
   SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
   SUBARCH (amx_transpose, AMX_TRANSPOSE, ANY_AMX_TRANSPOSE, false),
   SUBARCH (amx_avx512, AMX_AVX512, ANY_AMX_AVX512, false),
+  SUBARCH (amx_tf32, AMX_TF32, ANY_AMX_TF32, false),
   SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
   SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
   SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index dd2e422e323..bfadb9317e3 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -230,6 +230,7 @@  accept various extension mnemonics.  For example,
 @code{amx_complex},
 @code{amx_transpose},
 @code{amx_avx512},
+@code{amx_tf32},
 @code{amx_tile},
 @code{vmx},
 @code{vmfunc},
@@ -1703,7 +1704,7 @@  supported on the CPU specified.  The choices for @var{cpu_type} are:
 @item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
 @item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
 @item @samp{.amx_complex} @tab @samp{.amx_transpose} @tab @samp{.amx_avx512}
-@item @samp{.amx_tile}
+@item @samp{.amx_tf32} @tab @samp{.amx_tile}
 @item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
 @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
 @item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/amx-tf32-inval.l b/gas/testsuite/gas/i386/amx-tf32-inval.l
new file mode 100644
index 00000000000..a13a3f6d35b
--- /dev/null
+++ b/gas/testsuite/gas/i386/amx-tf32-inval.l
@@ -0,0 +1,3 @@ 
+.* Assembler messages:
+.*:6: Error: `tmmultf32ps' is only supported in 64-bit mode
+.*:7: Error: `ttmmultf32ps' is only supported in 64-bit mode
diff --git a/gas/testsuite/gas/i386/amx-tf32-inval.s b/gas/testsuite/gas/i386/amx-tf32-inval.s
new file mode 100644
index 00000000000..fd7fb025420
--- /dev/null
+++ b/gas/testsuite/gas/i386/amx-tf32-inval.s
@@ -0,0 +1,7 @@ 
+# Check Illegal AMX-TF32 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	tmmultf32ps	%tmm1, %tmm2, %tmm3
+	ttmmultf32ps	%tmm1, %tmm2, %tmm3
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index acc1e2b9a63..45e8adf7723 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -548,6 +548,7 @@  if [gas_32_check] then {
     run_list_test "msr_imm-inval"
     run_list_test "amx-transpose-inval"
     run_list_test "amx-avx512-inval"
+    run_list_test "amx-tf32-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d b/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
new file mode 100644
index 00000000000..cc9a1d34061
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-intel.d
@@ -0,0 +1,15 @@ 
+#objdump: -dw -Mintel
+#name: x86_64 AMX-TF32 insns (Intel disassembly)
+#source: x86-64-amx-tf32.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps tmm3,tmm2,tmm1
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
new file mode 100644
index 00000000000..069513331b0
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.l
@@ -0,0 +1,7 @@ 
+.* Assembler messages:
+.*:6: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:7: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:8: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:9: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:10: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:11: Error: all tmm registers must be distinct for `ttmmultf32ps'
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
new file mode 100644
index 00000000000..21a36dc9a82
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32-inval.s
@@ -0,0 +1,11 @@ 
+# Check Illegal 64bit AMX-TF32 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	tmmultf32ps	%tmm1, %tmm1, %tmm2
+	tmmultf32ps	%tmm1, %tmm2, %tmm1
+	tmmultf32ps	%tmm2, %tmm1, %tmm1
+	ttmmultf32ps	%tmm1, %tmm1, %tmm2
+	ttmmultf32ps	%tmm1, %tmm2, %tmm1
+	ttmmultf32ps	%tmm2, %tmm1, %tmm1
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32.d b/gas/testsuite/gas/i386/x86-64-amx-tf32.d
new file mode 100644
index 00000000000..4fa91cbc040
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32.d
@@ -0,0 +1,13 @@ 
+#objdump: -dw
+#name: x86_64 AMX-TF32 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps %tmm1,%tmm2,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-tf32.s b/gas/testsuite/gas/i386/x86-64-amx-tf32.s
new file mode 100644
index 00000000000..9c1433ed49b
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-tf32.s
@@ -0,0 +1,15 @@ 
+# Check 64bit AMX-TF32 instructions
+
+	.text
+_start:
+	tmmultf32ps	%tmm4, %tmm5, %tmm6
+	tmmultf32ps	%tmm1, %tmm2, %tmm3
+	ttmmultf32ps	%tmm4, %tmm5, %tmm6
+	ttmmultf32ps	%tmm1, %tmm2, %tmm3
+
+_intel:
+	.intel_syntax noprefix
+	tmmultf32ps	tmm6, tmm5, tmm4
+	tmmultf32ps	tmm3, tmm2, tmm1
+	ttmmultf32ps	tmm6, tmm5, tmm4
+	ttmmultf32ps	tmm3, tmm2, tmm1
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 131e598e02a..9cb79eb0a4c 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -529,6 +529,9 @@  run_dump_test "x86-64-amx-transpose-intel"
 run_list_test "x86-64-amx-transpose-inval"
 run_dump_test "x86-64-amx-avx512"
 run_dump_test "x86-64-amx-avx512-intel"
+run_dump_test "x86-64-amx-tf32"
+run_dump_test "x86-64-amx-tf32-intel"
+run_list_test "x86-64-amx-tf32-inval"
 run_dump_test "x86-64-clzero"
 run_dump_test "x86-64-mwaitx-bdver4"
 run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 8f651f7a06f..57f8246bf76 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -1132,6 +1132,7 @@  enum
   PREFIX_VEX_0F98_L_0_W_1,
   PREFIX_VEX_0F99_L_0_W_0,
   PREFIX_VEX_0F99_L_0_W_1,
+  PREFIX_VEX_0F3848_X86_64_L_0_W_0,
   PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0,
   PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1,
   PREFIX_VEX_0F384B_X86_64_L_0_W_0,
@@ -1354,6 +1355,7 @@  enum
   X86_64_0F38F8_M_1,
   X86_64_0FC7_REG_6_MOD_3_PREFIX_1,
 
+  X86_64_VEX_0F3848,
   X86_64_VEX_0F3849,
   X86_64_VEX_0F384B,
   X86_64_VEX_0F385C,
@@ -1446,6 +1448,7 @@  enum
   VEX_LEN_0F381A,
   VEX_LEN_0F3836,
   VEX_LEN_0F3841,
+  VEX_LEN_0F3848_X86_64,
   VEX_LEN_0F3849_X86_64,
   VEX_LEN_0F384B_X86_64,
   VEX_LEN_0F385A,
@@ -1621,6 +1624,7 @@  enum
   VEX_W_0F382F,
   VEX_W_0F3836,
   VEX_W_0F3846,
+  VEX_W_0F3848_X86_64_L_0,
   VEX_W_0F3849_X86_64_L_0,
   VEX_W_0F384B_X86_64_L_0,
   VEX_W_0F3850,
@@ -4087,6 +4091,13 @@  static const struct dis386 prefix_table[][4] = {
     { "ktestd", { MaskG, MaskR }, 0 },
   },
 
+  /* PREFIX_VEX_0F3848_X86_64_L_0_W_0 */
+  {
+    { "ttmmultf32ps",	{ TMM, Rtmm, VexTmm }, 0 },
+    { Bad_Opcode },
+    { "tmmultf32ps",	{ TMM, Rtmm, VexTmm }, 0 },
+  },
+
   /* PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0 */
   {
     { "ldtilecfg", { M }, 0 },
@@ -4622,6 +4633,12 @@  static const struct dis386 x86_64_table[][2] = {
     { "senduipi",	{ Eq }, 0 },
   },
 
+  /* X86_64_VEX_0F3848 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F3848_X86_64) },
+  },
+
   /* X86_64_VEX_0F3849 */
   {
     { Bad_Opcode },
@@ -6535,7 +6552,7 @@  static const struct dis386 vex_table[][256] = {
     { VEX_W_TABLE (VEX_W_0F3846) },
     { "vpsllv%DQ", { XM, Vex, EXx }, PREFIX_DATA },
     /* 48 */
-    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_VEX_0F3848) },
     { X86_64_TABLE (X86_64_VEX_0F3849) },
     { Bad_Opcode },
     { X86_64_TABLE (X86_64_VEX_0F384B) },
@@ -7215,6 +7232,11 @@  static const struct dis386 vex_len_table[][2] = {
     { "vphminposuw",	{ XM, EXx }, PREFIX_DATA },
   },
 
+  /* VEX_LEN_0F3848_X86_64 */
+  {
+    { VEX_W_TABLE (VEX_W_0F3848_X86_64_L_0) },
+  },
+
   /* VEX_LEN_0F3849_X86_64 */
   {
     { VEX_W_TABLE (VEX_W_0F3849_X86_64_L_0) },
@@ -7901,6 +7923,10 @@  static const struct dis386 vex_w_table[][2] = {
     /* VEX_W_0F3846 */
     { "vpsravd",	{ XM, Vex, EXx }, PREFIX_DATA },
   },
+  {
+    /* VEX_W_0F3848_X86_64_L_0 */
+    { PREFIX_TABLE (PREFIX_VEX_0F3848_X86_64_L_0_W_0) },
+  },
   {
     /* VEX_W_0F3849_X86_64_L_0 */
     { MOD_TABLE (MOD_VEX_0F3849_X86_64_L_0_W_0) },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 168dc565a60..90a6be46950 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -267,6 +267,8 @@  static const dependency isa_dependencies[] =
     "AMX_TILE" },
   { "AMX_AVX512",
     "AMX_TILE|AVX10_2" },
+  { "AMX_TF32",
+    "AMX_TILE" },
   { "KL",
     "SSE2" },
   { "WIDEKL",
@@ -435,6 +437,7 @@  static bitfield cpu_flags[] =
   BITFIELD (AMX_COMPLEX),
   BITFIELD (AMX_TRANSPOSE),
   BITFIELD (AMX_AVX512),
+  BITFIELD (AMX_TF32),
   BITFIELD (AMX_TILE),
   BITFIELD (MOVDIRI),
   BITFIELD (MOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 91972954966..8d7879c8eb4 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -254,6 +254,8 @@  enum i386_cpu
   CpuAMX_COMPLEX,
   /* Intel AMX-AVX512 Instructions support required.  */
   CpuAMX_AVX512,
+  /* Intel AMX-TF32 Instructions support required.  */
+  CpuAMX_TF32,
   /* AMX-TILE instructions required */
   CpuAMX_TILE,
   /* GFNI instructions required */
@@ -503,6 +505,7 @@  typedef union i386_cpu_flags
       unsigned int cpuamx_fp16:1;
       unsigned int cpuamx_complex:1;
       unsigned int cpuamx_avx512:1;
+      unsigned int cpuamx_tf32:1;
       unsigned int cpuamx_tile:1;
       unsigned int cpugfni:1;
       unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index d17765aa0af..2a195c8bbb3 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3234,12 +3234,16 @@  tilerelease, 0x49c0, AMX_TILE, Vex128|Space0F38|VexW0|NoSuf, {}
 
 tilezero, 0xf249, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
 
+tmmultf32ps, 0x6648, AMX_TF32, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
 ttcmmimfp16ps, 0xf26b, AMX_COMPLEX&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 ttcmmrlfp16ps, 0xf36b, AMX_COMPLEX&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 
 ttdpbf16ps, 0xf36c, AMX_BF16&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 ttdpfp16ps, 0xf26c, AMX_FP16&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 
+ttmmultf32ps, 0x48, AMX_TF32&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
 ttransposed, 0xf35f, AMX_TRANSPOSE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, RegTMM }
 
 // AMX instructions end.