@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-TF32 instructions.
+
* Add support for Intel AMX-AVX512 instructions.
* Add support for Intel AMX-TRANSPOSE instructions.
@@ -1184,6 +1184,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
SUBARCH (amx_transpose, AMX_TRANSPOSE, ANY_AMX_TRANSPOSE, false),
SUBARCH (amx_avx512, AMX_AVX512, ANY_AMX_AVX512, false),
+ SUBARCH (amx_tf32, AMX_TF32, ANY_AMX_TF32, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
@@ -230,6 +230,7 @@ accept various extension mnemonics. For example,
@code{amx_complex},
@code{amx_transpose},
@code{amx_avx512},
+@code{amx_tf32},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1703,7 +1704,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
@item @samp{.amx_complex} @tab @samp{.amx_transpose} @tab @samp{.amx_avx512}
-@item @samp{.amx_tile}
+@item @samp{.amx_tf32} @tab @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
new file mode 100644
@@ -0,0 +1,3 @@
+.* Assembler messages:
+.*:6: Error: `tmmultf32ps' is only supported in 64-bit mode
+.*:7: Error: `ttmmultf32ps' is only supported in 64-bit mode
new file mode 100644
@@ -0,0 +1,7 @@
+# Check that AMX-TF32 instructions are rejected outside 64-bit mode
+
+ .allow_index_reg
+ .text
+_start:
+ tmmultf32ps %tmm1, %tmm2, %tmm3
+ ttmmultf32ps %tmm1, %tmm2, %tmm3
@@ -548,6 +548,7 @@ if [gas_32_check] then {
run_list_test "msr_imm-inval"
run_list_test "amx-transpose-inval"
run_list_test "amx-avx512-inval"
+ run_list_test "amx-tf32-inval"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
new file mode 100644
@@ -0,0 +1,15 @@
+#objdump: -dw -Mintel
+#name: x86_64 AMX-TF32 insns (Intel disassembly)
+#source: x86-64-amx-tf32.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps tmm3,tmm2,tmm1
+#pass
new file mode 100644
@@ -0,0 +1,7 @@
+.* Assembler messages:
+.*:6: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:7: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:8: Error: all tmm registers must be distinct for `tmmultf32ps'
+.*:9: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:10: Error: all tmm registers must be distinct for `ttmmultf32ps'
+.*:11: Error: all tmm registers must be distinct for `ttmmultf32ps'
new file mode 100644
@@ -0,0 +1,11 @@
+# Check that 64-bit AMX-TF32 instructions reject non-distinct tmm registers
+
+ .allow_index_reg
+ .text
+_start:
+ tmmultf32ps %tmm1, %tmm1, %tmm2
+ tmmultf32ps %tmm1, %tmm2, %tmm1
+ tmmultf32ps %tmm2, %tmm1, %tmm1
+ ttmmultf32ps %tmm1, %tmm1, %tmm2
+ ttmmultf32ps %tmm1, %tmm2, %tmm1
+ ttmmultf32ps %tmm2, %tmm1, %tmm1
new file mode 100644
@@ -0,0 +1,13 @@
+#objdump: -dw
+#name: x86_64 AMX-TF32 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 59 48 f5\s+tmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 71 48 da\s+tmmultf32ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e2 58 48 f5\s+ttmmultf32ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 70 48 da\s+ttmmultf32ps %tmm1,%tmm2,%tmm3
+#pass
new file mode 100644
@@ -0,0 +1,15 @@
+# Check 64-bit AMX-TF32 instructions (AT&T and Intel syntax)
+
+ .text
+_start:
+ tmmultf32ps %tmm4, %tmm5, %tmm6
+ tmmultf32ps %tmm1, %tmm2, %tmm3
+ ttmmultf32ps %tmm4, %tmm5, %tmm6
+ ttmmultf32ps %tmm1, %tmm2, %tmm3
+
+_intel:
+ .intel_syntax noprefix
+ tmmultf32ps tmm6, tmm5, tmm4
+ tmmultf32ps tmm3, tmm2, tmm1
+ ttmmultf32ps tmm6, tmm5, tmm4
+ ttmmultf32ps tmm3, tmm2, tmm1
@@ -529,6 +529,9 @@ run_dump_test "x86-64-amx-transpose-intel"
run_list_test "x86-64-amx-transpose-inval"
run_dump_test "x86-64-amx-avx512"
run_dump_test "x86-64-amx-avx512-intel"
+run_dump_test "x86-64-amx-tf32"
+run_dump_test "x86-64-amx-tf32-intel"
+run_list_test "x86-64-amx-tf32-inval"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
@@ -1132,6 +1132,7 @@ enum
PREFIX_VEX_0F98_L_0_W_1,
PREFIX_VEX_0F99_L_0_W_0,
PREFIX_VEX_0F99_L_0_W_1,
+ PREFIX_VEX_0F3848_X86_64_L_0_W_0,
PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0,
PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1,
PREFIX_VEX_0F384B_X86_64_L_0_W_0,
@@ -1354,6 +1355,7 @@ enum
X86_64_0F38F8_M_1,
X86_64_0FC7_REG_6_MOD_3_PREFIX_1,
+ X86_64_VEX_0F3848,
X86_64_VEX_0F3849,
X86_64_VEX_0F384B,
X86_64_VEX_0F385C,
@@ -1446,6 +1448,7 @@ enum
VEX_LEN_0F381A,
VEX_LEN_0F3836,
VEX_LEN_0F3841,
+ VEX_LEN_0F3848_X86_64,
VEX_LEN_0F3849_X86_64,
VEX_LEN_0F384B_X86_64,
VEX_LEN_0F385A,
@@ -1621,6 +1624,7 @@ enum
VEX_W_0F382F,
VEX_W_0F3836,
VEX_W_0F3846,
+ VEX_W_0F3848_X86_64_L_0,
VEX_W_0F3849_X86_64_L_0,
VEX_W_0F384B_X86_64_L_0,
VEX_W_0F3850,
@@ -4087,6 +4091,13 @@ static const struct dis386 prefix_table[][4] = {
{ "ktestd", { MaskG, MaskR }, 0 },
},
+ /* PREFIX_VEX_0F3848_X86_64_L_0_W_0 */
+ {
+ { "ttmmultf32ps", { TMM, Rtmm, VexTmm }, 0 },  /* No prefix (opcode 0x48).  */
+ { Bad_Opcode },
+ { "tmmultf32ps", { TMM, Rtmm, VexTmm }, 0 },  /* 0x66 prefix (opcode 0x6648).  */
+ },
+
/* PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0 */
{
{ "ldtilecfg", { M }, 0 },
@@ -4622,6 +4633,12 @@ static const struct dis386 x86_64_table[][2] = {
{ "senduipi", { Eq }, 0 },
},
+ /* X86_64_VEX_0F3848 */
+ {
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F3848_X86_64) },
+ },
+
/* X86_64_VEX_0F3849 */
{
{ Bad_Opcode },
@@ -6535,7 +6552,7 @@ static const struct dis386 vex_table[][256] = {
{ VEX_W_TABLE (VEX_W_0F3846) },
{ "vpsllv%DQ", { XM, Vex, EXx }, PREFIX_DATA },
/* 48 */
- { Bad_Opcode },
+ { X86_64_TABLE (X86_64_VEX_0F3848) },
{ X86_64_TABLE (X86_64_VEX_0F3849) },
{ Bad_Opcode },
{ X86_64_TABLE (X86_64_VEX_0F384B) },
@@ -7215,6 +7232,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "vphminposuw", { XM, EXx }, PREFIX_DATA },
},
+ /* VEX_LEN_0F3848_X86_64 */
+ {
+ { VEX_W_TABLE (VEX_W_0F3848_X86_64_L_0) },
+ },
+
/* VEX_LEN_0F3849_X86_64 */
{
{ VEX_W_TABLE (VEX_W_0F3849_X86_64_L_0) },
@@ -7901,6 +7923,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F3846 */
{ "vpsravd", { XM, Vex, EXx }, PREFIX_DATA },
},
+ {
+ /* VEX_W_0F3848_X86_64_L_0 */
+ { PREFIX_TABLE (PREFIX_VEX_0F3848_X86_64_L_0_W_0) },
+ },
{
/* VEX_W_0F3849_X86_64_L_0 */
{ MOD_TABLE (MOD_VEX_0F3849_X86_64_L_0_W_0) },
@@ -267,6 +267,8 @@ static const dependency isa_dependencies[] =
"AMX_TILE" },
{ "AMX_AVX512",
"AMX_TILE|AVX10_2" },
+ { "AMX_TF32",
+ "AMX_TILE" },
{ "KL",
"SSE2" },
{ "WIDEKL",
@@ -435,6 +437,7 @@ static bitfield cpu_flags[] =
BITFIELD (AMX_COMPLEX),
BITFIELD (AMX_TRANSPOSE),
BITFIELD (AMX_AVX512),
+ BITFIELD (AMX_TF32),
BITFIELD (AMX_TILE),
BITFIELD (MOVDIRI),
BITFIELD (MOVDIR64B),
@@ -254,6 +254,8 @@ enum i386_cpu
CpuAMX_COMPLEX,
/* Intel AMX-AVX512 Instructions support required. */
CpuAMX_AVX512,
+ /* Intel AMX-TF32 Instructions support required. */
+ CpuAMX_TF32,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -503,6 +505,7 @@ typedef union i386_cpu_flags
unsigned int cpuamx_fp16:1;
unsigned int cpuamx_complex:1;
unsigned int cpuamx_avx512:1;
+ unsigned int cpuamx_tf32:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
@@ -3234,12 +3234,16 @@ tilerelease, 0x49c0, AMX_TILE, Vex128|Space0F38|VexW0|NoSuf, {}
tilezero, 0xf249, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
+tmmultf32ps, 0x6648, AMX_TF32, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
ttcmmimfp16ps, 0xf26b, AMX_COMPLEX&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
ttcmmrlfp16ps, 0xf36b, AMX_COMPLEX&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
ttdpbf16ps, 0xf36c, AMX_BF16&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
ttdpfp16ps, 0xf26c, AMX_FP16&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+ttmmultf32ps, 0x48, AMX_TF32&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
ttransposed, 0xf35f, AMX_TRANSPOSE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, RegTMM }
// AMX instructions end.