[3/3] Support Intel AMX-FP8

Message ID 20241218063204.1346248-4-haochen.jiang@intel.com
State New
Headers
Series Support part of DMR AMX instructions |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jiang, Haochen Dec. 18, 2024, 6:32 a.m. UTC
  From: Liwei Xu <liwei.xu@intel.com>

Changes in v2 have been mentioned in the previous AMX-TRANSPOSE patch.

---

In this patch, we will support AMX-FP8 feature. No special handling.

gas/ChangeLog:

	* config/tc-i386.c: Add amx_fp8.
	* doc/c-i386.texi: Document .amx_fp8.
	* testsuite/gas/i386/x86-64.exp: Run AMX-FP8 tests.
	* testsuite/gas/i386/x86-64-amx-fp8.d: New test.
	* testsuite/gas/i386/x86-64-amx-fp8.s: Ditto.
	* testsuite/gas/i386/x86-64-amx-fp8-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-amx-fp8-inval.l: Ditto.
	* testsuite/gas/i386/x86-64-amx-fp8-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-amx-fp8-bad.d: Ditto.
	* testsuite/gas/i386/x86-64-amx-fp8-bad.s: Ditto.

opcodes/ChangeLog:

	* i386-dis.c (PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0): New.
	(X86_64_VEX_MAP5_FD): Ditto.
	(VEX_LEN_MAP5_FD_X86_64): Ditto.
	(VEX_W_MAP5_FD_X86_64_L_0): Ditto.
	(prefix_table): Add PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0.
	(x86_64_table): Add X86_64_VEX_MAP5_FD.
	(vex_len_table): Add VEX_LEN_MAP5_FD_X86_64.
	(vex_w_table): Add VEX_W_MAP5_FD_X86_64_L_0.
	* i386-gen.c: Add CPU_AMX_FP8_FLAGS and
	CPU_ANY_AMX_FP8_FLAGS.
	* i386-init.h: Regenerated.
	* i386-mnem.h: Ditto.
	* i386-opc.h: Add cpuamx_fp8.
	* i386-opc.tbl: Add AMX_FP8 instructions.
	* i386-tbl.h: Regenerated.
---
 gas/config/tc-i386.c                          |    4 +-
 gas/doc/c-i386.texi                           |    3 +-
 gas/testsuite/gas/i386/x86-64-amx-fp8-bad.d   |   22 +
 gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s   |   36 +
 gas/testsuite/gas/i386/x86-64-amx-fp8-intel.d |   19 +
 gas/testsuite/gas/i386/x86-64-amx-fp8-inval.l |    9 +
 gas/testsuite/gas/i386/x86-64-amx-fp8-inval.s |   12 +
 gas/testsuite/gas/i386/x86-64-amx-fp8.d       |   17 +
 gas/testsuite/gas/i386/x86-64-amx-fp8.s       |   23 +
 gas/testsuite/gas/i386/x86-64.exp             |    4 +
 opcodes/i386-dis.c                            |  322 +++++
 opcodes/i386-gen.c                            |    3 +
 opcodes/i386-init.h                           |  744 ++++++-----
 opcodes/i386-mnem.h                           | 1182 +++++++++--------
 opcodes/i386-opc.h                            |    3 +
 opcodes/i386-opc.tbl                          |    5 +
 opcodes/i386-tbl.h                            |  268 ++--
 17 files changed, 1615 insertions(+), 1061 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8-bad.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp8.s
  

Comments

Jan Beulich Dec. 19, 2024, 11:56 a.m. UTC | #1
On 18.12.2024 07:32, Haochen Jiang wrote:
> @@ -7198,7 +7200,7 @@ i386_assemble (char *line)
>        /* The opcode space check isn't strictly needed; it's there only to
>  	 bypass the logic below when easily possible.  */
>        && t->opcode_space >= SPACE_0F
> -      && t->opcode_space <= SPACE_0F3A
> +      && t->opcode_space <= SPACE_MAP5

Why would this need adjustment? The whole conditional is only about ...

>        && !is_cpu (&i.tm, CpuSSE4a)
>        && !is_any_vex_encoding (t))

... non-VEX/EVEX encodings.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s
> @@ -0,0 +1,36 @@
> +.text
> +	#tdpbf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
> +	.insn VEX.128.NP.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
> +
> +	#tdpbf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.NP.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
> +
> +	#tdpbf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.NP.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
> +
> +	#tdpbhf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
> +	.insn VEX.128.f2.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
> +
> +	#tdpbhf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.f2.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
> +
> +	#tdpbhf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.f2.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
> +
> +	#tdphbf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
> +	.insn VEX.128.f3.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
> +
> +	#tdphbf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.f3.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
> +
> +	#tdphbf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.f3.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
> +
> +	#tdphf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
> +	.insn VEX.128.66.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
> +
> +	#tdphf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.66.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
> +
> +	#tdphf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
> +	.insn VEX.128.66.M5.W0 0xfd, %tmm2, %tmm1, %tmm1

Again see respective comments on patch 1.

> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -1159,6 +1159,7 @@ enum
>    PREFIX_VEX_0F38F6_L_0,
>    PREFIX_VEX_0F38F7_L_0,
>    PREFIX_VEX_0F3AF0_L_0,
> +  PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0,
>    PREFIX_VEX_MAP7_F6_L_0_W_0_R_0_X86_64,
>    PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64,

By going through x86_64_table[] first, you're effectively guaranteeing that
no insns will show up at this opcode point which can also be used outside of
64-bit mode. Is that what you're intending (i.e. unlike what the adjacent
entries here show)?

> @@ -7029,6 +7048,297 @@ static const struct dis386 vex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>    },
> +  /* VEX_MAP5 */
> +  {
> +    /* 00 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 08 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 10 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 18 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 20 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 28 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 30 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 38 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 40 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 48 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 50 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 58 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 60 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 68 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 70 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 78 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 80 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 88 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 90 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* 98 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* a0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* a8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* b0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* b8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* c0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* c8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* d0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* d8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* e0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* e8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* f0 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    /* f8 */
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +    { X86_64_TABLE (X86_64_VEX_MAP5_FD) },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
> +  },
>  };

Hmm, a table of 256 entries with just a single opcode point defined, and
no clear perspective whether there's going to be more than just the
AMX-MOVRS one in the foreseeable future. Maybe better handle like the
Map7 entries for now?

Jan
  
Jiang, Haochen Dec. 24, 2024, 5:50 a.m. UTC | #2
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Thursday, December 19, 2024 7:56 PM
> 
> On 18.12.2024 07:32, Haochen Jiang wrote:
> > @@ -7198,7 +7200,7 @@ i386_assemble (char *line)
> >        /* The opcode space check isn't strictly needed; it's there only to
> >  	 bypass the logic below when easily possible.  */
> >        && t->opcode_space >= SPACE_0F
> > -      && t->opcode_space <= SPACE_0F3A
> > +      && t->opcode_space <= SPACE_MAP5
> 
> Why would this need adjustment? The whole conditional is only about ...
> 
> >        && !is_cpu (&i.tm, CpuSSE4a)
> >        && !is_any_vex_encoding (t))
> 
> ... non-VEX/EVEX encodings.

They are not needed. I will remove them.

> > --- a/opcodes/i386-dis.c
> > +++ b/opcodes/i386-dis.c
> > @@ -1159,6 +1159,7 @@ enum
> >    PREFIX_VEX_0F38F6_L_0,
> >    PREFIX_VEX_0F38F7_L_0,
> >    PREFIX_VEX_0F3AF0_L_0,
> > +  PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0,
> >    PREFIX_VEX_MAP7_F6_L_0_W_0_R_0_X86_64,
> >    PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64,
> 
> By going through x86_64_table[] first, you're effectively guaranteeing that no
> insns will show up at this opcode point which can also be used outside of 64-
> bit mode. Is that what you're intending (i.e. unlike what the adjacent entries
> here show)?

All AMX insns go through x86_64_table first. This has been intentional since
their introduction.

> 
> > @@ -7029,6 +7048,297 @@ static const struct dis386 vex_table[][256] = {
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >    },
> > +  /* VEX_MAP5 */
> > +  {
> > +    /* 00 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 08 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 10 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 18 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 20 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 28 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 30 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 38 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 40 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 48 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 50 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 58 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 60 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 68 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 70 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 78 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 80 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 88 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 90 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* 98 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* a0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* a8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* b0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* b8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* c0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* c8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* d0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* d8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* e0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* e8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* f0 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    /* f8 */
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +    { X86_64_TABLE (X86_64_VEX_MAP5_FD) },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> > +  },
> >  };
> 
> Hmm, a table of 256 entries with just a single opcode point defined, and no
> clear perspective whether there's going to be more than just the AMX-MOVRS
> one in the foreseeable future. Maybe better handle like the
> Map7 entries for now?
> 

Ok let me do that for now.

Thx,
Haochen
  

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 54382392a98..25d7068b5be 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1184,6 +1184,7 @@  static const arch_entry cpu_arch[] =
   SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
   SUBARCH (amx_transpose, AMX_TRANSPOSE, ANY_AMX_TRANSPOSE, false),
   SUBARCH (amx_tf32, AMX_TF32, ANY_AMX_TF32, false),
+  SUBARCH (amx_fp8, AMX_FP8, ANY_AMX_FP8, false),
   SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
   SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
   SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
@@ -4227,6 +4228,7 @@  build_vex_prefix (const insn_template *t)
 	case SPACE_0F:
 	case SPACE_0F38:
 	case SPACE_0F3A:
+	case SPACE_MAP5:
 	case SPACE_MAP7:
 	  i.vex.bytes[0] = 0xc4;
 	  break;
@@ -7198,7 +7200,7 @@  i386_assemble (char *line)
       /* The opcode space check isn't strictly needed; it's there only to
 	 bypass the logic below when easily possible.  */
       && t->opcode_space >= SPACE_0F
-      && t->opcode_space <= SPACE_0F3A
+      && t->opcode_space <= SPACE_MAP5
       && !is_cpu (&i.tm, CpuSSE4a)
       && !is_any_vex_encoding (t))
     {
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 45ef8566837..c29625d6ef3 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -230,6 +230,7 @@  accept various extension mnemonics.  For example,
 @code{amx_complex},
 @code{amx_transpose},
 @code{amx_tf32},
+@code{amx_fp8},
 @code{amx_tile},
 @code{vmx},
 @code{vmfunc},
@@ -1703,7 +1704,7 @@  supported on the CPU specified.  The choices for @var{cpu_type} are:
 @item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
 @item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
 @item @samp{.amx_complex} @tab @samp{.amx_transpose} @tab @samp{.amx_tf32}
-@item @samp{.amx_tile}
+@item @samp{.amx_fp8} @tab @samp{.amx_tile}
 @item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
 @item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
 @item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.d b/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.d
new file mode 100644
index 00000000000..96c5e1feb08
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.d
@@ -0,0 +1,22 @@ 
+#objdump: -drw
+#name: x86_64 AMX_FP8 bad insns
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+\s*[a-f0-9]+:\s*c4 e5 70 fd d1\s+tdpbf8ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e5 68 fd c9\s+tdpbf8ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 70 fd ca\s+tdpbf8ps %tmm1/\(bad\),%tmm2,%tmm1\/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 73 fd d1\s+tdpbhf8ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e5 6b fd c9\s+tdpbhf8ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 73 fd ca\s+tdpbhf8ps %tmm1/\(bad\),%tmm2,%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 72 fd d1\s+tdphbf8ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e5 6a fd c9\s+tdphbf8ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 72 fd ca\s+tdphbf8ps %tmm1/\(bad\),%tmm2,%tmm1\/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 71 fd d1\s+tdphf8ps %tmm1/\(bad\),%tmm1/\(bad\),%tmm2
+\s*[a-f0-9]+:\s*c4 e5 69 fd c9\s+tdphf8ps %tmm2,%tmm1/\(bad\),%tmm1/\(bad\)
+\s*[a-f0-9]+:\s*c4 e5 71 fd ca\s+tdphf8ps %tmm1/\(bad\),%tmm2,%tmm1/\(bad\)
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s b/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s
new file mode 100644
index 00000000000..c5f5ed667f8
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-bad.s
@@ -0,0 +1,36 @@ 
+.text
+	#tdpbf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+	.insn VEX.128.NP.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
+
+	#tdpbf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.NP.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
+
+	#tdpbf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.NP.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
+
+	#tdpbhf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+	.insn VEX.128.f2.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
+
+	#tdpbhf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.f2.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
+
+	#tdpbhf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.f2.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
+
+	#tdphbf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+	.insn VEX.128.f3.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
+
+	#tdphbf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.f3.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
+
+	#tdphbf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.f3.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
+
+	#tdphf8ps %tmm1, %tmm1, %tmm2 all tmm registers should be distinct
+	.insn VEX.128.66.M5.W0 0xfd, %tmm1, %tmm1, %tmm2
+
+	#tdphf8ps %tmm1, %tmm2, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.66.M5.W0 0xfd, %tmm1, %tmm2, %tmm1
+
+	#tdphf8ps %tmm2, %tmm1, %tmm1 all tmm registers should be distinct
+	.insn VEX.128.66.M5.W0 0xfd, %tmm2, %tmm1, %tmm1
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8-intel.d b/gas/testsuite/gas/i386/x86-64-amx-fp8-intel.d
new file mode 100644
index 00000000000..8af297b1f92
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-intel.d
@@ -0,0 +1,19 @@ 
+#objdump: -dw -Mintel
+#name: x86_64 AMX-FP8 insns (Intel disassembly)
+#source: x86-64-amx-fp8.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*c4 e5 58 fd f5\s+tdpbf8ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e5 70 fd da\s+tdpbf8ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e5 5b fd f5\s+tdpbhf8ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e5 73 fd da\s+tdpbhf8ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e5 5a fd f5\s+tdphbf8ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e5 72 fd da\s+tdphbf8ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e5 59 fd f5\s+tdphf8ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e5 71 fd da\s+tdphf8ps tmm3,tmm2,tmm1
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.l b/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.l
new file mode 100644
index 00000000000..3f9ed319434
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.l
@@ -0,0 +1,9 @@ 
+.* Assembler messages:
+.*:5: Error: all tmm registers must be distinct for `tdpbf8ps'
+.*:6: Error: all tmm registers must be distinct for `tdpbf8ps'
+.*:7: Error: all tmm registers must be distinct for `tdpbhf8ps'
+.*:8: Error: all tmm registers must be distinct for `tdpbhf8ps'
+.*:9: Error: all tmm registers must be distinct for `tdphbf8ps'
+.*:10: Error: all tmm registers must be distinct for `tdphbf8ps'
+.*:11: Error: all tmm registers must be distinct for `tdphf8ps'
+.*:12: Error: all tmm registers must be distinct for `tdphf8ps'
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.s b/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.s
new file mode 100644
index 00000000000..2e0bac1e220
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8-inval.s
@@ -0,0 +1,12 @@ 
+# Check Illegal AMX-FP8 instructions
+
+	.text
+_start:
+	tdpbf8ps	%tmm1, %tmm1, %tmm2
+	tdpbf8ps	%tmm1, %tmm2, %tmm2
+	tdpbhf8ps	%tmm1, %tmm1, %tmm2
+	tdpbhf8ps	%tmm1, %tmm2, %tmm2
+	tdphbf8ps	%tmm1, %tmm1, %tmm2
+	tdphbf8ps	%tmm1, %tmm2, %tmm2
+	tdphf8ps	%tmm1, %tmm1, %tmm2
+	tdphf8ps	%tmm1, %tmm2, %tmm2
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8.d b/gas/testsuite/gas/i386/x86-64-amx-fp8.d
new file mode 100644
index 00000000000..fd81d0c52ff
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8.d
@@ -0,0 +1,17 @@ 
+#objdump: -dw
+#name: x86_64 AMX-FP8 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e5 58 fd f5\s+tdpbf8ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e5 70 fd da\s+tdpbf8ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e5 5b fd f5\s+tdpbhf8ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e5 73 fd da\s+tdpbhf8ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e5 5a fd f5\s+tdphbf8ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e5 72 fd da\s+tdphbf8ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e5 59 fd f5\s+tdphf8ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e5 71 fd da\s+tdphf8ps %tmm1,%tmm2,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp8.s b/gas/testsuite/gas/i386/x86-64-amx-fp8.s
new file mode 100644
index 00000000000..b8357b41ecb
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp8.s
@@ -0,0 +1,23 @@ 
+# Check 64bit AMX-FP8 instructions
+
+	.text
+_start:
+	tdpbf8ps	%tmm4, %tmm5, %tmm6
+	tdpbf8ps	%tmm1, %tmm2, %tmm3
+	tdpbhf8ps	%tmm4, %tmm5, %tmm6
+	tdpbhf8ps	%tmm1, %tmm2, %tmm3
+	tdphbf8ps	%tmm4, %tmm5, %tmm6
+	tdphbf8ps	%tmm1, %tmm2, %tmm3
+	tdphf8ps	%tmm4, %tmm5, %tmm6
+	tdphf8ps	%tmm1, %tmm2, %tmm3
+
+_intel:
+	.intel_syntax noprefix
+	tdpbf8ps	tmm6, tmm5, tmm4
+	tdpbf8ps	tmm3, tmm2, tmm1
+	tdpbhf8ps	tmm6, tmm5, tmm4
+	tdpbhf8ps	tmm3, tmm2, tmm1
+	tdphbf8ps	tmm6, tmm5, tmm4
+	tdphbf8ps	tmm3, tmm2, tmm1
+	tdphf8ps	tmm6, tmm5, tmm4
+	tdphf8ps	tmm3, tmm2, tmm1
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 15f8e289675..645e22e05d8 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -531,6 +531,10 @@  run_dump_test "x86-64-amx-tf32"
 run_dump_test "x86-64-amx-tf32-intel"
 run_list_test "x86-64-amx-tf32-inval"
 run_dump_test "x86-64-amx-tf32-bad"
+run_dump_test "x86-64-amx-fp8"
+run_dump_test "x86-64-amx-fp8-intel"
+run_list_test "x86-64-amx-fp8-inval"
+run_dump_test "x86-64-amx-fp8-bad"
 run_dump_test "x86-64-clzero"
 run_dump_test "x86-64-mwaitx-bdver4"
 run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 55a4f978475..503e93bb189 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -1159,6 +1159,7 @@  enum
   PREFIX_VEX_0F38F6_L_0,
   PREFIX_VEX_0F38F7_L_0,
   PREFIX_VEX_0F3AF0_L_0,
+  PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0,
   PREFIX_VEX_MAP7_F6_L_0_W_0_R_0_X86_64,
   PREFIX_VEX_MAP7_F8_L_0_W_0_R_0_X86_64,
 
@@ -1360,6 +1361,7 @@  enum
   X86_64_VEX_0F386F,
   X86_64_VEX_0F38Ex,
 
+  X86_64_VEX_MAP5_FD,
   X86_64_VEX_MAP7_F6_L_0_W_0_R_0,
   X86_64_VEX_MAP7_F8_L_0_W_0_R_0,
 };
@@ -1382,6 +1384,7 @@  enum
   VEX_0F = 0,
   VEX_0F38,
   VEX_0F3A,
+  VEX_MAP5,
   VEX_MAP7,
 };
 
@@ -1480,6 +1483,7 @@  enum
   VEX_LEN_0F3ADE_W_0,
   VEX_LEN_0F3ADF,
   VEX_LEN_0F3AF0,
+  VEX_LEN_MAP5_FD_X86_64,
   VEX_LEN_MAP7_F6,
   VEX_LEN_MAP7_F8,
   VEX_LEN_XOP_08_85,
@@ -1652,6 +1656,7 @@  enum
   VEX_W_0F3ACE,
   VEX_W_0F3ACF,
   VEX_W_0F3ADE,
+  VEX_W_MAP5_FD_X86_64_L_0,
   VEX_W_MAP7_F6_L_0,
   VEX_W_MAP7_F8_L_0,
 
@@ -4278,6 +4283,14 @@  static const struct dis386 prefix_table[][4] = {
     { "%XErorxS",		{ Gdq, Edq, Ib }, 0 },
   },
 
+  /* PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0 */
+  {
+    { "tdpbf8ps",	{ TMM, Rtmm, VexTmm }, 0 },
+    { "tdphbf8ps",	{ TMM, Rtmm, VexTmm }, 0 },
+    { "tdphf8ps",	{ TMM, Rtmm, VexTmm }, 0 },
+    { "tdpbhf8ps",	{ TMM, Rtmm, VexTmm }, 0 },
+  },
+
   /* PREFIX_VEX_MAP7_F6_L_0_W_0_R_0_X86_64 */
   {
     { Bad_Opcode },
@@ -4680,6 +4693,12 @@  static const struct dis386 x86_64_table[][2] = {
     { "%XEcmp%CCxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
   },
 
+  /* X86_64_VEX_MAP5_FD */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_MAP5_FD_X86_64) },
+  },
+
   /* X86_64_VEX_MAP7_F6_L_0_W_0_R_0 */
   {
     { Bad_Opcode },
@@ -7029,6 +7048,297 @@  static const struct dis386 vex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
   },
+  /* VEX_MAP5 */
+  {
+    /* 00 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 08 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 10 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 18 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 20 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 28 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 30 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 38 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 40 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 48 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 50 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 58 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 60 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 68 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 70 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 78 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 80 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 88 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 90 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 98 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* a0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* a8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* b0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* b8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* c0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* c8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* d0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* d8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* e0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* e8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* f0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* f8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { X86_64_TABLE (X86_64_VEX_MAP5_FD) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+  },
 };
 
 #include "i386-dis-evex.h"
@@ -7468,6 +7778,11 @@  static const struct dis386 vex_len_table[][2] = {
     { PREFIX_TABLE (PREFIX_VEX_0F3AF0_L_0) },
   },
 
+  /* VEX_LEN_MAP5_FD_X86_64 */
+  {
+    { VEX_W_TABLE (VEX_W_MAP5_FD_X86_64_L_0) },
+  },
+
   /* VEX_LEN_MAP7_F6 */
   {
     { VEX_W_TABLE (VEX_W_MAP7_F6_L_0) },
@@ -8104,6 +8419,10 @@  static const struct dis386 vex_w_table[][2] = {
     /* VEX_W_0F3ADE */
     { VEX_LEN_TABLE (VEX_LEN_0F3ADE_W_0) },
   },
+  {
+    /* VEX_W_MAP5_FD_X86_64_L_0 */
+    { PREFIX_TABLE (PREFIX_VEX_MAP5_FD_X86_64_L_0_W_0) },
+  },
   {
     /* VEX_W_MAP7_F6_L_0 */
     { REG_TABLE (REG_VEX_MAP7_F6_L_0_W_0) },
@@ -9135,6 +9454,9 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
 	case 0x3:
 	  vex_table_index = VEX_0F3A;
 	  break;
+	case 0x5:
+	  vex_table_index = VEX_MAP5;
+	  break;
 	case 0x7:
 	  vex_table_index = VEX_MAP7;
 	  break;
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index ad5ab897b28..40a744eba41 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -269,6 +269,8 @@  static const dependency isa_dependencies[] =
     "AMX_TILE" },
   { "AMX_TF32",
     "AMX_TILE" },
+  { "AMX_FP8",
+    "AMX_TILE" },
   { "KL",
     "SSE2" },
   { "WIDEKL",
@@ -437,6 +439,7 @@  static bitfield cpu_flags[] =
   BITFIELD (AMX_COMPLEX),
   BITFIELD (AMX_TRANSPOSE),
   BITFIELD (AMX_TF32),
+  BITFIELD (AMX_FP8),
   BITFIELD (AMX_TILE),
   BITFIELD (MOVDIRI),
   BITFIELD (MOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 5173c053494..55260d30d3e 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -252,6 +252,8 @@  enum i386_cpu
   CpuAMX_COMPLEX,
   /* AMX-TF32 Instructions support required.  */
   CpuAMX_TF32,
+  /* AMX-FP8 instructions required */
+  CpuAMX_FP8,
   /* AMX-TILE instructions required */
   CpuAMX_TILE,
   /* GFNI instructions required */
@@ -503,6 +505,7 @@  typedef union i386_cpu_flags
       unsigned int cpuamx_fp16:1;
       unsigned int cpuamx_complex:1;
       unsigned int cpuamx_tf32:1;
+      unsigned int cpuamx_fp8:1;
       unsigned int cpuamx_tile:1;
       unsigned int cpugfni:1;
       unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 6697fe9d4f0..495a96c3f53 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3229,6 +3229,11 @@  ttransposed, 0xf35f, AMX_TRANSPOSE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM
 tmmultf32ps, 0x6648, AMX_TF32, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 ttmmultf32ps, 0x48, AMX_TF32&AMX_TRANSPOSE, Modrm|Vex128|Space0F38|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
 
+tdpbf8ps, 0xfd, AMX_FP8, Modrm|Vex128|Map5|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbhf8ps, 0xf2fd, AMX_FP8, Modrm|Vex128|Map5|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdphbf8ps, 0xf3fd, AMX_FP8, Modrm|Vex128|Map5|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdphf8ps, 0x66fd, AMX_FP8, Modrm|Vex128|Map5|Src2VVVV|VexW0|NoSuf, { RegTMM, RegTMM, RegTMM }
+
 // AMX instructions end.
 
 // KEYLOCKER instructions.