Support Intel SM4 AVX10.2 extension

Message ID 20241213083126.2813952-1-haochen.jiang@intel.com
State New
Headers
Series Support Intel SM4 AVX10.2 extension |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 fail Patch failed to apply

Commit Message

Jiang, Haochen Dec. 13, 2024, 8:31 a.m. UTC
  Hi all,

This is the v2 patch for Intel SM4 AVX10.2 extension.

Changes, open questions and the patch description are embedded below.

Ok for trunk?

Thx,
Haochen

---

Changes in v2:

  - Rebase to master to eliminate not yet committed AMX-TRANSPOSE.
  - Revise the entry in gas/NEWS.
  - Templatize the table to reduce redundancy.

---

Open:

Currently in v2 patch, I just templatize the table with the following changes
based on v1 patch:
  

Comments

Jan Beulich Dec. 13, 2024, 11:47 a.m. UTC | #1
On 13.12.2024 09:31, Haochen Jiang wrote:
> This is the v2 patch for Intel SM4 AVX10.2 extension.
> 
> Changes, open and patch description are embedded below.
> 
> Ok for trunk?

Yes, please apply as is, on the grounds of ...

> Open:
> 
> Currently in v2 patch, I just templatize the table with the following changes
> based on v1 patch:
> 
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -2193,11 +2193,8 @@ vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Uns
> 
>  // SM4 instructions.
> 
> -vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> -vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> -
> -vsm4key4, 0xf3da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> -vsm4rnds4, 0xf2da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +<sm4:isa:attr:reg, $y:SM4:Vex:, $z:SM4&AVX10_2:Disp8ShiftVL:RegZMM>
> +
> +vsm4key4<sm4>, 0xf3da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
> +vsm4rnds4<sm4>, 0xf2da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
> +
> +<sm4>
>  
>  // SM4 instructions end.
> 
> 
> While I have also tried to merge the table like AVX/AVX512, it needs
> the following changes based on v1 patch:
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -2224,7 +2224,8 @@ cpu_flags_match (const insn_template *t)
>        /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
>          know that EVEX encoding will be needed.  */
>        if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
> -         && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
> +         && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl
> +             || any.bitfield.cpuavx10_2))
>         {
>           if (need_evex_encoding (t))
>             {
> @@ -2238,6 +2239,7 @@ cpu_flags_match (const insn_template *t)
>             {
>               any.bitfield.cpuavx512f = 0;
>               any.bitfield.cpuavx512vl = 0;
> +             any.bitfield.cpuavx10_2 = 0;
>             }
>         }
> 
> @@ -4033,13 +4035,15 @@ install_template (const insn_template *t)
>      {
>        if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
>            || maybe_cpu (t, CpuFMA))
> -         && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
> +         && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)
> +             || maybe_cpu (t, CpuAVX10_2)))
>         {
>           if (need_evex_encoding (t))
>             {
>               i.tm.opcode_modifier.vex = 0;
>               i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
>               i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
> +             i.tm.cpu.bitfield.cpuavx10_2 = i.tm.cpu_any.bitfield.cpuavx10_2;
>             }
>           else
>             {
> 
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -2193,11 +2193,8 @@ vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Uns
> 
>  // SM4 instructions.
> 
> -vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> -vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
> -
> -vsm4key4, 0xf3da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> -vsm4rnds4, 0xf2da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +vsm4key4, 0xf3da, SM4&(AVX|AVX10_2), Modrm|Space0F38|Vex|EVexDYN|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> +vsm4rnds4, 0xf2da, SM4&(AVX|AVX10_2), Modrm|Space0F38|Vex|EVexDYN|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
> 
>  // SM4 instructions end.
> 
> 
> I am okay to go either way, but slightly prefer the templatizing one
> since probably SM4 would be the only ISA with AVX10.2 needs such VEX
> to EVEX extension as mentioned in the previous thread (MOVRS does
> not need that). Also, it is a tendency that we will directly provide
> EVEX encodings and no VEX encodings for vector instructions since
> AVX10.

... this statement of yours. I'll take you up on that if things end up
changing later ...

Jan
  

Patch

--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2193,11 +2193,8 @@  vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Uns

 // SM4 instructions.

-vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-
-vsm4key4, 0xf3da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vsm4rnds4, 0xf2da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+<sm4:isa:attr:reg, $y:SM4:Vex:, $z:SM4&AVX10_2:Disp8ShiftVL:RegZMM>
+
+vsm4key4<sm4>, 0xf3da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
+vsm4rnds4<sm4>, 0xf2da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
+
+<sm4>
 
 // SM4 instructions end.


While I have also tried to merge the table like AVX/AVX512, it needs
the following changes based on v1 patch:

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -2224,7 +2224,8 @@  cpu_flags_match (const insn_template *t)
       /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
         know that EVEX encoding will be needed.  */
       if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
-         && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
+         && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl
+             || any.bitfield.cpuavx10_2))
        {
          if (need_evex_encoding (t))
            {
@@ -2238,6 +2239,7 @@  cpu_flags_match (const insn_template *t)
            {
              any.bitfield.cpuavx512f = 0;
              any.bitfield.cpuavx512vl = 0;
+             any.bitfield.cpuavx10_2 = 0;
            }
        }

@@ -4033,13 +4035,15 @@  install_template (const insn_template *t)
     {
       if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
           || maybe_cpu (t, CpuFMA))
-         && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
+         && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)
+             || maybe_cpu (t, CpuAVX10_2)))
        {
          if (need_evex_encoding (t))
            {
              i.tm.opcode_modifier.vex = 0;
              i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
              i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
+             i.tm.cpu.bitfield.cpuavx10_2 = i.tm.cpu_any.bitfield.cpuavx10_2;
            }
          else
            {

--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2193,11 +2193,8 @@  vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Uns

 // SM4 instructions.

-vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-
-vsm4key4, 0xf3da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vsm4rnds4, 0xf2da, SM4&AVX10_2, Modrm|Space0F38|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vsm4key4, 0xf3da, SM4&(AVX|AVX10_2), Modrm|Space0F38|Vex|EVexDYN|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
+vsm4rnds4, 0xf2da, SM4&(AVX|AVX10_2), Modrm|Space0F38|Vex|EVexDYN|Src1VVVV|VexW0|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }

 // SM4 instructions end.


I am okay to go either way, but slightly prefer the templatizing one,
since SM4 would probably be the only AVX10.2 ISA that needs such a VEX
to EVEX extension, as mentioned in the previous thread (MOVRS does
not need that). Also, the tendency since AVX10 is that we directly
provide EVEX encodings and no VEX encodings for vector instructions.

---

This patch adds support for the AVX10.2 part of the SM4 extension. It
promotes the instructions from VEX encoding to EVEX encoding. The EVEX
encoding is based on AVX10.2, the same as for the upcoming MOVRS ISA.
Thus, we decided to move AVX10.2 into CPU_FLAGS_COMMON.

gas/ChangeLog:

	* NEWS: Support Intel SM4 EVEX instructions.
	* config/tc-i386.c (_is_cpu): Handle AVX10.2.
	* testsuite/gas/i386/i386.exp: Run SM4 tests.
	* testsuite/gas/i386/x86-64.exp: Ditto.
	* testsuite/gas/i386/avx10_2-256-sm4-intel.d: Add SM4 tests.
	* testsuite/gas/i386/avx10_2-256-sm4.d: Ditto.
	* testsuite/gas/i386/avx10_2-256-sm4.s: Ditto.
	* testsuite/gas/i386/avx10_2-512-sm4-intel.d: Ditto.
	* testsuite/gas/i386/avx10_2-512-sm4.d: Ditto.
	* testsuite/gas/i386/avx10_2-512-sm4.s: Ditto.
	* testsuite/gas/i386/avx10_2-sm4-inval.l: Ditto.
	* testsuite/gas/i386/avx10_2-sm4-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-256-sm4-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-256-sm4.d: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-256-sm4.s: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-512-sm4-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-512-sm4.d: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-512-sm4.s: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-sm4-inval.l: Ditto.
	* testsuite/gas/i386/x86-64-avx10_2-sm4-inval.s: Ditto.

opcodes/ChangeLog:

	* i386-dis-evex.h: Add evex table entry for SM4.
	* i386-dis.c: Ditto.
	* i386-opc.h (i386_cpu): Move AVX10.2 to CPU_FLAGS_COMMON.
	* i386-opc.tbl: Add SM4 EVEX instructions.
	* i386-init.h: Regenerated.
	* i386-tbl.h: Ditto.
---
 gas/NEWS                                      |     2 +
 gas/config/tc-i386.c                          |     1 +
 .../gas/i386/avx10_2-256-sm4-intel.d          |    31 +
 gas/testsuite/gas/i386/avx10_2-256-sm4.d      |    29 +
 gas/testsuite/gas/i386/avx10_2-256-sm4.s      |    47 +
 .../gas/i386/avx10_2-512-sm4-intel.d          |    21 +
 gas/testsuite/gas/i386/avx10_2-512-sm4.d      |    19 +
 gas/testsuite/gas/i386/avx10_2-512-sm4.s      |    27 +
 gas/testsuite/gas/i386/avx10_2-sm4-inval.l    |     5 +
 gas/testsuite/gas/i386/avx10_2-sm4-inval.s    |     9 +
 gas/testsuite/gas/i386/i386.exp               |     5 +
 .../gas/i386/x86-64-avx10_2-256-sm4-intel.d   |    31 +
 .../gas/i386/x86-64-avx10_2-256-sm4.d         |    29 +
 .../gas/i386/x86-64-avx10_2-256-sm4.s         |    47 +
 .../gas/i386/x86-64-avx10_2-512-sm4-intel.d   |    21 +
 .../gas/i386/x86-64-avx10_2-512-sm4.d         |    19 +
 .../gas/i386/x86-64-avx10_2-512-sm4.s         |    27 +
 .../gas/i386/x86-64-avx10_2-sm4-inval.l       |     5 +
 .../gas/i386/x86-64-avx10_2-sm4-inval.s       |     9 +
 gas/testsuite/gas/i386/x86-64.exp             |     5 +
 opcodes/i386-dis-evex.h                       |     2 +-
 opcodes/i386-dis.c                            |     4 +-
 opcodes/i386-init.h                           |   442 +-
 opcodes/i386-opc.h                            |     6 +-
 opcodes/i386-opc.tbl                          |     8 +-
 opcodes/i386-tbl.h                            | 18194 ++++++++--------
 26 files changed, 9731 insertions(+), 9314 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/avx10_2-256-sm4-intel.d
 create mode 100644 gas/testsuite/gas/i386/avx10_2-256-sm4.d
 create mode 100644 gas/testsuite/gas/i386/avx10_2-256-sm4.s
 create mode 100644 gas/testsuite/gas/i386/avx10_2-512-sm4-intel.d
 create mode 100644 gas/testsuite/gas/i386/avx10_2-512-sm4.d
 create mode 100644 gas/testsuite/gas/i386/avx10_2-512-sm4.s
 create mode 100644 gas/testsuite/gas/i386/avx10_2-sm4-inval.l
 create mode 100644 gas/testsuite/gas/i386/avx10_2-sm4-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.s

diff --git a/gas/NEWS b/gas/NEWS
index 086ba0477d3..2cd9c1a7ff2 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@ 
 -*- text -*-
 
+* Add support for the x86 Intel SM4 AVX10.2 instructions.
+
 * Add support for the x86 Intel AVX10.2 instructions.
 
 * Support for Nios II targets has been dropped, as the architecture has
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index aeb9b974451..f508a13efae 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1867,6 +1867,7 @@  _is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
     case CpuAVX512F:  return a->bitfield.cpuavx512f;
     case CpuAVX512VL: return a->bitfield.cpuavx512vl;
     case CpuAPX_F:    return a->bitfield.cpuapx_f;
+    case CpuAVX10_2:  return a->bitfield.cpuavx10_2;
     case Cpu64:       return a->bitfield.cpu64;
     case CpuNo64:     return a->bitfield.cpuno64;
     default:
diff --git a/gas/testsuite/gas/i386/avx10_2-256-sm4-intel.d b/gas/testsuite/gas/i386/avx10_2-256-sm4-intel.d
new file mode 100644
index 00000000000..aaeed2484d0
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-256-sm4-intel.d
@@ -0,0 +1,31 @@ 
+#objdump: -dw -Mintel
+#name: i386 AVX10.2/256, SM4 insns (Intel disassembly)
+#source: avx10_2-256-sm4.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*62 f2 56 28 da f4\s+{evex} vsm4key4 ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*62 f2 56 08 da f4\s+{evex} vsm4key4 xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*62 f2 56 28 da b4 f4 00 00 00 10\s+{evex} vsm4key4 ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 56 28 da 31\s+{evex} vsm4key4 ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 56 28 da 71 7f\s+{evex} vsm4key4 ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*62 f2 56 28 da 72 80\s+{evex} vsm4key4 ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*62 f2 56 08 da b4 f4 00 00 00 10\s+{evex} vsm4key4 xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 56 08 da 31\s+{evex} vsm4key4 xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 56 08 da 71 7f\s+{evex} vsm4key4 xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*62 f2 56 08 da 72 80\s+{evex} vsm4key4 xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*62 f2 57 28 da f4\s+{evex} vsm4rnds4 ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*62 f2 57 08 da f4\s+{evex} vsm4rnds4 xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*62 f2 57 28 da b4 f4 00 00 00 10\s+{evex} vsm4rnds4 ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 57 28 da 31\s+{evex} vsm4rnds4 ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 57 28 da 71 7f\s+{evex} vsm4rnds4 ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*62 f2 57 28 da 72 80\s+{evex} vsm4rnds4 ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*62 f2 57 08 da b4 f4 00 00 00 10\s+{evex} vsm4rnds4 xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 57 08 da 31\s+{evex} vsm4rnds4 xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 57 08 da 71 7f\s+{evex} vsm4rnds4 xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*62 f2 57 08 da 72 80\s+{evex} vsm4rnds4 xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+#pass
diff --git a/gas/testsuite/gas/i386/avx10_2-256-sm4.d b/gas/testsuite/gas/i386/avx10_2-256-sm4.d
new file mode 100644
index 00000000000..08d644e277b
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-256-sm4.d
@@ -0,0 +1,29 @@ 
+#objdump: -dw
+#name: i386 AVX10.2/256, SM4 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f2 56 28 da f4\s+{evex} vsm4key4 %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 56 08 da f4\s+{evex} vsm4key4 %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 56 28 da b4 f4 00 00 00 10\s+{evex} vsm4key4 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 56 28 da 31\s+{evex} vsm4key4 \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 56 28 da 71 7f\s+{evex} vsm4key4 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 56 28 da 72 80\s+{evex} vsm4key4 -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 56 08 da b4 f4 00 00 00 10\s+{evex} vsm4key4 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 56 08 da 31\s+{evex} vsm4key4 \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 56 08 da 71 7f\s+{evex} vsm4key4 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 56 08 da 72 80\s+{evex} vsm4key4 -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 57 28 da f4\s+{evex} vsm4rnds4 %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 57 08 da f4\s+{evex} vsm4rnds4 %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 57 28 da b4 f4 00 00 00 10\s+{evex} vsm4rnds4 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 57 28 da 31\s+{evex} vsm4rnds4 \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 57 28 da 71 7f\s+{evex} vsm4rnds4 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 57 28 da 72 80\s+{evex} vsm4rnds4 -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*62 f2 57 08 da b4 f4 00 00 00 10\s+{evex} vsm4rnds4 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 57 08 da 31\s+{evex} vsm4rnds4 \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 57 08 da 71 7f\s+{evex} vsm4rnds4 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*62 f2 57 08 da 72 80\s+{evex} vsm4rnds4 -0x800\(%edx\),%xmm5,%xmm6
+#pass
diff --git a/gas/testsuite/gas/i386/avx10_2-256-sm4.s b/gas/testsuite/gas/i386/avx10_2-256-sm4.s
new file mode 100644
index 00000000000..93aed086902
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-256-sm4.s
@@ -0,0 +1,47 @@ 
+# Check 32bit SM4 instructions
+
+	.text
+_start:
+	{evex} vsm4key4	%ymm4, %ymm5, %ymm6
+	{evex} vsm4key4	%xmm4, %xmm5, %xmm6
+	{evex} vsm4key4	0x10000000(%esp, %esi, 8), %ymm5, %ymm6
+	{evex} vsm4key4	(%ecx), %ymm5, %ymm6
+	{evex} vsm4key4	4064(%ecx), %ymm5, %ymm6
+	{evex} vsm4key4	-4096(%edx), %ymm5, %ymm6
+	{evex} vsm4key4	0x10000000(%esp, %esi, 8), %xmm5, %xmm6
+	{evex} vsm4key4	(%ecx), %xmm5, %xmm6
+	{evex} vsm4key4	2032(%ecx), %xmm5, %xmm6
+	{evex} vsm4key4	-2048(%edx), %xmm5, %xmm6
+	{evex} vsm4rnds4	%ymm4, %ymm5, %ymm6
+	{evex} vsm4rnds4	%xmm4, %xmm5, %xmm6
+	{evex} vsm4rnds4	0x10000000(%esp, %esi, 8), %ymm5, %ymm6
+	{evex} vsm4rnds4	(%ecx), %ymm5, %ymm6
+	{evex} vsm4rnds4	4064(%ecx), %ymm5, %ymm6
+	{evex} vsm4rnds4	-4096(%edx), %ymm5, %ymm6
+	{evex} vsm4rnds4	0x10000000(%esp, %esi, 8), %xmm5, %xmm6
+	{evex} vsm4rnds4	(%ecx), %xmm5, %xmm6
+	{evex} vsm4rnds4	2032(%ecx), %xmm5, %xmm6
+	{evex} vsm4rnds4	-2048(%edx), %xmm5, %xmm6
+
+_intel:
+	.intel_syntax noprefix
+	{evex} vsm4key4	ymm6, ymm5, ymm4
+	{evex} vsm4key4	xmm6, xmm5, xmm4
+	{evex} vsm4key4	ymm6, ymm5, [esp+esi*8+0x10000000]
+	{evex} vsm4key4	ymm6, ymm5, YMMWORD PTR [ecx] 
+	{evex} vsm4key4	ymm6, ymm5, [ecx+4064]
+	{evex} vsm4key4	ymm6, ymm5, YMMWORD PTR [edx-4096]
+	{evex} vsm4key4	xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000]
+	{evex} vsm4key4	xmm6, xmm5, [ecx] 
+	{evex} vsm4key4	xmm6, xmm5, [ecx+2032]
+	{evex} vsm4key4	xmm6, xmm5, XMMWORD PTR [edx-2048]
+	{evex} vsm4rnds4	ymm6, ymm5, ymm4
+	{evex} vsm4rnds4	xmm6, xmm5, xmm4
+	{evex} vsm4rnds4	ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000]
+	{evex} vsm4rnds4	ymm6, ymm5, [ecx]
+	{evex} vsm4rnds4	ymm6, ymm5, [ecx+4064]
+	{evex} vsm4rnds4	ymm6, ymm5, YMMWORD PTR [edx-4096]
+	{evex} vsm4rnds4	xmm6, xmm5, [esp+esi*8+0x10000000]
+	{evex} vsm4rnds4	xmm6, xmm5, XMMWORD PTR [ecx]
+	{evex} vsm4rnds4	xmm6, xmm5, XMMWORD PTR [ecx+2032]
+	{evex} vsm4rnds4	xmm6, xmm5, [edx-2048]
diff --git a/gas/testsuite/gas/i386/avx10_2-512-sm4-intel.d b/gas/testsuite/gas/i386/avx10_2-512-sm4-intel.d
new file mode 100644
index 00000000000..8ea86978083
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-512-sm4-intel.d
@@ -0,0 +1,21 @@ 
+#objdump: -dw -Mintel
+#name: i386 AVX10.2/512, SM4 insns (Intel disassembly)
+#source: avx10_2-512-sm4.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*62 f2 56 48 da f4\s+vsm4key4 zmm6,zmm5,zmm4
+\s*[a-f0-9]+:\s*62 f2 56 48 da b4 f4 00 00 00 10\s+vsm4key4 zmm6,zmm5,ZMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 56 48 da 31\s+vsm4key4 zmm6,zmm5,ZMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 56 48 da 71 7f\s+vsm4key4 zmm6,zmm5,ZMMWORD PTR \[ecx\+0x1fc0\]
+\s*[a-f0-9]+:\s*62 f2 56 48 da 72 80\s+vsm4key4 zmm6,zmm5,ZMMWORD PTR \[edx-0x2000\]
+\s*[a-f0-9]+:\s*62 f2 57 48 da f4\s+vsm4rnds4 zmm6,zmm5,zmm4
+\s*[a-f0-9]+:\s*62 f2 57 48 da b4 f4 00 00 00 10\s+vsm4rnds4 zmm6,zmm5,ZMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 f2 57 48 da 31\s+vsm4rnds4 zmm6,zmm5,ZMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*62 f2 57 48 da 71 7f\s+vsm4rnds4 zmm6,zmm5,ZMMWORD PTR \[ecx\+0x1fc0\]
+\s*[a-f0-9]+:\s*62 f2 57 48 da 72 80\s+vsm4rnds4 zmm6,zmm5,ZMMWORD PTR \[edx-0x2000\]
+#pass
diff --git a/gas/testsuite/gas/i386/avx10_2-512-sm4.d b/gas/testsuite/gas/i386/avx10_2-512-sm4.d
new file mode 100644
index 00000000000..abb5ade307f
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-512-sm4.d
@@ -0,0 +1,19 @@ 
+#objdump: -dw
+#name: i386 AVX10.2/512, SM4 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f2 56 48 da f4\s+vsm4key4 %zmm4,%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 56 48 da b4 f4 00 00 00 10\s+vsm4key4 0x10000000\(%esp,%esi,8\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 56 48 da 31\s+vsm4key4 \(%ecx\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 56 48 da 71 7f\s+vsm4key4 0x1fc0\(%ecx\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 56 48 da 72 80\s+vsm4key4 -0x2000\(%edx\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 57 48 da f4\s+vsm4rnds4 %zmm4,%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 57 48 da b4 f4 00 00 00 10\s+vsm4rnds4 0x10000000\(%esp,%esi,8\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 57 48 da 31\s+vsm4rnds4 \(%ecx\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 57 48 da 71 7f\s+vsm4rnds4 0x1fc0\(%ecx\),%zmm5,%zmm6
+\s*[a-f0-9]+:\s*62 f2 57 48 da 72 80\s+vsm4rnds4 -0x2000\(%edx\),%zmm5,%zmm6
+#pass
diff --git a/gas/testsuite/gas/i386/avx10_2-512-sm4.s b/gas/testsuite/gas/i386/avx10_2-512-sm4.s
new file mode 100644
index 00000000000..d2ebe81e43a
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-512-sm4.s
@@ -0,0 +1,27 @@ 
+# Check 32bit SM4 instructions
+
+	.text
+_start:
+	vsm4key4	%zmm4, %zmm5, %zmm6
+	vsm4key4	0x10000000(%esp, %esi, 8), %zmm5, %zmm6
+	vsm4key4	(%ecx), %zmm5, %zmm6
+	vsm4key4	8128(%ecx), %zmm5, %zmm6
+	vsm4key4	-8192(%edx), %zmm5, %zmm6
+	vsm4rnds4	%zmm4, %zmm5, %zmm6
+	vsm4rnds4	0x10000000(%esp, %esi, 8), %zmm5, %zmm6
+	vsm4rnds4	(%ecx), %zmm5, %zmm6
+	vsm4rnds4	8128(%ecx), %zmm5, %zmm6
+	vsm4rnds4	-8192(%edx), %zmm5, %zmm6
+
+_intel:
+	.intel_syntax noprefix
+	vsm4key4	zmm6, zmm5, zmm4
+	vsm4key4	zmm6, zmm5, ZMMWORD PTR [esp+esi*8+0x10000000]
+	vsm4key4	zmm6, zmm5, [ecx]
+	vsm4key4	zmm6, zmm5, ZMMWORD PTR [ecx+8128]
+	vsm4key4	zmm6, zmm5, [edx-8192]
+	vsm4rnds4	zmm6, zmm5, zmm4
+	vsm4rnds4	zmm6, zmm5, [esp+esi*8+0x10000000]
+	vsm4rnds4	zmm6, zmm5, ZMMWORD PTR [ecx]
+	vsm4rnds4	zmm6, zmm5, ZMMWORD PTR [ecx+8128]
+	vsm4rnds4	zmm6, zmm5, [edx-8192]
diff --git a/gas/testsuite/gas/i386/avx10_2-sm4-inval.l b/gas/testsuite/gas/i386/avx10_2-sm4-inval.l
new file mode 100644
index 00000000000..b5410c2770f
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-sm4-inval.l
@@ -0,0 +1,5 @@ 
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsm4key4'
+.*:7: Error: operand size mismatch for `vsm4rnds4'
+.*:8: Error: no EVEX encoding for `vsm4key4'
+.*:9: Error: no EVEX encoding for `vsm4rnds4'
diff --git a/gas/testsuite/gas/i386/avx10_2-sm4-inval.s b/gas/testsuite/gas/i386/avx10_2-sm4-inval.s
new file mode 100644
index 00000000000..8bd6aea3637
--- /dev/null
+++ b/gas/testsuite/gas/i386/avx10_2-sm4-inval.s
@@ -0,0 +1,9 @@ 
+# Check Illegal 32bit SM4 instructions
+
+	.text
+	.arch .noavx10.2
+_start:
+	vsm4key4	%zmm4, %zmm5, %zmm6 
+	vsm4rnds4	%zmm4, %zmm5, %zmm6
+	{evex} vsm4key4	%ymm4, %ymm5, %ymm6 
+	{evex} vsm4rnds4	%xmm4, %xmm5, %xmm6
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index bb1092b0c08..9a310375123 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -536,6 +536,11 @@  if [gas_32_check] then {
     run_dump_test "avx10_2-512-miscs-intel"
     run_dump_test "avx10_2-256-miscs"
     run_dump_test "avx10_2-256-miscs-intel"
+    run_dump_test "avx10_2-256-sm4"
+    run_dump_test "avx10_2-256-sm4-intel"
+    run_dump_test "avx10_2-512-sm4"
+    run_dump_test "avx10_2-512-sm4-intel"
+    run_list_test "avx10_2-sm4-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4-intel.d b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4-intel.d
new file mode 100644
index 00000000000..0624bfbbd4a
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4-intel.d
@@ -0,0 +1,31 @@ 
+#objdump: -dw -Mintel
+#name: x86_64 AVX10.2/256, SM4 insns (Intel disassembly)
+#source: x86-64-avx10_2-256-sm4.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*62 02 16 20 da f4\s+vsm4key4 ymm30,ymm29,ymm28
+\s*[a-f0-9]+:\s*62 02 16 00 da f4\s+vsm4key4 xmm30,xmm29,xmm28
+\s*[a-f0-9]+:\s*62 22 16 20 da b4 f5 00 00 00 10\s+vsm4key4 ymm30,ymm29,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 16 20 da 31\s+vsm4key4 ymm30,ymm29,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 16 20 da 71 7f\s+vsm4key4 ymm30,ymm29,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*62 62 16 20 da 72 80\s+vsm4key4 ymm30,ymm29,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*62 22 16 00 da b4 f5 00 00 00 10\s+vsm4key4 xmm30,xmm29,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 16 00 da 31\s+vsm4key4 xmm30,xmm29,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 16 00 da 71 7f\s+vsm4key4 xmm30,xmm29,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*62 62 16 00 da 72 80\s+vsm4key4 xmm30,xmm29,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*62 02 17 20 da f4\s+vsm4rnds4 ymm30,ymm29,ymm28
+\s*[a-f0-9]+:\s*62 02 17 00 da f4\s+vsm4rnds4 xmm30,xmm29,xmm28
+\s*[a-f0-9]+:\s*62 22 17 20 da b4 f5 00 00 00 10\s+vsm4rnds4 ymm30,ymm29,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 17 20 da 31\s+vsm4rnds4 ymm30,ymm29,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 17 20 da 71 7f\s+vsm4rnds4 ymm30,ymm29,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*62 62 17 20 da 72 80\s+vsm4rnds4 ymm30,ymm29,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*62 22 17 00 da b4 f5 00 00 00 10\s+vsm4rnds4 xmm30,xmm29,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 17 00 da 31\s+vsm4rnds4 xmm30,xmm29,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 17 00 da 71 7f\s+vsm4rnds4 xmm30,xmm29,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*62 62 17 00 da 72 80\s+vsm4rnds4 xmm30,xmm29,XMMWORD PTR \[rdx-0x800\]
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.d b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.d
new file mode 100644
index 00000000000..2646c3e63c2
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.d
@@ -0,0 +1,29 @@ 
+#objdump: -dw
+#name: x86_64 AVX10.2/256, SM4 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 02 16 20 da f4\s+vsm4key4 %ymm28,%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 02 16 00 da f4\s+vsm4key4 %xmm28,%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 22 16 20 da b4 f5 00 00 00 10\s+vsm4key4 0x10000000\(%rbp,%r14,8\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 42 16 20 da 31\s+vsm4key4 \(%r9\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 62 16 20 da 71 7f\s+vsm4key4 0xfe0\(%rcx\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 62 16 20 da 72 80\s+vsm4key4 -0x1000\(%rdx\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 22 16 00 da b4 f5 00 00 00 10\s+vsm4key4 0x10000000\(%rbp,%r14,8\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 42 16 00 da 31\s+vsm4key4 \(%r9\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 62 16 00 da 71 7f\s+vsm4key4 0x7f0\(%rcx\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 62 16 00 da 72 80\s+vsm4key4 -0x800\(%rdx\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 02 17 20 da f4\s+vsm4rnds4 %ymm28,%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 02 17 00 da f4\s+vsm4rnds4 %xmm28,%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 22 17 20 da b4 f5 00 00 00 10\s+vsm4rnds4 0x10000000\(%rbp,%r14,8\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 42 17 20 da 31\s+vsm4rnds4 \(%r9\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 62 17 20 da 71 7f\s+vsm4rnds4 0xfe0\(%rcx\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 62 17 20 da 72 80\s+vsm4rnds4 -0x1000\(%rdx\),%ymm29,%ymm30
+\s*[a-f0-9]+:\s*62 22 17 00 da b4 f5 00 00 00 10\s+vsm4rnds4 0x10000000\(%rbp,%r14,8\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 42 17 00 da 31\s+vsm4rnds4 \(%r9\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 62 17 00 da 71 7f\s+vsm4rnds4 0x7f0\(%rcx\),%xmm29,%xmm30
+\s*[a-f0-9]+:\s*62 62 17 00 da 72 80\s+vsm4rnds4 -0x800\(%rdx\),%xmm29,%xmm30
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.s b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.s
new file mode 100644
index 00000000000..5813ef52b04
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-256-sm4.s
@@ -0,0 +1,47 @@ 
+# Check 64-bit AVX10.2/256 SM4 instructions (EVEX-encoded XMM/YMM forms)
+
+	.text
+_start:
+	vsm4key4	%ymm28, %ymm29, %ymm30 
+	vsm4key4	%xmm28, %xmm29, %xmm30
+	vsm4key4	0x10000000(%rbp, %r14, 8), %ymm29, %ymm30
+	vsm4key4	(%r9), %ymm29, %ymm30
+	vsm4key4	4064(%rcx), %ymm29, %ymm30
+	vsm4key4	-4096(%rdx), %ymm29, %ymm30
+	vsm4key4	0x10000000(%rbp, %r14, 8), %xmm29, %xmm30
+	vsm4key4	(%r9), %xmm29, %xmm30
+	vsm4key4	2032(%rcx), %xmm29, %xmm30
+	vsm4key4	-2048(%rdx), %xmm29, %xmm30
+	vsm4rnds4	%ymm28, %ymm29, %ymm30
+	vsm4rnds4	%xmm28, %xmm29, %xmm30
+	vsm4rnds4	0x10000000(%rbp, %r14, 8), %ymm29, %ymm30
+	vsm4rnds4	(%r9), %ymm29, %ymm30
+	vsm4rnds4	4064(%rcx), %ymm29, %ymm30
+	vsm4rnds4	-4096(%rdx), %ymm29, %ymm30
+	vsm4rnds4	0x10000000(%rbp, %r14, 8), %xmm29, %xmm30
+	vsm4rnds4	(%r9), %xmm29, %xmm30
+	vsm4rnds4	2032(%rcx), %xmm29, %xmm30
+	vsm4rnds4	-2048(%rdx), %xmm29, %xmm30
+
+_intel:
+	.intel_syntax noprefix
+	vsm4key4	ymm30, ymm29, ymm28
+	vsm4key4	xmm30, xmm29, xmm28
+	vsm4key4	ymm30, ymm29, [rbp+r14*8+0x10000000]
+	vsm4key4	ymm30, ymm29, YMMWORD PTR [r9]
+	vsm4key4	ymm30, ymm29, [rcx+4064]
+	vsm4key4	ymm30, ymm29, YMMWORD PTR [rdx-4096]
+	vsm4key4	xmm30, xmm29, XMMWORD PTR [rbp+r14*8+0x10000000]
+	vsm4key4	xmm30, xmm29, [r9]
+	vsm4key4	xmm30, xmm29, [rcx+2032]
+	vsm4key4	xmm30, xmm29, XMMWORD PTR [rdx-2048]
+	vsm4rnds4	ymm30, ymm29, ymm28
+	vsm4rnds4	xmm30, xmm29, xmm28
+	vsm4rnds4	ymm30, ymm29, [rbp+r14*8+0x10000000]
+	vsm4rnds4	ymm30, ymm29, [r9]
+	vsm4rnds4	ymm30, ymm29, YMMWORD PTR [rcx+4064]
+	vsm4rnds4	ymm30, ymm29, YMMWORD PTR [rdx-4096]
+	vsm4rnds4	xmm30, xmm29, XMMWORD PTR [rbp+r14*8+0x10000000]
+	vsm4rnds4	xmm30, xmm29, XMMWORD PTR [r9]
+	vsm4rnds4	xmm30, xmm29, [rcx+2032]
+	vsm4rnds4	xmm30, xmm29, [rdx-2048]
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4-intel.d b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4-intel.d
new file mode 100644
index 00000000000..c9dbdfbb46d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4-intel.d
@@ -0,0 +1,21 @@ 
+#objdump: -dw -Mintel
+#name: x86_64 AVX10.2/512, SM4 insns (Intel disassembly)
+#source: x86-64-avx10_2-512-sm4.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+#...
+[a-f0-9]+ <_intel>:
+\s*[a-f0-9]+:\s*62 02 16 40 da f4\s+vsm4key4 zmm30,zmm29,zmm28
+\s*[a-f0-9]+:\s*62 22 16 40 da b4 f5 00 00 00 10\s+vsm4key4 zmm30,zmm29,ZMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 16 40 da 31\s+vsm4key4 zmm30,zmm29,ZMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 16 40 da 71 7f\s+vsm4key4 zmm30,zmm29,ZMMWORD PTR \[rcx\+0x1fc0\]
+\s*[a-f0-9]+:\s*62 62 16 40 da 72 80\s+vsm4key4 zmm30,zmm29,ZMMWORD PTR \[rdx-0x2000\]
+\s*[a-f0-9]+:\s*62 02 17 40 da f4\s+vsm4rnds4 zmm30,zmm29,zmm28
+\s*[a-f0-9]+:\s*62 22 17 40 da b4 f5 00 00 00 10\s+vsm4rnds4 zmm30,zmm29,ZMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*62 42 17 40 da 31\s+vsm4rnds4 zmm30,zmm29,ZMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*62 62 17 40 da 71 7f\s+vsm4rnds4 zmm30,zmm29,ZMMWORD PTR \[rcx\+0x1fc0\]
+\s*[a-f0-9]+:\s*62 62 17 40 da 72 80\s+vsm4rnds4 zmm30,zmm29,ZMMWORD PTR \[rdx-0x2000\]
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.d b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.d
new file mode 100644
index 00000000000..ee660962340
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.d
@@ -0,0 +1,19 @@ 
+#objdump: -dw
+#name: x86_64 AVX10.2/512, SM4 insns
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 02 16 40 da f4\s+vsm4key4 %zmm28,%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 22 16 40 da b4 f5 00 00 00 10\s+vsm4key4 0x10000000\(%rbp,%r14,8\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 42 16 40 da 31\s+vsm4key4 \(%r9\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 62 16 40 da 71 7f\s+vsm4key4 0x1fc0\(%rcx\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 62 16 40 da 72 80\s+vsm4key4 -0x2000\(%rdx\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 02 17 40 da f4\s+vsm4rnds4 %zmm28,%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 22 17 40 da b4 f5 00 00 00 10\s+vsm4rnds4 0x10000000\(%rbp,%r14,8\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 42 17 40 da 31\s+vsm4rnds4 \(%r9\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 62 17 40 da 71 7f\s+vsm4rnds4 0x1fc0\(%rcx\),%zmm29,%zmm30
+\s*[a-f0-9]+:\s*62 62 17 40 da 72 80\s+vsm4rnds4 -0x2000\(%rdx\),%zmm29,%zmm30
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.s b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.s
new file mode 100644
index 00000000000..eff1a598468
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-512-sm4.s
@@ -0,0 +1,27 @@ 
+# Check 64-bit AVX10.2/512 SM4 instructions (EVEX-encoded ZMM forms)
+
+	.text
+_start:
+	vsm4key4	%zmm28, %zmm29, %zmm30
+	vsm4key4	0x10000000(%rbp, %r14, 8), %zmm29, %zmm30
+	vsm4key4	(%r9), %zmm29, %zmm30
+	vsm4key4	8128(%rcx), %zmm29, %zmm30
+	vsm4key4	-8192(%rdx), %zmm29, %zmm30
+	vsm4rnds4	%zmm28, %zmm29, %zmm30
+	vsm4rnds4	0x10000000(%rbp, %r14, 8), %zmm29, %zmm30
+	vsm4rnds4	(%r9), %zmm29, %zmm30
+	vsm4rnds4	8128(%rcx), %zmm29, %zmm30
+	vsm4rnds4	-8192(%rdx), %zmm29, %zmm30
+
+_intel:
+	.intel_syntax noprefix
+	vsm4key4	zmm30, zmm29, zmm28
+	vsm4key4	zmm30, zmm29, [rbp+r14*8+0x10000000]
+	vsm4key4	zmm30, zmm29, ZMMWORD PTR [r9]
+	vsm4key4	zmm30, zmm29, [rcx+8128]
+	vsm4key4	zmm30, zmm29, ZMMWORD PTR [rdx-8192]
+	vsm4rnds4	zmm30, zmm29, zmm28
+	vsm4rnds4	zmm30, zmm29, [rbp+r14*8+0x10000000]
+	vsm4rnds4	zmm30, zmm29, ZMMWORD PTR [r9]
+	vsm4rnds4	zmm30, zmm29, ZMMWORD PTR [rcx+8128]
+	vsm4rnds4	zmm30, zmm29, [rdx-8192]
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.l b/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.l
new file mode 100644
index 00000000000..b5410c2770f
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.l
@@ -0,0 +1,5 @@ 
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsm4key4'
+.*:7: Error: operand size mismatch for `vsm4rnds4'
+.*:8: Error: no EVEX encoding for `vsm4key4'
+.*:9: Error: no EVEX encoding for `vsm4rnds4'
diff --git a/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.s b/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.s
new file mode 100644
index 00000000000..8234412f3db
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-avx10_2-sm4-inval.s
@@ -0,0 +1,9 @@ 
+# Check that 64-bit EVEX-only SM4 forms are rejected when AVX10.2 is disabled
+
+	.text
+	.arch .noavx10.2
+_start:
+	vsm4key4	%zmm28, %zmm29, %zmm30 
+	vsm4rnds4	%zmm28, %zmm29, %zmm30
+	vsm4key4	%ymm28, %ymm29, %ymm30 
+	vsm4rnds4	%xmm28, %xmm29, %xmm30
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 5e26d97e8ad..bdb066dd456 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -518,6 +518,11 @@  run_dump_test "x86-64-avx10_2-512-miscs"
 run_dump_test "x86-64-avx10_2-512-miscs-intel"
 run_dump_test "x86-64-avx10_2-256-miscs"
 run_dump_test "x86-64-avx10_2-256-miscs-intel"
+run_dump_test "x86-64-avx10_2-256-sm4"
+run_dump_test "x86-64-avx10_2-256-sm4-intel"
+run_dump_test "x86-64-avx10_2-512-sm4"
+run_dump_test "x86-64-avx10_2-512-sm4-intel"
+run_list_test "x86-64-avx10_2-sm4-inval"
 run_dump_test "x86-64-clzero"
 run_dump_test "x86-64-mwaitx-bdver4"
 run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis-evex.h b/opcodes/i386-dis-evex.h
index 751d59e55fb..30312eb1a4e 100644
--- a/opcodes/i386-dis-evex.h
+++ b/opcodes/i386-dis-evex.h
@@ -538,7 +538,7 @@  static const struct dis386 evex_table[][256] = {
     /* D8 */
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F38DA) },
     { Bad_Opcode },
     { "%XEvaesencY",	{ XM, Vex, EXx }, PREFIX_DATA },
     { "%XEvaesenclastY", { XM, Vex, EXx }, PREFIX_DATA },
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 237f0f77744..813295c8aad 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -4173,9 +4173,9 @@  static const struct dis386 prefix_table[][4] = {
   /* PREFIX_VEX_0F38DA_W_0 */
   {
     { VEX_LEN_TABLE (VEX_LEN_0F38DA_W_0_P_0) },
-    { "vsm4key4", { XM, Vex, EXx }, 0 },
+    { "%XEvsm4key4",	{ XM, Vex, EXx }, 0 },
     { VEX_LEN_TABLE (VEX_LEN_0F38DA_W_0_P_2) },
-    { "vsm4rnds4", { XM, Vex, EXx }, 0 },
+    { "%XEvsm4rnds4",	{ XM, Vex, EXx }, 0 },
   },
 
   /* PREFIX_VEX_0F38F2_L_0 */
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index e50f518ec1a..75d8c4f8b70 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -229,8 +229,6 @@  enum i386_cpu
   CpuUSER_MSR,
   /* Intel MSR_IMM Instructions support required.  */
   CpuMSR_IMM,
-  /* Intel AVX10.2 Instructions support required.  */
-  CpuAVX10_2,
   /* mwaitx instruction required */
   CpuMWAITX,
   /* Clzero instruction required */
@@ -327,6 +325,8 @@  enum i386_cpu
   CpuAVX512VL,
   /* Intel APX_F Instructions support required.  */
   CpuAPX_F,
+  /* Intel AVX10.2 Instructions support required.  */
+  CpuAVX10_2,
   /* Not supported in the 64bit mode  */
   CpuNo64,
 
@@ -363,6 +363,7 @@  enum i386_cpu
 		   cpuavx512f:1, \
 		   cpuavx512vl:1, \
 		   cpuapx_f:1, \
+		   cpuavx10_2:1, \
       /* NOTE: This field needs to remain last. */ \
 		   cpuno64:1
 
@@ -485,7 +486,6 @@  typedef union i386_cpu_flags
       unsigned int cpulkgs:1;
       unsigned int cpuuser_msr:1;
       unsigned int cpumsr_imm:1;
-      unsigned int cpuavx10_2:1;
       unsigned int cpumwaitx:1;
       unsigned int cpuclzero:1;
       unsigned int cpuospke:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 1684161ce6e..666ad99563a 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2193,8 +2193,12 @@  vsm3msg2, 0x66da, SM3, Modrm|Space0F38|Vex128|Src1VVVV|VexW0|NoSuf, { RegXMM|Uns
 
 // SM4 instructions.
 
-vsm4key4, 0xf3da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
-vsm4rnds4, 0xf2da, SM4, Modrm|Space0F38|Vex|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+<sm4:isa:attr:reg, $y:SM4:Vex:, $z:SM4&AVX10_2:Disp8ShiftVL:RegZMM>
+
+vsm4key4<sm4>, 0xf3da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
+vsm4rnds4<sm4>, 0xf2da, <sm4:isa>, Modrm|Space0F38|<sm4:attr>|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { RegXMM|RegYMM|<sm4:reg>|Unspecified|BaseIndex, RegXMM|RegYMM|<sm4:reg>, RegXMM|RegYMM|<sm4:reg> }
+
+<sm4>
 
 // SM4 instructions end.