[2/8] Support APX GPR32 with extend evex prefix

Message ID 20230919152527.497773-3-lili.cui@intel.com
State New
Headers
Series Support Intel APX EGPR

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Frager, Neal via Binutils Sept. 19, 2023, 3:25 p.m. UTC
EVEX extension of legacy instructions:
  All promoted legacy instructions are placed in EVEX map 4, which is
  currently reserved.
EVEX extension of EVEX instructions:
  All existing EVEX instructions are extended by APX using the extended
  EVEX prefix, so that they can access all 32 GPRs.
EVEX extension of VEX instructions:
  Promoting a VEX instruction into the EVEX space does not change the map
  id, the opcode, or the operand encoding of the VEX instruction.

gas/ChangeLog:

        * config/tc-i386.c (is_any_apx_evex_encoding): New function to
        test for APX EVEX encoding.
        (build_legacy_insns_with_apx_encoding): New function.
        (build_evex_insns_with_extend_evex_prefix): New function to
        build EVEX insns that use GPR32 with the extended EVEX prefix.
        (md_assemble): Handle APX with REX2 or EVEX encoding.
        (output_insn): Handle APX EVEX encoding.

opcodes/ChangeLog:

        * i386-opc.h (SPACE_EVEXMAP4): New define for legacy insns
        promoted to EVEX.
        * i386-opc.tbl: Mark the legacy and VEX insns that don't
        support GPR32, and add legacy insns (map 2/3) promoted to
        EVEX.
        * i386-gen.c (process_i386_opcode_modifier): Set No_egpr for
        VEX instructions.
        * i386-tbl.h: Regenerated.
        * i386-dis-evex-len.h: Handle EVEX_LEN_0F38F2, EVEX_LEN_0F38F3.
        * i386-dis-evex-mod.h: Handle MOD_EVEX_MAP4_65,
        MOD_EVEX_MAP4_66_PREFIX_0, MOD_EVEX_MAP4_8A_W_0,
        MOD_EVEX_MAP4_DA_PREFIX_1, MOD_EVEX_MAP4_DB_PREFIX_1,
        MOD_EVEX_MAP4_DC_PREFIX_1, MOD_EVEX_MAP4_DD_PREFIX_1,
        MOD_EVEX_MAP4_DE_PREFIX_1, MOD_EVEX_MAP4_DF_PREFIX_1,
        MOD_EVEX_MAP4_F8_PREFIX_1, MOD_EVEX_MAP4_F8_PREFIX_2,
        MOD_EVEX_MAP4_F8_PREFIX_3, MOD_EVEX_MAP4_F9,
        MOD_EVEX_MAP4_8B.
        * i386-dis-evex-w.h: Handle EVEX_W_MAP4_8A.
        * i386-dis-evex-prefix.h: Handle PREFIX_EVEX_MAP4_60,
        PREFIX_EVEX_MAP4_61, PREFIX_EVEX_MAP4_66,
        PREFIX_EVEX_MAP4_8B_M_0, PREFIX_EVEX_MAP4_D8,
        PREFIX_EVEX_MAP4_DA, PREFIX_EVEX_MAP4_DB,
        PREFIX_EVEX_MAP4_DC, PREFIX_EVEX_MAP4_DD,
        PREFIX_EVEX_MAP4_DE, PREFIX_EVEX_MAP4_DF,
        PREFIX_EVEX_MAP4_F0, PREFIX_EVEX_MAP4_F1,
        PREFIX_EVEX_MAP4_F2, PREFIX_EVEX_MAP4_F8,
        PREFIX_EVEX_MAP4_FC.
        * i386-dis-evex-reg.h: Handle REG_EVEX_MAP4_D8_PREFIX_1,
        REG_EVEX_0F38F3_L_0.
        * i386-dis-evex-x86.h: Handle X86_64_EVEX_0F90, X86_64_EVEX_0F91,
        X86_64_EVEX_0F92, X86_64_EVEX_0F93, X86_64_EVEX_0F38F2,
        X86_64_EVEX_0F38F3, X86_64_EVEX_0F38F5, X86_64_EVEX_0F38F6,
        X86_64_EVEX_0F38F7, X86_64_EVEX_0F3AF0, X86_64_EVEX_MAP7_F8.
        * i386-dis-evex.h: Add EVEX_MAP4_ for legacy insns promoted to
        EVEX so they can use GPR32, and add VEX insns promoted to EVEX
        for GPR32 use.  Add EVEX_MAP7.
        * i386-dis.c (REG enum): Add REG_EVEX_MAP4_D8_PREFIX_1.
        (MOD enum): Add MOD_EVEX_MAP4_65, MOD_EVEX_MAP4_66_PREFIX_0,
        MOD_EVEX_MAP4_8A_W_0, MOD_EVEX_MAP4_8B,
        MOD_EVEX_MAP4_DA_PREFIX_1, MOD_EVEX_MAP4_DB_PREFIX_1,
        MOD_EVEX_MAP4_DC_PREFIX_1, MOD_EVEX_MAP4_DD_PREFIX_1,
        MOD_EVEX_MAP4_DE_PREFIX_1, MOD_EVEX_MAP4_DF_PREFIX_1,
        MOD_EVEX_MAP4_F8_PREFIX_1, MOD_EVEX_MAP4_F8_PREFIX_2,
        MOD_EVEX_MAP4_F8_PREFIX_3, MOD_EVEX_MAP4_F9,
        REG_EVEX_0F38F3_L_0.
        (PREFIX enum): Add PREFIX_EVEX_MAP4_60, PREFIX_EVEX_MAP4_61,
        PREFIX_EVEX_MAP4_66, PREFIX_EVEX_MAP4_8B_M_0,
        PREFIX_EVEX_MAP4_D8, PREFIX_EVEX_MAP4_DA,
        PREFIX_EVEX_MAP4_DB, PREFIX_EVEX_MAP4_DC,
        PREFIX_EVEX_MAP4_DD, PREFIX_EVEX_MAP4_DE,
        PREFIX_EVEX_MAP4_DF, PREFIX_EVEX_MAP4_F0,
        PREFIX_EVEX_MAP4_F1, PREFIX_EVEX_MAP4_F2,
        PREFIX_EVEX_MAP4_F8, PREFIX_EVEX_MAP4_FC.
        (EVEX_LEN_enum): Add EVEX_LEN_0F38F2, EVEX_LEN_0F38F3.
        (EVEX_X86_enum): Add X86_64_EVEX_0F90, X86_64_EVEX_0F91,
        X86_64_EVEX_0F92, X86_64_EVEX_0F93, X86_64_EVEX_0F3849,
        X86_64_EVEX_0F384B, X86_64_EVEX_0F38E0, X86_64_EVEX_0F38E1,
        X86_64_EVEX_0F38E2, X86_64_EVEX_0F38E3, X86_64_EVEX_0F38E4,
        X86_64_EVEX_0F38E5, X86_64_EVEX_0F38E6, X86_64_EVEX_0F38E7,
        X86_64_EVEX_0F38E8, X86_64_EVEX_0F38E9, X86_64_EVEX_0F38EA,
        X86_64_EVEX_0F38EB, X86_64_EVEX_0F38EC, X86_64_EVEX_0F38ED,
        X86_64_EVEX_0F38EE, X86_64_EVEX_0F38EF, X86_64_EVEX_0F38F2,
        X86_64_EVEX_0F38F3, X86_64_EVEX_0F38F5, X86_64_EVEX_0F38F6,
        X86_64_EVEX_0F38F7, X86_64_EVEX_0F3AF0.
        (EVEX_MAP4): New define.
        (EVEX_MAP7): New.
        (evex_type): Ditto.
        (get_valid_dis386): Decode the extended EVEX prefix.  Handle
        EVEX_MAP4 and EVEX_MAP7.
        (print_register): Handle decoding of APX instructions.
        (OP_E_memory): Ditto.
        (OP_G): Ditto.
---
 gas/config/tc-i386.c           |  93 +++-
 opcodes/i386-dis-evex-len.h    |  10 +
 opcodes/i386-dis-evex-mod.h    |  50 ++
 opcodes/i386-dis-evex-prefix.h |  83 ++++
 opcodes/i386-dis-evex-reg.h    |  14 +
 opcodes/i386-dis-evex-x86.h    | 140 ++++++
 opcodes/i386-dis-evex.h        | 838 ++++++++++++++++++++++++++++-----
 opcodes/i386-dis.c             | 131 +++++-
 opcodes/i386-gen.c             |  10 +
 opcodes/i386-opc.h             |   2 +
 opcodes/i386-opc.tbl           |  80 ++++
 11 files changed, 1299 insertions(+), 152 deletions(-)
 create mode 100644 opcodes/i386-dis-evex-x86.h
  

Comments

Jan Beulich Sept. 22, 2023, 10:12 a.m. UTC | #1
On 19.09.2023 17:25, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1945,6 +1945,30 @@ cpu_flags_match (const insn_template *t)
>  		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
>  		match |= CPU_FLAGS_ARCH_MATCH;
>  	    }
> +	  else if (x.bitfield.cpuapx_f)
> +	    {
> +	      if (cpu.bitfield.cpuapx_f
> +		  && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe)
> +		  && (!x.bitfield.cpuept || cpu.bitfield.cpuept)
> +		  && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid)
> +		  && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2)
> +		  && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2)
> +		  && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi)
> +		  && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx)
> +		  && (!x.bitfield.cpusha || cpu.bitfield.cpusha)
> +		  && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw)
> +		  && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq)
> +		  && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f)
> +		  && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk)
> +		  && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b)
> +		  && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri)
> +		  && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd)
> +		  && (!x.bitfield.cpukl || cpu.bitfield.cpukl)
> +		  && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl)
> +		  && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd)
> +		  && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int))
> +		match |= CPU_FLAGS_ARCH_MATCH;
> +	    }
>  	  else
>  	    match |= CPU_FLAGS_ARCH_MATCH;
>

This is getting unwieldy, so I think we will need to think of a better way
of expressing both "multiple ISAs need to be enabled" and "one of a set of
ISAs needs to be enabled". It's only the mix of these expressed in a
uniform way in the insn table that requires these extra conditionals. With
the size of i386_cpu_attr greatly shrunk as of recently, I wonder if we
couldn't simply add a 2nd instance of it to insn_template. One would be
"all of these are required", while the other would be "any one of these is
sufficient".

> @@ -3850,7 +3874,10 @@ is_any_vex_encoding (const insn_template *t)
>  static INLINE bool
>  is_any_apx_encoding (void)
>  {
> -  return i.rex2 || i.rex2_encoding;
> +  return i.rex2 
> +    || i.rex2_encoding
> +    || (i.vex.register_specifier
> +	&& i.vex.register_specifier->reg_flags & RegRex2);

Nit: For readability as well as for consistency this wants indenting
differently:

  return i.rex2
	 || i.rex2_encoding
	 || (i.vex.register_specifier
	     && i.vex.register_specifier->reg_flags & RegRex2);

or possibly (slightly shorter)

  return i.rex2 || i.rex2_encoding
	 || (i.vex.register_specifier
	     && i.vex.register_specifier->reg_flags & RegRex2);

In any event you want to avoid trailing blanks on any line.

> @@ -3859,6 +3886,12 @@ is_any_apx_rex2_encoding (void)
>    return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
>  }
>  
> +static INLINE bool
> +is_any_apx_evex_encoding (void)
> +{
> +  return i.rex2 && i.vex.length == 4;
> +}

This doesn't feel right: {evex} use would demand this encoding even if
i.rex2 is still zero.

Also - what is "any" in the name (also of the earlier predicate) intending
to express? is_any_vex_encoding() is named the way it is because it covers
both VEX and EVEX.
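
If such a predicate is kept at all, it presumably also has to honour an explicit {evex}
request. A sketch of that, assuming the pseudo prefix is recorded in i.vec_encoding the
same way as for other VEX/EVEX pseudo prefixes (name and placement are an assumption,
not taken from the patch):

  static INLINE bool
  is_apx_evex_encoding (void)
  {
    /* Extended EVEX is needed either because an eGPR forced it (i.rex2)
       or because the user asked for EVEX explicitly via {evex}.  */
    return i.vex.length == 4
	   && (i.rex2 || i.vec_encoding == vex_encoding_evex);
  }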

> @@ -4129,6 +4162,50 @@ build_rex2_prefix (void)
>  		    | (i.rex2 << 4) | i.rex);
>  }
>  
> +/* Build the EVEX prefix (4-byte) for evex insn
> +   | 62h |
> +   | `R`X`B`R' | B'mmm |
> +   | W | v`v`v`v | `x' | pp |
> +   | z| L'L | b | `v | aaa |
> +*/
> +static void
> +build_evex_insns_with_extend_evex_prefix (void)

The name is somewhat odd and doesn't fit that of other similar functions.
In particular this function doesn't build an entire insn, but still just
the prefix. So perhaps build_apx_evex_prefix()?

> +{
> +  build_evex_prefix ();
> +  if (i.rex2 & REX_R)
> +    i.vex.bytes[1] &= 0xef;
> +  if (i.vex.register_specifier
> +      && register_number (i.vex.register_specifier) > 0xf)
> +    i.vex.bytes[3] &=0xf7;

Nit: Missing blank.

But: Is this needed? Doesn't build_evex_prefix() fill this bit already,
which isn't new in APX?

> +  if (i.rex2 & REX_B)
> +    i.vex.bytes[1] |= 0x08;
> +  if (i.rex2 & REX_X)
> +    i.vex.bytes[2] &= 0xfb;
> +}
> +
> +/* Build the EVEX prefix (4-byte) for legacy insn
> +   | 62h |
> +   | `R`X`B`R' | B'100 |
> +   | W | v`v`v`v | `x' | pp |
> +   | 000 | ND | `v | NF | 00 |
> +   For legacy insn without ndd nor nf, [vvvvv] must be all zero.  */
> +static void
> +build_legacy_insns_with_apx_encoding (void)

As per above, maybe build_extended_evex_prefix()? Or, ...

> +{
> +  /* map{0,1} of legacy space without ndd or nf could use rex2 prefix.  */
> +  if (i.tm.opcode_space <= SPACE_0F
> +      && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
> +    return build_rex2_prefix ();

... because of this, build_apx_prefix()? Yet I think the call to this
function might better remain in the caller.

> +  if (i.prefix[DATA_PREFIX] != 0)
> +    {
> +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
> +      i.prefix[DATA_PREFIX] = 0;
> +    }

While this looks to be correct for the case when the prefix was derived
from an insn template and the use of 16-bit operands, I don't think it
is uniformly correct when "data16" was used as a prefix explicitly. In
such a case either REX2 encoding needs to be used, or an error needs
emitting.

You may further want to assert that i.tm.opcode_modifier.opcodeprefix
is still zero ahead of the assignment.
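
The assert part is straightforward; a minimal sketch (the explicit "data16" case would
still need separate handling, which is not shown here):

  if (i.prefix[DATA_PREFIX] != 0)
    {
      /* The template is not expected to carry an opcode prefix of its
	 own at this point.  */
      gas_assert (i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE);
      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
      i.prefix[DATA_PREFIX] = 0;
    }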

> @@ -10057,7 +10136,7 @@ output_insn (void)
>  
>        /* Since the VEX/EVEX prefix contains the implicit prefix, we
>  	 don't need the explicit prefix.  */
> -      if (!is_any_vex_encoding (&i.tm))
> +      if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ())
>  	{
>  	  switch (i.tm.opcode_modifier.opcodeprefix)

I'm not convinced the use of this predicate is appropriate here. I'd
generally have expected is_any_vex_encoding() to be extended to also
detect all cases of EVEX encodings in APX.

> --- a/opcodes/i386-dis-evex-len.h
> +++ b/opcodes/i386-dis-evex-len.h

As for the earlier patch, I'll look at the disassembler changes separately.

> @@ -1121,6 +1122,15 @@ process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
>  	fprintf (stderr,
>  		 "%s: %d: W modifier without Word/Dword/Qword operand(s)\n",
>  		 filename, lineno);
> +      if (modifiers[Vex].value
> +	  || (space > SPACE_0F
> +	      && !(space == SPACE_EVEXMAP4
> +		   || modifiers[EVex].value
> +		   || modifiers[Disp8MemShift].value
> +		   || modifiers[Broadcast].value
> +		   || modifiers[Masking].value
> +		   || modifiers[SAE].value)))

First of all, this wants simplifying to

      if (modifiers[Vex].value
	  || (space > SPACE_0F
	      && space != SPACE_EVEXMAP4
	      && !modifiers[EVex].value
	      && !modifiers[Disp8MemShift].value
	      && !modifiers[Broadcast].value
	      && !modifiers[Masking].value
	      && !modifiers[SAE].value))

which helps readability and makes more obvious that this parallels
tc-i386.c:is_evex_encoding(). Such a connection, where updates need
to be made in sync, needs pointing out in code comments at both sites.

Yet of course this condition won't hold anymore for combined VEX/EVEX
templates.

> +	modifiers[No_egpr].value = 1;
>      }

And then - shouldn't at least part of this already be put in place in
patch 1?

Finally, to avoid the split between where this attribute gets set,
wouldn't it be possible to also handle the XSAVE/XRSTOR variants here
rather than directly in the opcode table?

> @@ -187,6 +188,7 @@ mov, 0xf24, i386|No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te
>  
>  // Move after swapping the bytes
>  movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> +movbe, 0x60, Movbe|APX_F|x64, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }

In new code please omit redundant Word, Dword, and alike.

I further wonder if it wouldn't help if i386-gen inserted the x64 for
all APX templates, rather than open-coding that on every single template.
Or alternatively put

#define APX_F APX_F|x64

earlier in the file.

> @@ -300,6 +302,9 @@ sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
>  sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
>  sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x18, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x80/3, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  cmp, 0x38, 0, D|W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  cmp, 0x83/7, 0, Modrm|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> @@ -332,9 +337,14 @@ adc, 0x10, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
>  adc, 0x83/2, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  adc, 0x14, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
>  adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x10, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x80/2, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +
>  not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

Looking at just the additions up to here, I'm getting the impression that
in this patch - despite its title - you only add non-ND, non-NF insn forms
for previously non-VEX-encoded insns. This could do with clarifying, by
both making the title more concise and by stating the exact scope of the
work done in the description.

> @@ -1312,13 +1330,16 @@ getsec, 0xf37, SMX, NoSuf, {}
>  
>  invept, 0x660f3880, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
>  invept, 0x660f3880, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> +invept, 0xf3f0, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
>  invvpid, 0x660f3881, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
>  invvpid, 0x660f3881, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> +invvpid, 0xf3f1, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
>  
>  // INVPCID instruction
>  
>  invpcid, 0x660f3882, INVPCID|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
>  invpcid, 0x660f3882, INVPCID|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> +invpcid, 0xf3f2, APX_F|INVPCID|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }

I don't think NoRex64 belongs in any EVEX template.

> @@ -1418,7 +1439,9 @@ pcmpestrm, 0x660f3a60, SSE4_2|x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { I
>  pcmpistri<sse42>, 0x660f3a63, <sse42:cpu>, Modrm|<sse42:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
>  pcmpistrm<sse42>, 0x660f3a62, <sse42:cpu>, Modrm|<sse42:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
>  crc32, 0xf20f38f0, SSE4_2, W|Modrm|No_sSuf|No_qSuf, { Reg8|Reg16|Reg32|Unspecified|BaseIndex, Reg32 }
> +crc32, 0xf0, APX_F|x64, W|Modrm|No_sSuf|No_qSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Unspecified|BaseIndex, Reg32 }
>  crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }
> +crc32, 0xf0, APX_F|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf|EVex128|EVexMap4, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }

There's quite a bit of logic in tc-i386.c to get CRC32 right. I wonder
if you can really get away without adjusting that logic to also take
effect on the EVEX encodings.

> @@ -3408,3 +3487,4 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}
>  eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
>  
>  // FRED instructions end.
> +

Nit: Stray change.

Jan
  
Jan Beulich Sept. 22, 2023, 10:50 a.m. UTC | #2
On 19.09.2023 17:25, Cui, Lili wrote:
> --- a/opcodes/i386-dis-evex-len.h
> +++ b/opcodes/i386-dis-evex-len.h
> @@ -62,6 +62,16 @@ static const struct dis386 evex_len_table[][3] = {
>      { REG_TABLE (REG_EVEX_0F38C7_L_2) },
>    },
>  
> +  /* EVEX_LEN_0F38F2 */
> +  {
> +    { "andnS",		{ Gdq, VexGdq, Edq }, 0 },
> +  },
> +
> +  /* EVEX_LEN_0F38F3 */
> +  {
> +    { REG_TABLE(REG_EVEX_0F38F3_L_0) },
> +  },
> +
>    /* EVEX_LEN_0F3A00 */
>    {
>      { Bad_Opcode },
> diff --git a/opcodes/i386-dis-evex-mod.h b/opcodes/i386-dis-evex-mod.h
> index f9f912c5094..5a1326a1b73 100644
> --- a/opcodes/i386-dis-evex-mod.h
> +++ b/opcodes/i386-dis-evex-mod.h
> @@ -1 +1,51 @@
>  /* Nothing at present.  */
> +  /* MOD_EVEX_MAP4_65 */
> +  {
> +    { "wrussK",		{ M, Gdq }, PREFIX_DATA },
> +  },
> +  /* MOD_EVEX_MAP4_66_PREFIX_0 */
> +  {
> +    { "wrssK",		{ M, Gdq }, 0 },
> +  },

Not very long ago I invested quite a bit of time to remove unnecessary
decoding through mod_table[]. Please don't introduce new instances.
Entries should be added here only when both branches are populated
(iow it looks as if this patch shouldn't touch this file at all).

> --- a/opcodes/i386-dis-evex-prefix.h
> +++ b/opcodes/i386-dis-evex-prefix.h
> @@ -338,6 +338,89 @@
>      { "vcmpp%XH", { MaskG, Vex, EXxh, EXxEVexS, CMP }, 0 },
>      { "vcmps%XH", { MaskG, VexScalar, EXw, EXxEVexS, CMP }, 0 },
>    },
> +  /* PREFIX_EVEX_MAP4_60 */
> +  {
> +    { "movbeS",	{ Gv, Ev }, 0 },
> +    { Bad_Opcode },
> +    { "movbeS",	{ Gv, Ev }, 0 },
> +  },
> +  /* PREFIX_EVEX_MAP4_61 */
> +  {
> +    { "movbeS",	{ Ev, Gv }, 0 },
> +    { Bad_Opcode },
> +    { "movbeS",	{ Ev, Gv }, 0 },
> +  },

In cases like this (of which, aiui, there will be many more), where only
prefix 66 is valid, and only to modify operand size, it would be quite
desirable to have a new PREFIX_... identifier to use in the parent table
entry, such that this additional decode step can be avoided.
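
For instance, using the PREFIX_NP_OR_DATA name that only came up later in this thread
(the exact identifier is still open), the two movbe entries could then sit directly in
the map-4 table without the extra prefix_table indirection:

    /* 60 */
    { "movbeS",		{ Gv, Ev }, PREFIX_NP_OR_DATA },
    /* 61 */
    { "movbeS",		{ Ev, Gv }, PREFIX_NP_OR_DATA },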

> --- /dev/null
> +++ b/opcodes/i386-dis-evex-x86.h

I'm puzzled by the name suffix: x86 is kind of redundant with i386. Main
question perhaps is: Do we really need a new file here? It's not a lot
that is put here right now, but of course I haven't peeked ahead.

> --- a/opcodes/i386-dis-evex.h
> +++ b/opcodes/i386-dis-evex.h
> @@ -164,10 +164,10 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 90 */
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F90) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F91) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F92) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F93) },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
> @@ -375,9 +375,9 @@ static const struct dis386 evex_table[][256] = {
>      { "vpsllv%DQ",	{ XM, Vex, EXx }, PREFIX_DATA },
>      /* 48 */
>      { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F3849) },
>      { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F384B) },
>      { "vrcp14p%XW",	{ XM, EXx }, PREFIX_DATA },
>      { "vrcp14s%XW",	{ XMScalar, VexScalar, EXdq }, PREFIX_DATA },
>      { "vrsqrt14p%XW",	{ XM, EXx }, 0 },
> @@ -545,32 +545,32 @@ static const struct dis386 evex_table[][256] = {
>      { "%XEvaesdecY",	{ XM, Vex, EXx }, PREFIX_DATA },
>      { "%XEvaesdeclastY", { XM, Vex, EXx }, PREFIX_DATA },
>      /* E0 */
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E0) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E1) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E2) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E3) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E4) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E5) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E6) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E7) },
>      /* E8 */
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E8) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E9) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EA) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EB) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EC) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38ED) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EE) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EF) },
>      /* F0 */
>      { Bad_Opcode },
>      { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F2) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F3) },
>      { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F5) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F6) },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F7) },
>      /* F8 */
>      { Bad_Opcode },
>      { Bad_Opcode },
> @@ -854,7 +854,7 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* F0 */
> -    { Bad_Opcode },
> +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F3AF0) },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
> @@ -872,7 +872,7 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>    },
> -  /* EVEX_MAP5_ */
> +  /* EVEX_MAP4_ */

While just an artifact from this, ...

> @@ -893,8 +893,8 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 10 */
> -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_10) },
> -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_11) },
> +    { Bad_Opcode },
> +    { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
> @@ -907,7 +907,7 @@ static const struct dis386 evex_table[][256] = {
>      { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
> -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_1D) },
> +    { Bad_Opcode },
>      { Bad_Opcode },
>      { Bad_Opcode },
>      /* 20 */

... changes like these are extremely odd to read. Can you please try
to split this patch such that initially you simply introduce an empty
new sub-table, to avoid such anomalies (which will also affect "git
blame" then, I expect)?

> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -132,6 +132,13 @@ enum x86_64_isa
>    intel64
>  };
>  
> +enum evex_type
> +{
> +  evex_default = 0,
> +  evex_from_legacy,
> +  evex_from_vex,
> +};
> +
>  struct instr_info
>  {
>    enum address_mode address_mode;
> @@ -212,7 +219,6 @@ struct instr_info
>      int ll;
>      bool w;
>      bool evex;
> -    bool r;

The change to eliminate this field would certainly be nice to be separate
from the bulk of the APX changes here.

Jan
  
Jan Beulich Sept. 25, 2023, 6:03 a.m. UTC | #3
On 19.09.2023 17:25, Cui, Lili wrote:
> @@ -300,6 +302,9 @@ sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
>  sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
>  sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x18, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x83/3, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +sbb, 0x80/3, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  cmp, 0x38, 0, D|W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  cmp, 0x83/7, 0, Modrm|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> @@ -332,9 +337,14 @@ adc, 0x10, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
>  adc, 0x83/2, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
>  adc, 0x14, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
>  adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x10, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x83/2, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
> +adc, 0x80/2, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
>  
>  neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +
>  not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
> +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }

I can't seem to be able to find the disassembly part for these. The
reason I've noticed is because I was meaning to figure how you deal
with EVEX.nf needing to be zero for those (and others). For that
bit specifically I expect we want a new %NF macro, which would be
used as !%NF when (bad) (or some such) wants printing instead of {nf}.

Similary I don't think I can spot anywhere that you would check the
other bits which need to be zero in extended EVEX. Nor Improper use
of EVEX.x4 in certain pre-existing encodings (S/G insns at least;
perhaps all others are okay).

Jan
  
Cui, Lili Oct. 17, 2023, 3:48 p.m. UTC | #4
> On 19.09.2023 17:25, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1945,6 +1945,30 @@ cpu_flags_match (const insn_template *t)
> >  		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
> >  		match |= CPU_FLAGS_ARCH_MATCH;
> >  	    }
> > +	  else if (x.bitfield.cpuapx_f)
> > +	    {
> > +	      if (cpu.bitfield.cpuapx_f
> > +		  && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe)
> > +		  && (!x.bitfield.cpuept || cpu.bitfield.cpuept)
> > +		  && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid)
> > +		  && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2)
> > +		  && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2)
> > +		  && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi)
> > +		  && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx)
> > +		  && (!x.bitfield.cpusha || cpu.bitfield.cpusha)
> > +		  && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw)
> > +		  && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq)
> > +		  && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f)
> > +		  && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk)
> > +		  && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b)
> > +		  && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri)
> > +		  && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd)
> > +		  && (!x.bitfield.cpukl || cpu.bitfield.cpukl)
> > +		  && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl)
> > +		  && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd)
> > +		  && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int))
> > +		match |= CPU_FLAGS_ARCH_MATCH;
> > +	    }
> >  	  else
> >  	    match |= CPU_FLAGS_ARCH_MATCH;
> >
>
> This is getting unwieldy, so I think we will need to think of a better way
> of expressing both "multiple ISAs need to be enabled" and "one of a set of
> ISAs needs to be enabled". It's only the mix of these expressed in a
> uniform way in the insn table that requires these extra conditionals. With
> the size of i386_cpu_attr greatly shrunk as of recently, I wonder if we
> couldn't simply add a 2nd instance of it to insn_template. One would be
> "all of these are required", while the other would be "any one of these is
> sufficient".
>



I didn't find a better way to distinguish these two types of requirements in insn_template.
I wrote a new function "cpu_flags_not_or_check" to replace these clumsy conditionals:



static INLINE int
cpu_flags_not_or_check (const union i386_cpu_flags *x,
			const union i386_cpu_flags *y)
{
  switch (ARRAY_SIZE (x->array))
    {
    case 5:
      if ((~x->array[4] | y->array[4]) != 0xffffffff)
	return 0;
      /* Fall through.  */
    case 4:
      if ((~x->array[3] | y->array[3]) != 0xffffffff)
	return 0;
      /* Fall through.  */
    case 3:
      if ((~x->array[2] | y->array[2]) != 0xffffffff)
	return 0;
      /* Fall through.  */
    case 2:
      if ((~x->array[1] | y->array[1]) != 0xffffffff)
	return 0;
      /* Fall through.  */
    case 1:
      return (~x->array[0] | y->array[0]) == 0xffffffff;
    default:
      abort ();
    }
}
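
As a rough illustration only (not taken from the actual patch), the APX_F branch in
cpu_flags_match() could then shrink to something like:

	  else if (x.bitfield.cpuapx_f)
	    {
	      /* Require every other feature named in the template to be
		 enabled alongside APX_F.  */
	      if (cpu.bitfield.cpuapx_f
		  && cpu_flags_not_or_check (&x, &cpu))
		match |= CPU_FLAGS_ARCH_MATCH;
	    }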



> > @@ -3850,7 +3874,10 @@ is_any_vex_encoding (const insn_template *t)
> > static INLINE bool
> > is_any_apx_encoding (void)
> > {
> > -  return i.rex2 || i.rex2_encoding;
> > +  return i.rex2
> > +    || i.rex2_encoding
> > +    || (i.vex.register_specifier
> > +	&& i.vex.register_specifier->reg_flags & RegRex2);
>
> Nit: For readability as well as for consistency this wants indenting
> differently:
>
>   return i.rex2
> 	 || i.rex2_encoding
> 	 || (i.vex.register_specifier
> 	     && i.vex.register_specifier->reg_flags & RegRex2);
>
> or possibly (slightly shorter)
>
>   return i.rex2 || i.rex2_encoding
> 	 || (i.vex.register_specifier
> 	     && i.vex.register_specifier->reg_flags & RegRex2);
>
> In any event you want to avoid trailing blanks on any line.



Done.



> > @@ -3859,6 +3886,12 @@ is_any_apx_rex2_encoding (void)
> >    return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
> > }
> >
> > +static INLINE bool
> > +is_any_apx_evex_encoding (void)
> > +{
> > +  return i.rex2 && i.vex.length == 4;
> > +}
>
> This doesn't feel right: {evex} use would demand this encoding even if
> i.rex2 is still zero.
>
> Also - what is "any" in the name (also of the earlier predicate) intending
> to express? is_any_vex_encoding() is named the way it is because it covers
> both VEX and EVEX.

>



Yes, you are right, this predicate is redundant. It is only used in output_insn (see below), and the EVEX case is already covered by is_any_vex_encoding (&i.tm), since we added EVex128 to all EVEX-promoted instruction templates, so I removed this function.



if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ())



> > @@ -4129,6 +4162,50 @@ build_rex2_prefix (void)
> >  		    | (i.rex2 << 4) | i.rex);
> >  }
> >
> > +/* Build the EVEX prefix (4-byte) for evex insn
> > +   | 62h |
> > +   | `R`X`B`R' | B'mmm |
> > +   | W | v`v`v`v | `x' | pp |
> > +   | z| L'L | b | `v | aaa |
> > +*/
> > +static void
> > +build_evex_insns_with_extend_evex_prefix (void)
>
> The name is somewhat odd and doesn't fit that of other similar functions.
> In particular this function doesn't build an entire insn, but still just
> the prefix. So perhaps build_apx_evex_prefix()?

>



It is better, replaced.



> > +{
> > +  build_evex_prefix ();
> > +  if (i.rex2 & REX_R)
> > +    i.vex.bytes[1] &= 0xef;
> > +  if (i.vex.register_specifier
> > +      && register_number (i.vex.register_specifier) > 0xf)
> > +    i.vex.bytes[3] &=0xf7;
>
> Nit: Missing blank.
>
> But: Is this needed? Doesn't build_evex_prefix() fill this bit already,
> which isn't new in APX?

>



In build_evex_prefix() that bit (V4) is only set for vector registers, so we need to update it for GPR32 operands as well:



      /* The upper 16 registers are encoded in the fourth byte of the
	 EVEX prefix.  */
      if (!(i.vex.register_specifier->reg_flags & RegVRex))
	i.vex.bytes[3] = 0x8;
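
For reference, my reading of which extended-EVEX payload bit each mask in the new
prefix fixup touches, derived from the byte diagram quoted above (the bit names follow
the APX terminology and are not taken from the patch itself):

  /* Byte 1, bit 4: R4 (inverted) - "i.vex.bytes[1] &= 0xef" selects r16-r31 for reg.
     Byte 1, bit 3: B4            - "i.vex.bytes[1] |= 0x08" selects r16-r31 for base/rm.
     Byte 2, bit 2: X4 (inverted) - "i.vex.bytes[2] &= 0xfb" selects r16-r31 for index.
     Byte 3, bit 3: V4 (inverted) - "i.vex.bytes[3] &= 0xf7" selects r16-r31 for vvvv.  */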



> > +  if (i.rex2 & REX_B)
> > +    i.vex.bytes[1] |= 0x08;
> > +  if (i.rex2 & REX_X)
> > +    i.vex.bytes[2] &= 0xfb;
> > +}
> > +
> > +/* Build the EVEX prefix (4-byte) for legacy insn
> > +   | 62h |
> > +   | `R`X`B`R' | B'100 |
> > +   | W | v`v`v`v | `x' | pp |
> > +   | 000 | ND | `v | NF | 00 |
> > +   For legacy insn without ndd nor nf, [vvvvv] must be all zero.  */
> > +static void
> > +build_legacy_insns_with_apx_encoding (void)
>
> As per above, maybe build_extended_evex_prefix()? Or, ...
>
> > +{
> > +  /* map{0,1} of legacy space without ndd or nf could use rex2 prefix.  */
> > +  if (i.tm.opcode_space <= SPACE_0F
> > +      && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
> > +    return build_rex2_prefix ();
>
> ... because of this, build_apx_prefix()? Yet I think the call to this
> function might better remain in the caller.

>



I deleted this function, since we handle rex2 following rex, and handle others following VEX and EVEX.



> > +  if (i.prefix[DATA_PREFIX] != 0)
> > +    {
> > +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
> > +      i.prefix[DATA_PREFIX] = 0;
> > +    }
>
> While this looks to be correct for the case when the prefix was derived
> from an insn template and the use of 16-bit operands, I don't think it
> is uniformly correct when "data16" was used as a prefix explicitly. In
> such a case either REX2 encoding needs to be used, or an error needs
> emitting.
>
> You may further want to assert that i.tm.opcode_modifier.opcodeprefix
> is still zero ahead of the assignment.

>



For REX2 encoding, we add no special handling, just follow REX.

For EVEX-promoted encoding, such as “data16 aand   %r25d,0x123(%r31,%rax,4)”, the following existing code will report an error.



  if (is_any_vex_encoding (&i.tm)
      || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
      || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
    {
      /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
      if (i.prefix[DATA_PREFIX])
	{
	  as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
	  return;
	}



> > @@ -10057,7 +10136,7 @@ output_insn (void)
> >
> >        /* Since the VEX/EVEX prefix contains the implicit prefix, we
> >  	 don't need the explicit prefix.  */
> > -      if (!is_any_vex_encoding (&i.tm))
> > +      if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ())
> >  	{
> >  	  switch (i.tm.opcode_modifier.opcodeprefix)
>
> I'm not convinced the use of this predicate is appropriate here. I'd
> generally have expected is_any_vex_encoding() to be extended to also
> detect all cases of EVEX encodings in APX.



Removed this function; as mentioned before, the EVEX case is already covered by is_any_vex_encoding (&i.tm), since we added EVex128 to all EVEX-promoted instruction templates.



>

> > --- a/opcodes/i386-dis-evex-len.h
> > +++ b/opcodes/i386-dis-evex-len.h
>
> As for the earlier patch, I'll look at the disassembler changes separately.
>
> > @@ -1121,6 +1122,15 @@ process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
> >  	fprintf (stderr,
> >  		 "%s: %d: W modifier without Word/Dword/Qword operand(s)\n",
> >  		 filename, lineno);
> > +      if (modifiers[Vex].value
> > +	  || (space > SPACE_0F
> > +	      && !(space == SPACE_EVEXMAP4
> > +		   || modifiers[EVex].value
> > +		   || modifiers[Disp8MemShift].value
> > +		   || modifiers[Broadcast].value
> > +		   || modifiers[Masking].value
> > +		   || modifiers[SAE].value)))
>
> First of all, this wants simplifying to
>
>       if (modifiers[Vex].value
> 	  || (space > SPACE_0F
> 	      && space != SPACE_EVEXMAP4
> 	      && !modifiers[EVex].value
> 	      && !modifiers[Disp8MemShift].value
> 	      && !modifiers[Broadcast].value
> 	      && !modifiers[Masking].value
> 	      && !modifiers[SAE].value))
>
> which helps readability and makes more obvious that this parallels
> tc-i386.c:is_evex_encoding(). Such a connection, where updates need
> to be made in sync, needs pointing out in code comments at both sites.

>



Done.



> Yet of course this condition won't hold anymore for combined VEX/EVEX

> templates.

>



I rebased on master and, as you predicted, this doesn't work: one entry contains both VEX and EVEX, where VEX requires No_egpr=1 and EVEX requires No_egpr=0.
In the end I chose to add "No_egpr=1" for it, and added the following check in check_EgprOperands:

check_EgprOperands (const insn_template *t)
{
-  if (t->opcode_modifier.noegpr)
+  if (t->opcode_modifier.noegpr && !need_evex_encoding ())



> > +	modifiers[No_egpr].value = 1;
> >      }
>
> And then - shouldn't at least part of this already be put in place in patch 1?

Done in patch 1.

> Finally, to avoid the split between where this attribute gets set, wouldn't it be
> possible to also handle the XSAVE/XRSTOR variants here rather than directly in
> the opcode table?

Done in patch 1.



> > @@ -187,6 +188,7 @@ mov, 0xf24, i386|No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te
> >
> >  // Move after swapping the bytes
> >  movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> > +movbe, 0x60, Movbe|APX_F|x64, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>
> In new code please omit redundant Word, Dword, and alike.

Done.

> I further wonder if it wouldn't help if i386-gen inserted the x64 for all APX
> templates, rather than open-coding that on every single template.
> Or alternatively put
>
> #define APX_F APX_F|x64
>
> earlier in the file.

Done.



> > @@ -300,6 +302,9 @@ sbb, 0x18, 0,

> >

> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInd

> ex }

> > +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, {

> >

> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIn

> dex }

>

> Looking at just the additions up to here, I'm getting the impression that
> in this patch - despite its title - you only add non-ND, non-NF insn forms
> for previously non-VEX-encoded insns. This could do with clarifying, by
> both making the title more concise and by stating the exact scope of the
> work done in the description.

Done.



> > @@ -1312,13 +1330,16 @@ getsec, 0xf37, SMX, NoSuf, {}
> >
> >  invept, 0x660f3880, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
> >  invept, 0x660f3880, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> > +invept, 0xf3f0, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
> >  invvpid, 0x660f3881, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
> >  invvpid, 0x660f3881, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> > +invvpid, 0xf3f1, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
> >
> >  // INVPCID instruction
> >
> >  invpcid, 0x660f3882, INVPCID|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
> >  invpcid, 0x660f3882, INVPCID|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
> > +invpcid, 0xf3f2, APX_F|INVPCID|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
>
> I don't think NoRex64 belongs in any EVEX template.

Removed it from the APX_F EVEX templates.



> >  crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }
> > +crc32, 0xf0, APX_F|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf|EVex128|EVexMap4, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }
>
> There's quite a bit of logic in tc-i386.c to get CRC32 right. I wonder
> if you can really get away without adjusting that logic to also take
> effect on the EVEX encodings.

Thanks for the reminder. I checked the crc32 logic in tc-i386.c; it mainly focuses on base_opcode and the i.rex W bit. The base_opcode handling we can inherit as is, while the i.rex.w bit needs to be applied to the evex.w bit.
In the following code in build_evex_prefix(), crc32 takes the else branch, and since APX_F only supports 64-bit mode this ensures i.rex & REX_W is used for evex.w. I also added some new test cases to make sure the encoding is right.

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = (evexwig == evexw1 || (i.rex & REX_W)) ? 1 : 0;
  else if (i.tm.opcode_modifier.vexw)
    w = i.tm.opcode_modifier.vexw == VEXW1 ? 1 : 0;
  else
    w = (flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1) ? 1 : 0;

+      crc32q  %r31, %r22
+      crc32q  (%r31), %r22
+      crc32b  %r19b, %r17
+      crc32b  %r19b, %r21d
+      crc32b (%r19),%ebx
+      crc32l  %r31d, %r23d
+      crc32l  (%r31), %r23d
+      crc32w  %r31w, %r21d
+      crc32w (%r31),%r21d
+      crc32   %rax, %r18

> > @@ -3408,3 +3487,4 @@ erets, 0xf20f01ca, FRED|x64, NoSuf, {}
> >  eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
> >
> >  // FRED instructions end.
> > +
>
> Nit: Stray change.

Done.



Thanks,

Lili.
  
Cui, Lili Oct. 17, 2023, 3:50 p.m. UTC | #5
> > --- a/opcodes/i386-dis-evex-mod.h
> > +++ b/opcodes/i386-dis-evex-mod.h
> > @@ -1 +1,51 @@
> >  /* Nothing at present.  */
> > +  /* MOD_EVEX_MAP4_65 */
> > +  {
> > +    { "wrussK",		{ M, Gdq }, PREFIX_DATA },
> > +  },
> > +  /* MOD_EVEX_MAP4_66_PREFIX_0 */
> > +  {
> > +    { "wrssK",		{ M, Gdq }, 0 },
> > +  },
> 
> Not very long ago I invested quite a bit of time to remove unnecessary
> decoding through mod_table[]. Please don't introduce new instances.
> Entries should be added here only when both branches are populated (iow it
> looks as if this patch shouldn't touch this file at all).
> 

Done.

> > +  /* PREFIX_EVEX_MAP4_60 */
> > +  {
> > +    { "movbeS",	{ Gv, Ev }, 0 },
> > +    { Bad_Opcode },
> > +    { "movbeS",	{ Gv, Ev }, 0 },
> > +  },
> > +  /* PREFIX_EVEX_MAP4_61 */
> > +  {
> > +    { "movbeS",	{ Ev, Gv }, 0 },
> > +    { Bad_Opcode },
> > +    { "movbeS",	{ Ev, Gv }, 0 },
> > +  },
> 
> In cases like this (of which, aiui, there will be many more), where only prefix
> 66 is valid, and only to modify operand size, it would be quite desirable to
> have a new PREFIX_... identifier to use in the parent table entry, such that this
> additional decode step can be avoided.
> 

Added PREFIX_DATA_AND_NP_ONLY to report (bad) for the F2 and F3 prefixes. Do you have a better name for the new PREFIX?
Also added two bad.s test cases for it.

        #movbe %r18w,%ax set EVEX.pp = f3 (illegal value).
        .byte 0x62, 0xfc, 0x7e, 0x08, 0x60, 0xc2
        #movbe %r18w,%ax set EVEX.pp = f2 (illegal value).
        .byte 0x62, 0xfc, 0x7f, 0x08, 0x60, 0xc2

> > --- /dev/null
> > +++ b/opcodes/i386-dis-evex-x86.h
> 
> I'm puzzled by the name suffix: x86 is kind of redundant with i386. Main
> question perhaps is: Do we really need a new file here? It's not a lot that is
> put here right now, but of course I haven't peeked ahead.

I think it should be i386-dis-evex-x86-64.h. This table is dedicated to the promoted VEX instructions; it is placed at the end of x86_64_table and marked with ins->evex_type = evex_from_vex.
That way it can share parts of the tables with VEX via the x86-64 table.

   case USE_X86_64_EVEX_FROM_VEX_TABLE:
      ins->evex_type = evex_from_vex;
      /* Fall through.  */
    case USE_X86_64_TABLE:
      vindex = ins->address_mode == mode_64bit ? 1 : 0;
      dp = &x86_64_table[dp->op[1].bytemode][vindex];
      break;

> > --- a/opcodes/i386-dis-evex.h
> > +++ b/opcodes/i386-dis-evex.h
> > @@ -164,10 +164,10 @@ static const struct dis386 evex_table[][256] = {
> >      /* F8 */
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> > @@ -854,7 +854,7 @@ static const struct dis386 evex_table[][256] = {
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      /* F0 */
> > -    { Bad_Opcode },
> > +    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F3AF0) },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> > @@ -872,7 +872,7 @@ static const struct dis386 evex_table[][256] = {
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >    },
> > -  /* EVEX_MAP5_ */
> > +  /* EVEX_MAP4_ */
> 
> While just an artifact from this, ...
> 
> > @@ -893,8 +893,8 @@ static const struct dis386 evex_table[][256] = {
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      /* 10 */
> > -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_10) },
> > -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_11) },
> > +    { Bad_Opcode },
> > +    { Bad_Opcode },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> > @@ -907,7 +907,7 @@ static const struct dis386 evex_table[][256] = {
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> > -    { PREFIX_TABLE (PREFIX_EVEX_MAP5_1D) },
> > +    { Bad_Opcode },
> >      { Bad_Opcode },
> >      { Bad_Opcode },
> >      /* 20 */
> 
> ... changes like these are extremely odd to read. Can you please try to split
> this patch such that initially you simply introduce an empty new sub-table, to
> avoid such anomalies (which will also affect "git blame" then, I expect)?
> 

Sure, it's a good suggestion. I had a hard time resolving the conflict here; done.

> > --- a/opcodes/i386-dis.c
> > +++ b/opcodes/i386-dis.c
> > @@ -132,6 +132,13 @@ enum x86_64_isa
> >    intel64
> >  };
> >
> > +enum evex_type
> > +{
> > +  evex_default = 0,
> > +  evex_from_legacy,
> > +  evex_from_vex,
> > +};
> > +
> >  struct instr_info
> >  {
> >    enum address_mode address_mode;
> > @@ -212,7 +219,6 @@ struct instr_info
> >      int ll;
> >      bool w;
> >      bool evex;
> > -    bool r;
> 
> The change to eliminate this field would certainly be nice to be separate from
> the bulk of thw APX changes here.
> 

Thanks,
Lili.
  
Cui, Lili Oct. 17, 2023, 3:52 p.m. UTC | #6
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x }
> >
> >  neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +
> >  not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
> >
> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
> }
> > +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, {
> >
> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
> x }
> 
> I can't seem to be able to find the disassembly part for these. The reason I've
> noticed is because I was meaning to figure how you deal with EVEX.nf needing
> to be zero for those (and others). For that bit specifically I expect we want a
> new %NF macro, which would be used as !%NF when (bad) (or some such)
> wants printing instead of {nf}.
> 
Their disassembly parts were put in the NDD patch; I'll move them here.
For EVEX.nf, I added a new macro %XN to print (bad) for non-NF instructions, as you can see in [PATCH 7/8] Support APX NF.

> Similary I don't think I can spot anywhere that you would check the other bits
> which need to be zero in extended EVEX. Nor Improper use of EVEX.x4 in
> certain pre-existing encodings (S/G insns at least; perhaps all others are okay).

Sorry, I don't follow you here - what are S/G insns? Could you provide more details? Thanks.

Lili.
  
Jan Beulich Oct. 17, 2023, 4:11 p.m. UTC | #7
On 17.10.2023 17:50, Cui, Lili wrote:
>>> +  /* PREFIX_EVEX_MAP4_60 */
>>> +  {
>>> +    { "movbeS",	{ Gv, Ev }, 0 },
>>> +    { Bad_Opcode },
>>> +    { "movbeS",	{ Gv, Ev }, 0 },
>>> +  },
>>> +  /* PREFIX_EVEX_MAP4_61 */
>>> +  {
>>> +    { "movbeS",	{ Ev, Gv }, 0 },
>>> +    { Bad_Opcode },
>>> +    { "movbeS",	{ Ev, Gv }, 0 },
>>> +  },
>>
>> In cases like this (of which, aiui, there will be many more), where only prefix
>> 66 is valid, and only to modify operand size, it would be quite desirable to
>> have a new PREFIX_... identifier to use in the parent table entry, such that this
>> additional decode step can be avoided.
>>
> 
> Added PREFIX_DATA_AND_NP_ONLY to report bad for f2 and f3 prefix. Do you have a better name for the new PREFIX ?

If PREFIX_DATA itself cannot suitable be re-used here, maybe PREFIX_NP_OR_DATA?

Jan
  
Jan Beulich Oct. 17, 2023, 4:12 p.m. UTC | #8
On 17.10.2023 17:52, Cui, Lili wrote:
>> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
>> x }
>>>
>>>  neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
>>>
>> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
>> }
>>> +
>>>  not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, {
>>>
>> Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex
>> }
>>> +not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, {
>>>
>> +Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseInde
>> x }
>>
>> I can't seem to be able to find the disassembly part for these. The reason I've
>> noticed is because I was meaning to figure how you deal with EVEX.nf needing
>> to be zero for those (and others). For that bit specifically I expect we want a
>> new %NF macro, which would be used as !%NF when (bad) (or some such)
>> wants printing instead of {nf}.
>>
> Their disassembly part are putted in NDD patch, I'll move them here. 
> For EVEX.nf, I added a new macro %XN to print bad for Non-NF instructions. As you can see in [PATCH 7/8] Support APX NF.
> 
>> Similary I don't think I can spot anywhere that you would check the other bits
>> which need to be zero in extended EVEX. Nor Improper use of EVEX.x4 in
>> certain pre-existing encodings (S/G insns at least; perhaps all others are okay).
> 
> Sorry, I can't get you here, what are S/G insns, could you provide more details here, thanks.

S/G is scatter/gather (i.e. AVX2 and AVX512).

Jan
  
Cui, Lili Oct. 18, 2023, 2:02 a.m. UTC | #9
> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> 
> On 17.10.2023 17:50, Cui, Lili wrote:
> >>> +  /* PREFIX_EVEX_MAP4_60 */
> >>> +  {
> >>> +    { "movbeS",	{ Gv, Ev }, 0 },
> >>> +    { Bad_Opcode },
> >>> +    { "movbeS",	{ Gv, Ev }, 0 },
> >>> +  },
> >>> +  /* PREFIX_EVEX_MAP4_61 */
> >>> +  {
> >>> +    { "movbeS",	{ Ev, Gv }, 0 },
> >>> +    { Bad_Opcode },
> >>> +    { "movbeS",	{ Ev, Gv }, 0 },
> >>> +  },
> >>
> >> In cases like this (of which, aiui, there will be many more), where
> >> only prefix
> >> 66 is valid, and only to modify operand size, it would be quite
> >> desirable to have a new PREFIX_... identifier to use in the parent
> >> table entry, such that this additional decode step can be avoided.
> >>
> >
> > Added PREFIX_DATA_AND_NP_ONLY to report bad for f2 and f3 prefix. Do
> you have a better name for the new PREFIX ?
> 
> If PREFIX_DATA itself cannot suitable be re-used here, maybe
> PREFIX_NP_OR_DATA?

Ok, thanks! I'll update it in V3 and the same goes for other emails.

Lili.
  
Jan Beulich Oct. 18, 2023, 6:10 a.m. UTC | #10
On 18.10.2023 04:02, Cui, Lili wrote:
>> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
>>
>> On 17.10.2023 17:50, Cui, Lili wrote:
>>>>> +  /* PREFIX_EVEX_MAP4_60 */
>>>>> +  {
>>>>> +    { "movbeS",	{ Gv, Ev }, 0 },
>>>>> +    { Bad_Opcode },
>>>>> +    { "movbeS",	{ Gv, Ev }, 0 },
>>>>> +  },
>>>>> +  /* PREFIX_EVEX_MAP4_61 */
>>>>> +  {
>>>>> +    { "movbeS",	{ Ev, Gv }, 0 },
>>>>> +    { Bad_Opcode },
>>>>> +    { "movbeS",	{ Ev, Gv }, 0 },
>>>>> +  },
>>>>
>>>> In cases like this (of which, aiui, there will be many more), where
>>>> only prefix
>>>> 66 is valid, and only to modify operand size, it would be quite
>>>> desirable to have a new PREFIX_... identifier to use in the parent
>>>> table entry, such that this additional decode step can be avoided.
>>>>
>>>
>>> Added PREFIX_DATA_AND_NP_ONLY to report bad for f2 and f3 prefix. Do
>> you have a better name for the new PREFIX ?
>>
>> If PREFIX_DATA itself cannot suitable be re-used here, maybe
>> PREFIX_NP_OR_DATA?
> 
> Ok, thanks! I'll update it in V3 and the same goes for other emails.

Btw, it typically helps to wait a little further for replies on the
earlier version threads before sending a new version, especially (but
not limited to) when you raise questions in your replies.

Jan
  
Cui, Lili Oct. 18, 2023, 6:31 a.m. UTC | #11
> >> Similary I don't think I can spot anywhere that you would check the
> >> other bits which need to be zero in extended EVEX. Nor Improper use
> >> of EVEX.x4 in certain pre-existing encodings (S/G insns at least; perhaps all
> others are okay).
> >
> > Sorry, I can't get you here, what are S/G insns, could you provide more
> details here, thanks.
> 
> S/G is scatter/gather (i.e. AVX2 and AVX512).
> 
I think you mean EVEX.V4; scatter/gather has VSIB, which needs to reuse this bit (EVEX.x4 is EVEX.P[10], which has a fixed value of 1).
Our current code would reassign that bit, so I changed it to check for the upper 16 GPR32 registers instead. Do you think that is ok?

@@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
   if (i.rex2 & REX_R)
     i.vex.bytes[1] &= 0xef;
   if (i.vex.register_specifier
-      && register_number (i.vex.register_specifier) > 0xf)
+      && (i.vex.register_specifier->reg_flags & RegRex2))
     i.vex.bytes[3] &= 0xf7;
   if (i.rex2 & REX_B)
     i.vex.bytes[1] |= 0x08;

Thanks,
Lili
  
Jan Beulich Oct. 18, 2023, 6:40 a.m. UTC | #12
On 17.10.2023 17:48, Cui, Lili wrote:
>> On 19.09.2023 17:25, Cui, Lili wrote:
> 
>>> --- a/gas/config/tc-i386.c
> 
>>> +++ b/gas/config/tc-i386.c
> 
>>> @@ -1945,6 +1945,30 @@ cpu_flags_match (const insn_template *t)
> 
>>>                         && (!x.bitfield.cpuvpclmulqdq ||
> 
>> cpu.bitfield.cpuvpclmulqdq))
> 
>>>                       match |= CPU_FLAGS_ARCH_MATCH;
> 
>>>             }
> 
>>> +        else if (x.bitfield.cpuapx_f)
> 
>>> +          {
> 
>>> +            if (cpu.bitfield.cpuapx_f
> 
>>> +                      && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe)
> 
>>> +                      && (!x.bitfield.cpuept || cpu.bitfield.cpuept)
> 
>>> +                      && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid)
> 
>>> +                      && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2)
> 
>>> +                      && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2)
> 
>>> +                      && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi)
> 
>>> +                      && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx)
> 
>>> +                      && (!x.bitfield.cpusha || cpu.bitfield.cpusha)
> 
>>> +                      && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw)
> 
>>> +                      && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq)
> 
>>> +                      && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f)
> 
>>> +                      && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk)
> 
>>> +                      && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b)
> 
>>> +                      && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri)
> 
>>> +                      && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd)
> 
>>> +                      && (!x.bitfield.cpukl || cpu.bitfield.cpukl)
> 
>>> +                      && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl)
> 
>>> +                      && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd)
> 
>>> +                      && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int))
> 
>>> +                    match |= CPU_FLAGS_ARCH_MATCH;
> 
>>> +          }
> 
>>>           else
> 
>>>             match |= CPU_FLAGS_ARCH_MATCH;
> 
>>>
> 
>>
> 
>> This is getting unwieldy, so I think we will need to think of a better way of
> 
>> expressing both "multiple ISAs need to be enabled" and "one of a set of ISAs
> 
>> needs to be enabled". It's only the mix of these expressed in a uniform way in
> 
>> the insn table that requires these extra conditionals. With the size of
> 
>> i386_cpu_attr greatly shrunk as of recently, I wonder if we couldn't simply add
> 
>> a 2nd instance of it to insn_template. One would be "all of these are required",
> 
>> while the other would be "any one of these is sufficient".
> 
>>
> 
> 
> 
> I didn't find a better way to distinguish these two types of requirements in insn_template.
> 
> I wrote a new function "cpu_flags_not_or_check" to replace these clumsy judgments.
> 
> 
> 
> static INLINE int
> 
> cpu_flags_not_or_check (const union i386_cpu_flags *x,
> 
>                         const union i386_cpu_flags *y)
> 
> {
> 
>   switch (ARRAY_SIZE(x->array))
> 
>     {
> 
>     case 5:
> 
>       if ((~x->array[4] | y->array[4]) != 0xffffffff)
> 
>         return 0;
> 
>       /* Fall through.  */
> 
>     case 4:
> 
>       if ((~x->array[3] | y->array[3]) != 0xffffffff)
> 
>         return 0;
> 
>       /* Fall through.  */
> 
>     case 3:
> 
>       if ((~x->array[2] | y->array[2]) != 0xffffffff)
> 
>         return 0;
> 
>       /* Fall through.  */
> 
>     case 2:
> 
>       if ((~x->array[1] | y->array[1]) != 0xffffffff)
> 
>         return 0;
> 
>       /* Fall through.  */
> 
>     case 1:
> 
>      return ((~x->array[1] | y->array[1]) == 0Xffffffff);
> 
>       break;
> 
>     default:
> 
>       abort ();
> 
>     }
> 
> }

Without seeing how this is used I can't comment on it. It feels though
as if you may not have fully understood my earlier reply: Even prior
to APX we already have cases where one CPU specifier in the opcode
table using | means "both" and another means "either". I think we want
to split that, and thus simplify the logic in cpu_flags_match(). That's
separate prereq work, of course.

Thing is that prior to 734dfd1cc966 this would have been prohibitively
expensive in terms of table size growth. But now we can afford having
two i386_cpu_attr fields, one meaning "all of these", the other meaning
"any of these". To limit churn in the opcode table, I'd be inclined to
continue to express CPU requirements in a single field there, using e.g.
(CpuA|CpuB)&CpuC&CpuD (Cpu prefixes re-added here for clarity, even if
they aren't present in the opcode table anymore).

I'd be happy to do that prereq work (if we can agree on the approach),
but it may mean a little bit of delay, as after my vacation I need to
catch up with a few other things first.
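
Purely as a sketch of that direction (every identifier below is invented
for illustration and isn't something already agreed on), the opcode table
could keep a single combined expression, e.g.

  (CpuA|CpuB)&CpuC&CpuD

which would be split (presumably by i386-gen) into an "all of these" and
an "any of these" attribute per template. The assembler-side test then
reduces to two word-wise checks over the flag sets:

  /* Sketch only: "all" must be a subset of the enabled flags, and "any",
     if non-empty, must intersect them.  i386_cpu_flags / ARRAY_SIZE as
     already used elsewhere in tc-i386.c.  */
  static bool
  cpu_attrs_satisfied (const i386_cpu_flags *all,
		       const i386_cpu_flags *any,
		       const i386_cpu_flags *enabled)
  {
    bool any_empty = true, any_hit = false;
    unsigned int j;

    for (j = 0; j < ARRAY_SIZE (all->array); j++)
      {
	/* Every bit required by "all" has to be enabled.  */
	if (all->array[j] & ~enabled->array[j])
	  return false;
	if (any->array[j])
	  any_empty = false;
	if (any->array[j] & enabled->array[j])
	  any_hit = true;
      }

    /* An empty "any" set is trivially satisfied.  */
    return any_empty || any_hit;
  }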

>>> +  if (i.prefix[DATA_PREFIX] != 0)
> 
>>> +    {
> 
>>> +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
> 
>>> +      i.prefix[DATA_PREFIX] = 0;
> 
>>> +    }
> 
>>
> 
>> While this looks to be correct for the case when the prefix was derived from an
> 
>> insn template and the use of 16-bit operands, I don't think it is uniformly
> 
>> correct when "data16" was used as a prefix explicitly. In such a case either
> 
>> REX2 encoding needs to be used, or an error needs emitting.
> 
>>
> 
>> You may further want to assert that i.tm.opcode_modifier.opcodeprefix is still
> 
>> zero ahead of the assignment.
> 
>>
> 
> 
> 
> For REX2 encoding, we add no special handling, just follow REX.
> 
> For EVEX-promoted encoding, such as “data16 aand   %r25d,0x123(%r31,%rax,4)”, the following existing code will report an error.
> 
> 
> 
>   if (is_any_vex_encoding (&i.tm)
> 
>       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
> 
>       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX)
> 
>     {
> 
>       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD insns.  */
> 
>       if (i.prefix[DATA_PREFIX])
> 
>         {
> 
>           as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
> 
>           return;
> 
>         }

Thinking of it, I may need to revise my earlier comment some: RAO-INT insns
are a bad example here, since despite being legacy-encoded they don't permit
a data16 prefix to specify 16-bit operand size. Consider the same for e.g.
AND. The legacy form permits use of data16 (leaving aside that it's better /
clearer to simply use 16-bit register names), so the promoted forms likely
ought to permit such as well. IOW perhaps the code you have is correct, but
the check you quote may need adjusting.

>>> @@ -1121,6 +1122,15 @@ process_i386_opcode_modifier (FILE *table,
> 
>> char *mod, unsigned int space,
> 
>>>         fprintf (stderr,
> 
>>>                        "%s: %d: W modifier without Word/Dword/Qword
> 
>> operand(s)\n",
> 
>>>                        filename, lineno);
> 
>>> +      if (modifiers[Vex].value
> 
>>> +        || (space > SPACE_0F
> 
>>> +            && !(space == SPACE_EVEXMAP4
> 
>>> +                       || modifiers[EVex].value
> 
>>> +                       || modifiers[Disp8MemShift].value
> 
>>> +                       || modifiers[Broadcast].value
> 
>>> +                       || modifiers[Masking].value
> 
>>> +                       || modifiers[SAE].value)))
> 
>>
> 
>> First of all, this wants simplifying to
> 
>>
> 
>>       if (modifiers[Vex].value
> 
>>              || (space > SPACE_0F
> 
>>                  && space != SPACE_EVEXMAP4
> 
>>                  && !modifiers[EVex].value
> 
>>                  && !modifiers[Disp8MemShift].value
> 
>>                  && !modifiers[Broadcast].value
> 
>>                  && !modifiers[Masking].value
> 
>>                  && !modifiers[SAE].value))
> 
>>
> 
>> which helps readability and makes more obvious that this parallels tc-
> 
>> i386.c:is_evex_encoding(). Such a connection, where updates need to be
> 
>> made in sync, needs pointing out in code comments at both sites.
> 
>>
> 
> 
> 
> Done.
> 
> 
> 
>> Yet of course this condition won't hold anymore for combined VEX/EVEX
> 
>> templates.
> 
>>
> 
> 
> 
> I rebased master and as you predicted this doesn't work, one entry contains both VEX and EVEX, VEX requires No_egpr=1 and EVEX requires No_egpr=0,
> 
> Finally I chose to add "No_egpr=1" for it. And added the following judgment in check_EgprOperands.
> 
> 
> 
> check_EgprOperands (const insn_template *t)
> 
> {
> 
> -  if (t->opcode_modifier.noegpr)
> 
> if (t->opcode_modifier.noegpr && !need_evex_encoding())

I'll need to look at this in context, so I can't comment right here.

Also, just to mention it: Something's wrong with your reply (also visible
in the list archive copy), harming readability quite a bit. There are
extra blank lines between any two real ones.

Jan
  
Jan Beulich Oct. 18, 2023, 6:47 a.m. UTC | #13
On 18.10.2023 08:31, Cui, Lili wrote:
>>>> Similary I don't think I can spot anywhere that you would check the
>>>> other bits which need to be zero in extended EVEX. Nor Improper use
>>>> of EVEX.x4 in certain pre-existing encodings (S/G insns at least; perhaps all
>> others are okay).
>>>
>>> Sorry, I can't get you here, what are S/G insns, could you provide more
>> details here, thanks.
>>
>> S/G is scatter/gather (i.e. AVX2 and AVX512).
>>
> I think you mean EVEX.V4, scatter/gather has VSIB which needs to reuse this bit (EVEX.x4 is EVEX.p[10] which is fixed value 1),
> For our current code we will reassign that bit and now I changed it to check the upper 16 registers of GPR32 , do you think it is ok?
> 
> @@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
>    if (i.rex2 & REX_R)
>      i.vex.bytes[1] &= 0xef;
>    if (i.vex.register_specifier
> -      && register_number (i.vex.register_specifier) > 0xf)
> +      && (i.vex.register_specifier->reg_flags & RegRex2))
>      i.vex.bytes[3] &= 0xf7;
>    if (i.rex2 & REX_B)
>      i.vex.bytes[1] |= 0x08;

First of all my comment was disassembly related; you stripped a little
too much context for this to remain visible here. And then I think I
did mean EVEX.x4 - as you say it needs to be fixed-1 in e.g. S/G insns,
and the checking thereof is what I'm missing.

Jan
  
Cui, Lili Oct. 18, 2023, 7:52 a.m. UTC | #14
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Wednesday, October 18, 2023 2:47 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> 
> On 18.10.2023 08:31, Cui, Lili wrote:
> >>>> Similary I don't think I can spot anywhere that you would check the
> >>>> other bits which need to be zero in extended EVEX. Nor Improper use
> >>>> of EVEX.x4 in certain pre-existing encodings (S/G insns at least;
> >>>> perhaps all
> >> others are okay).
> >>>
> >>> Sorry, I can't get you here, what are S/G insns, could you provide
> >>> more
> >> details here, thanks.
> >>
> >> S/G is scatter/gather (i.e. AVX2 and AVX512).
> >>
> > I think you mean EVEX.V4, scatter/gather has VSIB which needs to reuse
> > this bit (EVEX.x4 is EVEX.p[10] which is fixed value 1), For our current code
> we will reassign that bit and now I changed it to check the upper 16 registers
> of GPR32 , do you think it is ok?
> >
> > @@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
> >    if (i.rex2 & REX_R)
> >      i.vex.bytes[1] &= 0xef;
> >    if (i.vex.register_specifier
> > -      && register_number (i.vex.register_specifier) > 0xf)
> > +      && (i.vex.register_specifier->reg_flags & RegRex2))
> >      i.vex.bytes[3] &= 0xf7;
> >    if (i.rex2 & REX_B)
> >      i.vex.bytes[1] |= 0x08;
> 
> First of all my comment was disassembly related; you stripped a little too
> much context for this to remain visible here. And then I think I did mean
> EVEX.x4 - as you say it needs to be fixed-1 in e.g. S/G insns, and the checking
> thereof is what I'm missing.
> 
Ok, I will drop the encoder changes and add an X4 check in the decoder when the instruction has a vex_vsib* type.

Lili.
  
Jan Beulich Oct. 18, 2023, 8:21 a.m. UTC | #15
On 18.10.2023 09:52, Cui, Lili wrote:
> 
> 
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Wednesday, October 18, 2023 2:47 PM
>> To: Cui, Lili <lili.cui@intel.com>
>> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
>> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
>>
>> On 18.10.2023 08:31, Cui, Lili wrote:
>>>>>> Similary I don't think I can spot anywhere that you would check the
>>>>>> other bits which need to be zero in extended EVEX. Nor Improper use
>>>>>> of EVEX.x4 in certain pre-existing encodings (S/G insns at least;
>>>>>> perhaps all
>>>> others are okay).
>>>>>
>>>>> Sorry, I can't get you here, what are S/G insns, could you provide
>>>>> more
>>>> details here, thanks.
>>>>
>>>> S/G is scatter/gather (i.e. AVX2 and AVX512).
>>>>
>>> I think you mean EVEX.V4, scatter/gather has VSIB which needs to reuse
>>> this bit (EVEX.x4 is EVEX.p[10] which is fixed value 1), For our current code
>> we will reassign that bit and now I changed it to check the upper 16 registers
>> of GPR32 , do you think it is ok?
>>>
>>> @@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
>>>    if (i.rex2 & REX_R)
>>>      i.vex.bytes[1] &= 0xef;
>>>    if (i.vex.register_specifier
>>> -      && register_number (i.vex.register_specifier) > 0xf)
>>> +      && (i.vex.register_specifier->reg_flags & RegRex2))
>>>      i.vex.bytes[3] &= 0xf7;
>>>    if (i.rex2 & REX_B)
>>>      i.vex.bytes[1] |= 0x08;
>>
>> First of all my comment was disassembly related; you stripped a little too
>> much context for this to remain visible here. And then I think I did mean
>> EVEX.x4 - as you say it needs to be fixed-1 in e.g. S/G insns, and the checking
>> thereof is what I'm missing.
>>
> Ok ,  I will drop the encoder changes, and add an X4 check when instruction has vex_vsib* type in decoder.

You understand though that I used the S/G insns as example only. I didn't do
a proper check whether any others might also be affected. In particular ones
not allowing for memory operands might be.

Jan
  
Cui, Lili Oct. 18, 2023, 10:44 a.m. UTC | #16
> Without seeing how this is used I can't comment on it. It feels though as if you
> may not have fully understood my earlier reply: Even prior to APX we already
> have cases where one CPU specifier in the opcode table using | means "both"
> and another means "either". I think we want to split that, and thus simplify
> the logic in cpu_flags_match(). That's separate prereq work, of course.
> 
> Thing is that prior to 734dfd1cc966 this would have been prohibitively
> expensive in terms of table size growth. But now we can afford having two
> i386_cpu_attr fields, one meaning "all of these", the other meaning "any of
> these". To limit churn in the opcode table, I'd be inclined to continue to
> express CPU requirements in a single field there, using e.g.
> (CpuA|CpuB)&CpuC&CpuD (Cpu prefixes re-added here for clarity, even if
> they aren't present in the opcode table anymore).
> 
> I'd be happy to do that prereq work (if we can agree on the approach), but it
> may mean a little bit of delay, as after my vacation I need to catch up with a
> few other thinghs first.
> 

It would be great if you could do this (I agree with the approach). There is no hurry; it may take a long time to get APX committed to master.

> >>> +  if (i.prefix[DATA_PREFIX] != 0)
> >
> >>> +    {
> >
> >>> +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
> >
> >>> +      i.prefix[DATA_PREFIX] = 0;
> >
> >>> +    }
> >
> >>
> >
> >> While this looks to be correct for the case when the prefix was
> >> derived from an
> >
> >> insn template and the use of 16-bit operands, I don't think it is
> >> uniformly
> >
> >> correct when "data16" was used as a prefix explicitly. In such a case
> >> either
> >
> >> REX2 encoding needs to be used, or an error needs emitting.
> >
> >>
> >
> >> You may further want to assert that i.tm.opcode_modifier.opcodeprefix
> >> is still
> >
> >> zero ahead of the assignment.
> >
> >>
> >
> >
> >
> > For REX2 encoding, we add no special handling, just follow REX.
> >
> > For EVEX-promoted encoding, such as “data16
> aand   %r25d,0x123(%r31,%rax,4)”, the following existing code will report an
> error.
> >
> >
> >
> >   if (is_any_vex_encoding (&i.tm)
> >
> >       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
> >
> >       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >=
> > RegMMX)
> >
> >     {
> >
> >       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD
> > insns.  */
> >
> >       if (i.prefix[DATA_PREFIX])
> >
> >         {
> >
> >           as_bad (_("data size prefix invalid with `%s'"), insn_name
> > (&i.tm));
> >
> >           return;
> >
> >         }
> 
> Thinking of it, I may need to revise my earlier comment some: RAO-INT insns
> are a bad example here, since despite being legacy-encoded they don't permit
> a data16 prefix to specify 16-bit operand size. Consider the same for e.g.
> AND. The legacy form permits use of data16 (leaving aside that it's better /
> clearer to simply use 16-bit register names), so the promoted forms likely
> ought to permit such as well. IOW perhaps the code you have is correct, but
> the check you quote may need adjusting.
> 

I listed 5 instructions to check the data16 prefix; is the last one what you want?

$ cat add.s

and %ebx, %eax
data16 and %ebx, %eax
and %ebx, %r16d
data16 and %ebx, %r16d
{evex} data16 and %ebx, %r16d

$ objdump -dw  and.o

   0:   21 d8                   and    %ebx,%eax
   2:   66 21 d8                and    %bx,%ax
   5:   d5 10 21 d8             and    %ebx,%r16d
   9:   66 d5 10 21 d8          and    %bx,%r16w
   Error: data size prefix invalid with `and'

> >
> > I rebased master and as you predicted this doesn't work, one entry
> > contains both VEX and EVEX, VEX requires No_egpr=1 and EVEX requires
> > No_egpr=0,
> >
> > Finally I chose to add "No_egpr=1" for it. And added the following judgment
> in check_EgprOperands.
> >
> >
> >
> > check_EgprOperands (const insn_template *t)
> >
> > {
> >
> > -  if (t->opcode_modifier.noegpr)
> >
> > if (t->opcode_modifier.noegpr && !need_evex_encoding())
> 
> I'll need to look at this in context, so I can't comment right here.
> 
> Also, just to mention it: Something's wrong with your reply (also visible in the
> list archive copy), harming readability quite a bit. There are extra blank lines
> between any two real ones.
> 
Oh, sorry, I tried inserting a table into the email and changed it to HTML format, but forgot to change it back.

Thanks,
Lili.
  
Jan Beulich Oct. 18, 2023, 10:50 a.m. UTC | #17
On 18.10.2023 12:44, Cui, Lili wrote:
>> Without seeing how this is used I can't comment on it. It feels though as if you
>> may not have fully understood my earlier reply: Even prior to APX we already
>> have cases where one CPU specifier in the opcode table using | means "both"
>> and another means "either". I think we want to split that, and thus simplify
>> the logic in cpu_flags_match(). That's separate prereq work, of course.
>>
>> Thing is that prior to 734dfd1cc966 this would have been prohibitively
>> expensive in terms of table size growth. But now we can afford having two
>> i386_cpu_attr fields, one meaning "all of these", the other meaning "any of
>> these". To limit churn in the opcode table, I'd be inclined to continue to
>> express CPU requirements in a single field there, using e.g.
>> (CpuA|CpuB)&CpuC&CpuD (Cpu prefixes re-added here for clarity, even if
>> they aren't present in the opcode table anymore).
>>
>> I'd be happy to do that prereq work (if we can agree on the approach), but it
>> may mean a little bit of delay, as after my vacation I need to catch up with a
>> few other thinghs first.
>>
> 
> It would be great if you could do this (agree with the approach), it is not hurry, it may take a long time to commit the APX  to the master.
> 
>>>>> +  if (i.prefix[DATA_PREFIX] != 0)
>>>
>>>>> +    {
>>>
>>>>> +      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
>>>
>>>>> +      i.prefix[DATA_PREFIX] = 0;
>>>
>>>>> +    }
>>>
>>>>
>>>
>>>> While this looks to be correct for the case when the prefix was
>>>> derived from an
>>>
>>>> insn template and the use of 16-bit operands, I don't think it is
>>>> uniformly
>>>
>>>> correct when "data16" was used as a prefix explicitly. In such a case
>>>> either
>>>
>>>> REX2 encoding needs to be used, or an error needs emitting.
>>>
>>>>
>>>
>>>> You may further want to assert that i.tm.opcode_modifier.opcodeprefix
>>>> is still
>>>
>>>> zero ahead of the assignment.
>>>
>>>>
>>>
>>>
>>>
>>> For REX2 encoding, we add no special handling, just follow REX.
>>>
>>> For EVEX-promoted encoding, such as “data16
>> aand   %r25d,0x123(%r31,%rax,4)”, the following existing code will report an
>> error.
>>>
>>>
>>>
>>>   if (is_any_vex_encoding (&i.tm)
>>>
>>>       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
>>>
>>>       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >=
>>> RegMMX)
>>>
>>>     {
>>>
>>>       /* Check for data size prefix on VEX/XOP/EVEX encoded and SIMD
>>> insns.  */
>>>
>>>       if (i.prefix[DATA_PREFIX])
>>>
>>>         {
>>>
>>>           as_bad (_("data size prefix invalid with `%s'"), insn_name
>>> (&i.tm));
>>>
>>>           return;
>>>
>>>         }
>>
>> Thinking of it, I may need to revise my earlier comment some: RAO-INT insns
>> are a bad example here, since despite being legacy-encoded they don't permit
>> a data16 prefix to specify 16-bit operand size. Consider the same for e.g.
>> AND. The legacy form permits use of data16 (leaving aside that it's better /
>> clearer to simply use 16-bit register names), so the promoted forms likely
>> ought to permit such as well. IOW perhaps the code you have is correct, but
>> the check you quote may need adjusting.
>>
> 
> I listed 5 instructions to check the data16 prefix, is the last one what you want?
> 
> $ cat add.s
> 
> and %ebx, %eax
> data16 and %ebx, %eax
> and %ebx, %r16d
> data16 and %ebx, %r16d
> {evex} data16 and %ebx, %r16d
> 
> $ objdump -dw  and.o
> 
>    0:   21 d8                   and    %ebx,%eax
>    2:   66 21 d8                and    %bx,%ax
>    5:   d5 10 21 d8             and    %ebx,%r16d
>    9:   66 d5 10 21 d8          and    %bx,%r16w
>    Error: data size prefix invalid with `and'

Kind of. I was really thinking of e.g.

data16 and %ebx, %ecx, %edx

but yes, forms with {evex} would be similarly affected.

Jan
  
Cui, Lili Oct. 18, 2023, 11:30 a.m. UTC | #18
> >> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> >>
> >> On 18.10.2023 08:31, Cui, Lili wrote:
> >>>>>> Similary I don't think I can spot anywhere that you would check
> >>>>>> the other bits which need to be zero in extended EVEX. Nor
> >>>>>> Improper use of EVEX.x4 in certain pre-existing encodings (S/G
> >>>>>> insns at least; perhaps all
> >>>> others are okay).
> >>>>>
> >>>>> Sorry, I can't get you here, what are S/G insns, could you provide
> >>>>> more
> >>>> details here, thanks.
> >>>>
> >>>> S/G is scatter/gather (i.e. AVX2 and AVX512).
> >>>>
> >>> I think you mean EVEX.V4, scatter/gather has VSIB which needs to
> >>> reuse this bit (EVEX.x4 is EVEX.p[10] which is fixed value 1), For
> >>> our current code
> >> we will reassign that bit and now I changed it to check the upper 16
> >> registers of GPR32 , do you think it is ok?
> >>>
> >>> @@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
> >>>    if (i.rex2 & REX_R)
> >>>      i.vex.bytes[1] &= 0xef;
> >>>    if (i.vex.register_specifier
> >>> -      && register_number (i.vex.register_specifier) > 0xf)
> >>> +      && (i.vex.register_specifier->reg_flags & RegRex2))
> >>>      i.vex.bytes[3] &= 0xf7;
> >>>    if (i.rex2 & REX_B)
> >>>      i.vex.bytes[1] |= 0x08;
> >>
> >> First of all my comment was disassembly related; you stripped a
> >> little too much context for this to remain visible here. And then I
> >> think I did mean
> >> EVEX.x4 - as you say it needs to be fixed-1 in e.g. S/G insns, and
> >> the checking thereof is what I'm missing.
> >>
> > Ok ,  I will drop the encoder changes, and add an X4 check when instruction
> has vex_vsib* type in decoder.
> 
> You understand though that I used the S/G insns as example only. I didn't do a
> proper check whether any others might also be affected. In particular ones not
> allowing for memory operands might be.
> 
OK, I'll try to add more checks, and add description for them.

Lili.
  
Cui, Lili Oct. 19, 2023, 11:58 a.m. UTC | #19
> -----Original Message-----
> From: Cui, Lili
> Sent: Wednesday, October 18, 2023 7:31 PM
> To: Jan Beulich <jbeulich@suse.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
> Subject: RE: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> 
> > >> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> > >>
> > >> On 18.10.2023 08:31, Cui, Lili wrote:
> > >>>>>> Similary I don't think I can spot anywhere that you would check
> > >>>>>> the other bits which need to be zero in extended EVEX. Nor
> > >>>>>> Improper use of EVEX.x4 in certain pre-existing encodings (S/G
> > >>>>>> insns at least; perhaps all
> > >>>> others are okay).
> > >>>>>
> > >>>>> Sorry, I can't get you here, what are S/G insns, could you
> > >>>>> provide more
> > >>>> details here, thanks.
> > >>>>
> > >>>> S/G is scatter/gather (i.e. AVX2 and AVX512).
> > >>>>
> > >>> I think you mean EVEX.V4, scatter/gather has VSIB which needs to
> > >>> reuse this bit (EVEX.x4 is EVEX.p[10] which is fixed value 1), For
> > >>> our current code
> > >> we will reassign that bit and now I changed it to check the upper
> > >> 16 registers of GPR32 , do you think it is ok?
> > >>>
> > >>> @@ -4252,7 +4252,7 @@ build_apx_evex_prefix (void)
> > >>>    if (i.rex2 & REX_R)
> > >>>      i.vex.bytes[1] &= 0xef;
> > >>>    if (i.vex.register_specifier
> > >>> -      && register_number (i.vex.register_specifier) > 0xf)
> > >>> +      && (i.vex.register_specifier->reg_flags & RegRex2))
> > >>>      i.vex.bytes[3] &= 0xf7;
> > >>>    if (i.rex2 & REX_B)
> > >>>      i.vex.bytes[1] |= 0x08;
> > >>
> > >> First of all my comment was disassembly related; you stripped a
> > >> little too much context for this to remain visible here. And then I
> > >> think I did mean
> > >> EVEX.x4 - as you say it needs to be fixed-1 in e.g. S/G insns, and
> > >> the checking thereof is what I'm missing.
> > >>
> > > Ok ,  I will drop the encoder changes, and add an X4 check when
> > > instruction
> > has vex_vsib* type in decoder.
> >
> > You understand though that I used the S/G insns as example only. I
> > didn't do a proper check whether any others might also be affected. In
> > particular ones not allowing for memory operands might be.
> >
> OK, I'll try to add more checks, and add description for them.
> 

Added some bit checks in the decoder.

--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -11533,6 +11642,13 @@ OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
                abort ();
              if (ins->vex.evex)
                {
+                 /* S/G EVEX insns require EVEX.P[10] == 1 */
+                 if (ins->rex2 & REX_X)
+                   {
+                     oappend (ins, "(bad)");
+                     return true;
+                   }
+
@@ -9003,6 +9003,9 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
        case 0x4:
          vex_table_index = EVEX_MAP4;
          ins->evex_type = evex_from_legacy;
+         /* EVEX from legacy instrucions require EVEX.P[2:0] must be 0x04.  */
+         if (!((*ins->codep & 0x7) == 0x04))
+           return  &bad_opcode;
          break;
        case 0x5:
          vex_table_index = EVEX_MAP5;
@@ -9063,6 +9066,22 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
        }

       ins->need_vex = 4;
+
+      /* EVEX from legacy requrie EVEX.P[17:16] must be 0, EVEX.P[23:21] must
+        be 0.
+        EVEX from evex requrie EVEX.P[17:16] must be 0. EVEX.P[23:22] must
+        be 0, EVEX.P[20] must be 0.  */
+      if (ins->evex_type == evex_from_legacy || ins->evex_type == evex_from_vex)
+       {
+         if (!((*ins->codep & 0x3) == 0)
+             || !((*ins->codep >> 6 & 0x3) == 0)
+             || (ins->evex_type == evex_from_legacy
+                 && !((*ins->codep >> 5 & 0x1) == 0))
+             || (ins->evex_type == evex_from_vex
+                 && !ins->vex.b))
+           return &bad_opcode;

Corresponding testcase

        #VSIB vpgatherqq 0x7b(%rbp,%zmm17,8),%zmm16{%k1} set EVEX.P[10] == 0(illegal value).
        .byte 0x62, 0xe2, 0xf9, 0x41, 0x91, 0x84, 0xcd, 0x7b, 0x00, 0x00, 0x00
        .byte 0xff
        #EVEX_MAP4 adox %r25d,%edx set EVEX.P[2:0] == 1 (illegal value).
        .byte 0x62, 0xdd, 0x7e, 0x08, 0x66, 0xd1
        .byte 0xff
        #EVEX_MAP4 adox %r25d,%edx set EVEX.P[17:16] == 1 (illegal value).
        .byte 0x62, 0xdc, 0x7e, 0x09, 0x66, 0xd1
        #EVEX_MAP4 adox %r25d,%edx set EVEX.P[23:21] == 1 (illegal value).
        .byte 0x62, 0xdc, 0x7e, 0x18, 0x66, 0xd1
        #EVEX from VEX enqcmd 0x123(%r31,%rax,4),%r31 EVEX.P[17:16] == 1 (illegal value).
        .byte 0x62, 0x4c, 0x7f, 0x09, 0xf8, 0xbc, 0x87, 0x23, 0x01, 0x00, 0x00
        .byte 0xff
        #EVEX from VEX enqcmd 0x123(%r31,%rax,4),%r31 EVEX.P[23:22] == 1 (illegal value).
        .byte 0x62, 0x4c, 0x7f, 0x18, 0xf8, 0xbc, 0x87, 0x23, 0x01, 0x00, 0x00

Lili.
  
Jan Beulich Oct. 19, 2023, 3:24 p.m. UTC | #20
On 19.10.2023 13:58, Cui, Lili wrote:
>> -----Original Message-----
>> From: Cui, Lili
>> Sent: Wednesday, October 18, 2023 7:31 PM
>>
>> OK, I'll try to add more checks, and add description for them.
>>
> 
> Added some bit checks in the decoder.
> 
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -11533,6 +11642,13 @@ OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
>                 abort ();
>               if (ins->vex.evex)
>                 {
> +                 /* S/G EVEX insns require EVEX.P[10] == 1 */
> +                 if (ins->rex2 & REX_X)
> +                   {
> +                     oappend (ins, "(bad)");
> +                     return true;
> +                   }

I think I understand this, but ...

> @@ -9003,6 +9003,9 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
>         case 0x4:
>           vex_table_index = EVEX_MAP4;
>           ins->evex_type = evex_from_legacy;
> +         /* EVEX from legacy instrucions require EVEX.P[2:0] must be 0x04.  */
> +         if (!((*ins->codep & 0x7) == 0x04))
> +           return  &bad_opcode;

... what's this about? Aren't you in a switch() dealing with the exact same
three bits?

> @@ -9063,6 +9066,22 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
>         }
> 
>        ins->need_vex = 4;
> +
> +      /* EVEX from legacy requrie EVEX.P[17:16] must be 0, EVEX.P[23:21] must
> +        be 0.
> +        EVEX from evex requrie EVEX.P[17:16] must be 0. EVEX.P[23:22] must
> +        be 0, EVEX.P[20] must be 0.  */
> +      if (ins->evex_type == evex_from_legacy || ins->evex_type == evex_from_vex)
> +       {
> +         if (!((*ins->codep & 0x3) == 0)
> +             || !((*ins->codep >> 6 & 0x3) == 0)
> +             || (ins->evex_type == evex_from_legacy
> +                 && !((*ins->codep >> 5 & 0x1) == 0))
> +             || (ins->evex_type == evex_from_vex
> +                 && !ins->vex.b))
> +           return &bad_opcode;

I guess I'm confused here: So far we don't use EVEX.P[] as notation in
comments. Can you please use the respective field names instead? Also
can you please improve readability by converting !(a == b) into a != b?

Jan
  
Cui, Lili Oct. 19, 2023, 4:38 p.m. UTC | #21
> >> OK, I'll try to add more checks, and add description for them.
> >>
> >
> > Added some bit checks in the decoder.
> >
> > --- a/opcodes/i386-dis.c
> > +++ b/opcodes/i386-dis.c
> > @@ -11533,6 +11642,13 @@ OP_E_memory (instr_info *ins, int bytemode,
> int sizeflag)
> >                 abort ();
> >               if (ins->vex.evex)
> >                 {
> > +                 /* S/G EVEX insns require EVEX.P[10] == 1 */
> > +                 if (ins->rex2 & REX_X)
> > +                   {
> > +                     oappend (ins, "(bad)");
> > +                     return true;
> > +                   }
> 
> I think I understand this, but ...
> 
> > @@ -9003,6 +9003,9 @@ get_valid_dis386 (const struct dis386 *dp,
> instr_info *ins)
> >         case 0x4:
> >           vex_table_index = EVEX_MAP4;
> >           ins->evex_type = evex_from_legacy;
> > +         /* EVEX from legacy instrucions require EVEX.P[2:0] must be 0x04.  */
> > +         if (!((*ins->codep & 0x7) == 0x04))
> > +           return  &bad_opcode;
> 
> ... what's this about? Aren't you in a switch() dealing with the exact same
> three bits?
> 

Oh, deleted it.

> > @@ -9063,6 +9066,22 @@ get_valid_dis386 (const struct dis386 *dp,
> instr_info *ins)
> >         }
> >
> >        ins->need_vex = 4;
> > +
> > +      /* EVEX from legacy requrie EVEX.P[17:16] must be 0, EVEX.P[23:21]
> must
> > +        be 0.
> > +        EVEX from evex requrie EVEX.P[17:16] must be 0. EVEX.P[23:22] must
> > +        be 0, EVEX.P[20] must be 0.  */
> > +      if (ins->evex_type == evex_from_legacy || ins->evex_type ==
> evex_from_vex)
> > +       {
> > +         if (!((*ins->codep & 0x3) == 0)
> > +             || !((*ins->codep >> 6 & 0x3) == 0)
> > +             || (ins->evex_type == evex_from_legacy
> > +                 && !((*ins->codep >> 5 & 0x1) == 0))
> > +             || (ins->evex_type == evex_from_vex
> > +                 && !ins->vex.b))
> > +           return &bad_opcode;
> 
> I guess I'm confused here: So far we don't use EVEX.P[] as notation in
> comments. Can you please use the respective field names instead? Also can
> you please improve readability by converting !(a == b) into a != b?
> 

Changed it to:

+      /* EVEX from legacy instructions requrie vex.mask_register_specifier, vex.ll
+        and vex.zeroing must be 0.
+        EVEX from evex instrucions requrie vex.mask_register_specifier and vex.ll
+        must be 0.  */
+      if (ins->evex_type == evex_from_legacy || ins->evex_type == evex_from_vex)
+       {
+         if ((*ins->codep & 0x3) != 0
+             || (*ins->codep >> 6 & 0x3) != 0
+             || (ins->evex_type == evex_from_legacy
+                 && (*ins->codep >> 5 & 0x1) != 0)
+             || (ins->evex_type == evex_from_vex
+                 && !ins->vex.b))
+           return &bad_opcode;
+       }

Lili.
  
Jan Beulich Oct. 20, 2023, 6:25 a.m. UTC | #22
On 19.10.2023 18:38, Cui, Lili wrote:
>>> @@ -9063,6 +9066,22 @@ get_valid_dis386 (const struct dis386 *dp,
>> instr_info *ins)
>>>         }
>>>
>>>        ins->need_vex = 4;
>>> +
>>> +      /* EVEX from legacy requrie EVEX.P[17:16] must be 0, EVEX.P[23:21]
>> must
>>> +        be 0.
>>> +        EVEX from evex requrie EVEX.P[17:16] must be 0. EVEX.P[23:22] must
>>> +        be 0, EVEX.P[20] must be 0.  */
>>> +      if (ins->evex_type == evex_from_legacy || ins->evex_type ==
>> evex_from_vex)
>>> +       {
>>> +         if (!((*ins->codep & 0x3) == 0)
>>> +             || !((*ins->codep >> 6 & 0x3) == 0)
>>> +             || (ins->evex_type == evex_from_legacy
>>> +                 && !((*ins->codep >> 5 & 0x1) == 0))
>>> +             || (ins->evex_type == evex_from_vex
>>> +                 && !ins->vex.b))
>>> +           return &bad_opcode;
>>
>> I guess I'm confused here: So far we don't use EVEX.P[] as notation in
>> comments. Can you please use the respective field names instead? Also can
>> you please improve readability by converting !(a == b) into a != b?
>>
> 
> Changed it to:
> 
> +      /* EVEX from legacy instructions requrie vex.mask_register_specifier, vex.ll
> +        and vex.zeroing must be 0.
> +        EVEX from evex instrucions requrie vex.mask_register_specifier and vex.ll
> +        must be 0.  */
> +      if (ins->evex_type == evex_from_legacy || ins->evex_type == evex_from_vex)
> +       {
> +         if ((*ins->codep & 0x3) != 0
> +             || (*ins->codep >> 6 & 0x3) != 0
> +             || (ins->evex_type == evex_from_legacy
> +                 && (*ins->codep >> 5 & 0x1) != 0)
> +             || (ins->evex_type == evex_from_vex
> +                 && !ins->vex.b))
> +           return &bad_opcode;
> +       }

I guess my earlier response was ambiguous, I'm sorry: I didn't mean the
disassembler's internal names, but the field names as per the SDM (e.g.
EVEX.W). Also (nit) please avoid typo-ing "require" even twice.

Jan
  
Cui, Lili Oct. 22, 2023, 2:33 p.m. UTC | #23
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, October 20, 2023 2:26 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; binutils@sourceware.org
> Subject: Re: [PATCH 2/8] Support APX GPR32 with extend evex prefix
> 
> On 19.10.2023 18:38, Cui, Lili wrote:
> >>> @@ -9063,6 +9066,22 @@ get_valid_dis386 (const struct dis386 *dp,
> >> instr_info *ins)
> >>>         }
> >>>
> >>>        ins->need_vex = 4;
> >>> +
> >>> +      /* EVEX from legacy requrie EVEX.P[17:16] must be 0,
> >>> + EVEX.P[23:21]
> >> must
> >>> +        be 0.
> >>> +        EVEX from evex requrie EVEX.P[17:16] must be 0. EVEX.P[23:22] must
> >>> +        be 0, EVEX.P[20] must be 0.  */
> >>> +      if (ins->evex_type == evex_from_legacy || ins->evex_type ==
> >> evex_from_vex)
> >>> +       {
> >>> +         if (!((*ins->codep & 0x3) == 0)
> >>> +             || !((*ins->codep >> 6 & 0x3) == 0)
> >>> +             || (ins->evex_type == evex_from_legacy
> >>> +                 && !((*ins->codep >> 5 & 0x1) == 0))
> >>> +             || (ins->evex_type == evex_from_vex
> >>> +                 && !ins->vex.b))
> >>> +           return &bad_opcode;
> >>
> >> I guess I'm confused here: So far we don't use EVEX.P[] as notation
> >> in comments. Can you please use the respective field names instead?
> >> Also can you please improve readability by converting !(a == b) into a != b?
> >>
> >
> > Changed it to:
> >
> > +      /* EVEX from legacy instructions requrie vex.mask_register_specifier,
> vex.ll
> > +        and vex.zeroing must be 0.
> > +        EVEX from evex instrucions requrie vex.mask_register_specifier and
> vex.ll
> > +        must be 0.  */
> > +      if (ins->evex_type == evex_from_legacy || ins->evex_type ==
> evex_from_vex)
> > +       {
> > +         if ((*ins->codep & 0x3) != 0
> > +             || (*ins->codep >> 6 & 0x3) != 0
> > +             || (ins->evex_type == evex_from_legacy
> > +                 && (*ins->codep >> 5 & 0x1) != 0)
> > +             || (ins->evex_type == evex_from_vex
> > +                 && !ins->vex.b))
> > +           return &bad_opcode;
> > +       }
> 
> I guess my earlier response was ambiguous, I'm sorry: I didn't mean the
> disassembler's internal names, but the field names as per the SDM (e.g.
> EVEX.W). Also (nit) please avoid typo-ing "require" even twice.
> 
Changed.

+      /* EVEX from legacy instructions require that EVEX.L'L, EVEX.z and the
+        lower 2 bits of EVEX.aaa must be 0.
+        EVEX from evex instructions require that EVEX.L'L and the lower 2 bits of
+        EVEX.aaa must be 0.  */

Thanks,
Lili.
  

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 51486985919..48916bc3846 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1945,6 +1945,30 @@  cpu_flags_match (const insn_template *t)
 		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
 		match |= CPU_FLAGS_ARCH_MATCH;
 	    }
+	  else if (x.bitfield.cpuapx_f)
+	    {
+	      if (cpu.bitfield.cpuapx_f
+		  && (!x.bitfield.cpumovbe || cpu.bitfield.cpumovbe)
+		  && (!x.bitfield.cpuept || cpu.bitfield.cpuept)
+		  && (!x.bitfield.cpuinvpcid || cpu.bitfield.cpuinvpcid)
+		  && (!x.bitfield.cpusse4_2 || cpu.bitfield.cpusse4_2)
+		  && (!x.bitfield.cpubmi2 || cpu.bitfield.cpubmi2)
+		  && (!x.bitfield.cpubmi || cpu.bitfield.cpubmi)
+		  && (!x.bitfield.cpuadx || cpu.bitfield.cpuadx)
+		  && (!x.bitfield.cpusha || cpu.bitfield.cpusha)
+		  && (!x.bitfield.cpuavx512bw || cpu.bitfield.cpuavx512bw)
+		  && (!x.bitfield.cpuavx512dq || cpu.bitfield.cpuavx512dq)
+		  && (!x.bitfield.cpuavx512f || cpu.bitfield.cpuavx512f)
+		  && (!x.bitfield.cpushstk || cpu.bitfield.cpushstk)
+		  && (!x.bitfield.cpumovdir64b || cpu.bitfield.cpumovdir64b)
+		  && (!x.bitfield.cpumovdiri || cpu.bitfield.cpumovdiri)
+		  && (!x.bitfield.cpuenqcmd || cpu.bitfield.cpuenqcmd)
+		  && (!x.bitfield.cpukl || cpu.bitfield.cpukl)
+		  && (!x.bitfield.cpuwidekl || cpu.bitfield.cpuwidekl)
+		  && (!x.bitfield.cpucmpccxadd || cpu.bitfield.cpucmpccxadd)
+		  && (!x.bitfield.cpurao_int || cpu.bitfield.cpurao_int))
+		match |= CPU_FLAGS_ARCH_MATCH;
+	    }
 	  else
 	    match |= CPU_FLAGS_ARCH_MATCH;
 	}
@@ -3850,7 +3874,10 @@  is_any_vex_encoding (const insn_template *t)
 static INLINE bool
 is_any_apx_encoding (void)
 {
-  return i.rex2 || i.rex2_encoding;
+  return i.rex2 
+    || i.rex2_encoding
+    || (i.vex.register_specifier
+	&& i.vex.register_specifier->reg_flags & RegRex2);
 }
 
 static INLINE bool
@@ -3859,6 +3886,12 @@  is_any_apx_rex2_encoding (void)
   return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
 }
 
+static INLINE bool
+is_any_apx_evex_encoding (void)
+{
+  return i.rex2 && i.vex.length == 4;
+}
+
 static unsigned int
 get_broadcast_bytes (const insn_template *t, bool diag)
 {
@@ -4129,6 +4162,50 @@  build_rex2_prefix (void)
 		    | (i.rex2 << 4) | i.rex);
 }
 
+/* Build the EVEX prefix (4-byte) for evex insn
+   | 62h |
+   | `R`X`B`R' | B'mmm |
+   | W | v`v`v`v | `x' | pp |
+   | z| L'L | b | `v | aaa |
+*/
+static void
+build_evex_insns_with_extend_evex_prefix (void)
+{
+  build_evex_prefix ();
+  if (i.rex2 & REX_R)
+    i.vex.bytes[1] &= 0xef;
+  if (i.vex.register_specifier
+      && register_number (i.vex.register_specifier) > 0xf)
+    i.vex.bytes[3] &=0xf7;
+  if (i.rex2 & REX_B)
+    i.vex.bytes[1] |= 0x08;
+  if (i.rex2 & REX_X)
+    i.vex.bytes[2] &= 0xfb;
+}
+
+/* Build the EVEX prefix (4-byte) for legacy insn
+   | 62h |
+   | `R`X`B`R' | B'100 |
+   | W | v`v`v`v | `x' | pp |
+   | 000 | ND | `v | NF | 00 |
+   For legacy insn without ndd nor nf, [vvvvv] must be all zero.  */
+static void
+build_legacy_insns_with_apx_encoding (void)
+{
+  /* map{0,1} of legacy space without ndd or nf could use rex2 prefix.  */
+  if (i.tm.opcode_space <= SPACE_0F
+      && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
+    return build_rex2_prefix ();
+
+  if (i.prefix[DATA_PREFIX] != 0)
+    {
+      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
+      i.prefix[DATA_PREFIX] = 0;
+    }
+
+  build_evex_insns_with_extend_evex_prefix ();
+}
+
 static void
 process_immext (void)
 {
@@ -5544,9 +5621,10 @@  md_assemble (char *line)
   if (is_any_apx_encoding ())
     {
       if (!is_any_vex_encoding (&i.tm)
-	  && i.tm.opcode_space <= SPACE_0F
-	  && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
-	build_rex2_prefix ();
+	  || i.tm.opcode_space == SPACE_EVEXMAP4)
+	build_legacy_insns_with_apx_encoding ();
+      else
+	build_evex_insns_with_extend_evex_prefix ();
 
       /* The individual REX.RXBW bits got consumed.  */
       i.rex &= REX_OPCODE;
@@ -5616,7 +5694,7 @@  md_assemble (char *line)
 	  && (i.rex != 0 || i.rex2!=0)))
     {
       int x;
-      if (!i.rex2)
+      if (!is_any_apx_encoding ())
 	i.rex |= REX_OPCODE;
       for (x = 0; x < 2; x++)
 	{
@@ -7935,7 +8013,8 @@  process_suffix (void)
       if (i.suffix != QWORD_MNEM_SUFFIX
 	  && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
 	  && !i.tm.opcode_modifier.floatmf
-	  && !is_any_vex_encoding (&i.tm)
+	  && (!is_any_vex_encoding (&i.tm)
+	      || i.tm.opcode_space == SPACE_EVEXMAP4)
 	  && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
 	      || (flag_code == CODE_64BIT
 		  && i.tm.opcode_modifier.jump == JUMP_BYTE)))
@@ -10057,7 +10136,7 @@  output_insn (void)
 
       /* Since the VEX/EVEX prefix contains the implicit prefix, we
 	 don't need the explicit prefix.  */
-      if (!is_any_vex_encoding (&i.tm))
+      if (!is_any_vex_encoding (&i.tm) && !is_any_apx_evex_encoding ())
 	{
 	  switch (i.tm.opcode_modifier.opcodeprefix)
 	    {
diff --git a/opcodes/i386-dis-evex-len.h b/opcodes/i386-dis-evex-len.h
index a02609c50f2..1933a045822 100644
--- a/opcodes/i386-dis-evex-len.h
+++ b/opcodes/i386-dis-evex-len.h
@@ -62,6 +62,16 @@  static const struct dis386 evex_len_table[][3] = {
     { REG_TABLE (REG_EVEX_0F38C7_L_2) },
   },
 
+  /* EVEX_LEN_0F38F2 */
+  {
+    { "andnS",		{ Gdq, VexGdq, Edq }, 0 },
+  },
+
+  /* EVEX_LEN_0F38F3 */
+  {
+    { REG_TABLE(REG_EVEX_0F38F3_L_0) },
+  },
+
   /* EVEX_LEN_0F3A00 */
   {
     { Bad_Opcode },
diff --git a/opcodes/i386-dis-evex-mod.h b/opcodes/i386-dis-evex-mod.h
index f9f912c5094..5a1326a1b73 100644
--- a/opcodes/i386-dis-evex-mod.h
+++ b/opcodes/i386-dis-evex-mod.h
@@ -1 +1,51 @@ 
 /* Nothing at present.  */
+  /* MOD_EVEX_MAP4_65 */
+  {
+    { "wrussK",		{ M, Gdq }, PREFIX_DATA },
+  },
+  /* MOD_EVEX_MAP4_66_PREFIX_0 */
+  {
+    { "wrssK",		{ M, Gdq }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DA_PREFIX_1 */
+  {
+    { Bad_Opcode },
+    { "encodekey128", { Gd, Ed }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DB_PREFIX_1 */
+  {
+    { Bad_Opcode },
+    { "encodekey256", { Gd, Ed }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DC_PREFIX_1 */
+  {
+    { "aesenc128kl",    { XM, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DD_PREFIX_1 */
+  {
+    { "aesdec128kl",    { XM, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DE_PREFIX_1 */
+  {
+    { "aesenc256kl",    { XM, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_DF_PREFIX_1 */
+  {
+    { "aesdec256kl",    { XM, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_F8_PREFIX_1 */
+  {
+    { "enqcmds",	{ Gva, M },  0 },
+  },
+  /* MOD_EVEX_MAP4_F8_PREFIX_2 */
+  {
+    { "movdir64b",	{ Gva, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_F8_PREFIX_3 */
+  {
+    { "enqcmd",		{ Gva, M }, 0 },
+  },
+  /* MOD_EVEX_MAP4_F9 */
+  {
+    { "movdiri",	{ Edq, Gdq }, 0 },
+  },
diff --git a/opcodes/i386-dis-evex-prefix.h b/opcodes/i386-dis-evex-prefix.h
index 28da54922c7..f6f02de6c47 100644
--- a/opcodes/i386-dis-evex-prefix.h
+++ b/opcodes/i386-dis-evex-prefix.h
@@ -338,6 +338,89 @@ 
     { "vcmpp%XH", { MaskG, Vex, EXxh, EXxEVexS, CMP }, 0 },
     { "vcmps%XH", { MaskG, VexScalar, EXw, EXxEVexS, CMP }, 0 },
   },
+  /* PREFIX_EVEX_MAP4_60 */
+  {
+    { "movbeS",	{ Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "movbeS",	{ Gv, Ev }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_61 */
+  {
+    { "movbeS",	{ Ev, Gv }, 0 },
+    { Bad_Opcode },
+    { "movbeS",	{ Ev, Gv }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_66 */
+  {
+    { MOD_TABLE (MOD_EVEX_MAP4_66_PREFIX_0) },
+    { "adoxS",	{ Gdq, Edq }, 0 },
+    { "adcxS",	{ Gdq, Edq }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_D8 */
+  {
+    { "sha1nexte", { XM, EXxmm }, 0 },
+    { REG_TABLE (REG_EVEX_MAP4_D8_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DA */
+  {
+    { "sha1msg2", { XM, EXxmm }, 0 },
+    { MOD_TABLE (MOD_EVEX_MAP4_DA_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DB */
+  {
+    { "sha256rnds2", { XM, EXxmm, XMM0 }, 0 },
+    { MOD_TABLE (MOD_EVEX_MAP4_DB_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DC */
+  {
+    { "sha256msg1", { XM, EXxmm }, 0 },
+    { MOD_TABLE (MOD_EVEX_MAP4_DC_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DD */
+  {
+    { "sha256msg2", { XM, EXxmm }, 0 },
+    { MOD_TABLE (MOD_EVEX_MAP4_DD_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DE */
+  {
+    { Bad_Opcode },
+    { MOD_TABLE (MOD_EVEX_MAP4_DE_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_DF */
+  {
+    { Bad_Opcode },
+    { MOD_TABLE (MOD_EVEX_MAP4_DF_PREFIX_1) },
+  },
+  /* PREFIX_EVEX_MAP4_F0 */
+  {
+    { "crc32A",	{ Gdq, Eb }, 0 },
+    { "invept",	{ Gm, Mo }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_F1 */
+  {
+    { "crc32Q",	{ Gdq, Ev }, 0 },
+    { "invvpid", { Gm, Mo }, 0 },
+    { "crc32Q",	{ Gdq, Ev }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_F2 */
+  {
+    { Bad_Opcode },
+    { "invpcid", { Gm, M }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_F8 */
+  {
+    { Bad_Opcode },
+    { MOD_TABLE (MOD_EVEX_MAP4_F8_PREFIX_1) },
+    { MOD_TABLE (MOD_EVEX_MAP4_F8_PREFIX_2) },
+    { MOD_TABLE (MOD_EVEX_MAP4_F8_PREFIX_3) },
+  },
+  /* PREFIX_EVEX_MAP4_FC */
+  {
+    { "aadd",	{ Mdq, Gdq }, 0 },
+    { "axor",	{ Mdq, Gdq }, 0 },
+    { "aand",	{ Mdq, Gdq }, 0 },
+    { "aor",	{ Mdq, Gdq }, 0 },
+  },
   /* PREFIX_EVEX_MAP5_10 */
   {
     { Bad_Opcode },
diff --git a/opcodes/i386-dis-evex-reg.h b/opcodes/i386-dis-evex-reg.h
index 2885063628b..c3b4f083346 100644
--- a/opcodes/i386-dis-evex-reg.h
+++ b/opcodes/i386-dis-evex-reg.h
@@ -49,3 +49,17 @@ 
     { "vscatterpf0qp%XW",  { MVexVSIBQWpX }, PREFIX_DATA },
     { "vscatterpf1qp%XW",  { MVexVSIBQWpX }, PREFIX_DATA },
   },
+  /* REG_EVEX_0F38F3_L_0 */
+  {
+    { Bad_Opcode },
+    { "blsrS",		{ VexGdq, Edq }, 0 },
+    { "blsmskS",	{ VexGdq, Edq }, 0 },
+    { "blsiS",		{ VexGdq, Edq }, 0 },
+  },
+  /* REG_EVEX_MAP4_D8_PREFIX_1 */
+  {
+    { "aesencwide128kl",	{ M }, 0 },
+    { "aesdecwide128kl",	{ M }, 0 },
+    { "aesencwide256kl",	{ M }, 0 },
+    { "aesdecwide256kl",	{ M }, 0 },
+  },
diff --git a/opcodes/i386-dis-evex-x86.h b/opcodes/i386-dis-evex-x86.h
new file mode 100644
index 00000000000..1121223d877
--- /dev/null
+++ b/opcodes/i386-dis-evex-x86.h
@@ -0,0 +1,140 @@ 
+  /* X86_64_EVEX_0F90 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F90) },
+  },
+  /* X86_64_EVEX_0F91 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F91) },
+  },
+  /* X86_64_EVEX_0F92 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F92) },
+  },
+  /* X86_64_EVEX_0F93 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F93) },
+  },
+  /* X86_64_EVEX_0F3849 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F3849_X86_64) },
+  },
+  /* X86_64_EVEX_0F384B */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F384B_X86_64) },
+  },
+  /* X86_64_EVEX_0F38E0 */
+  {
+    { Bad_Opcode },
+    { "cmpoxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E1 */
+  {
+    { Bad_Opcode },
+    { "cmpnoxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E2 */
+  {
+    { Bad_Opcode },
+    { "cmpbxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E3 */
+  {
+    { Bad_Opcode },
+    { "cmpnbxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E4 */
+  {
+    { Bad_Opcode },
+    { "cmpzxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E5 */
+  {
+    { Bad_Opcode },
+    { "cmpnzxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E6 */
+  {
+    { Bad_Opcode },
+    { "cmpbexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E7 */
+  {
+    { Bad_Opcode },
+    { "cmpnbexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E8 */
+  {
+    { Bad_Opcode },
+    { "cmpsxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38E9 */
+  {
+    { Bad_Opcode },
+    { "cmpnsxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38EA */
+  {
+    { Bad_Opcode },
+    { "cmppxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38EB */
+  {
+    { Bad_Opcode },
+    { "cmpnpxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38EC */
+  {
+    { Bad_Opcode },
+    { "cmplxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38ED */
+  {
+    { Bad_Opcode },
+    { "cmpnlxadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38EE */
+  {
+    { Bad_Opcode },
+    { "cmplexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38EF */
+  {
+    { Bad_Opcode },
+    { "cmpnlexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
+  },
+  /* X86_64_EVEX_0F38F2 */
+  {
+    { Bad_Opcode },
+    { EVEX_LEN_TABLE (EVEX_LEN_0F38F2) },
+  },
+  /* X86_64_EVEX_0F38F3 */
+  {
+    { Bad_Opcode },
+    { EVEX_LEN_TABLE (EVEX_LEN_0F38F3) },
+  },
+  /* X86_64_EVEX_0F38F5 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F38F5) },
+  },
+  /* X86_64_EVEX_0F38F6 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F38F6) },
+  },
+  /* X86_64_EVEX_0F38F7 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F38F7) },
+  },
+  /* X86_64_EVEX_0F3AF0 */
+  {
+    { Bad_Opcode },
+    { VEX_LEN_TABLE (VEX_LEN_0F3AF0) },
+  },
diff --git a/opcodes/i386-dis-evex.h b/opcodes/i386-dis-evex.h
index e6295119d2b..2a8c80c5200 100644
--- a/opcodes/i386-dis-evex.h
+++ b/opcodes/i386-dis-evex.h
@@ -164,10 +164,10 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 90 */
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F90) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F91) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F92) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F93) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -375,9 +375,9 @@  static const struct dis386 evex_table[][256] = {
     { "vpsllv%DQ",	{ XM, Vex, EXx }, PREFIX_DATA },
     /* 48 */
     { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F3849) },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F384B) },
     { "vrcp14p%XW",	{ XM, EXx }, PREFIX_DATA },
     { "vrcp14s%XW",	{ XMScalar, VexScalar, EXdq }, PREFIX_DATA },
     { "vrsqrt14p%XW",	{ XM, EXx }, 0 },
@@ -545,32 +545,32 @@  static const struct dis386 evex_table[][256] = {
     { "%XEvaesdecY",	{ XM, Vex, EXx }, PREFIX_DATA },
     { "%XEvaesdeclastY", { XM, Vex, EXx }, PREFIX_DATA },
     /* E0 */
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E0) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E1) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E2) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E3) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E4) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E5) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E6) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E7) },
     /* E8 */
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E8) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38E9) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EA) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EB) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EC) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38ED) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EE) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38EF) },
     /* F0 */
     { Bad_Opcode },
     { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F2) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F3) },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F5) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F6) },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F38F7) },
     /* F8 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -854,7 +854,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* F0 */
-    { Bad_Opcode },
+    { X86_64_EVEX_FROM_VEX_TABLE (X86_64_EVEX_0F3AF0) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -872,7 +872,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
   },
-  /* EVEX_MAP5_ */
+  /* EVEX_MAP4_ */
   {
     /* 00 */
     { Bad_Opcode },
@@ -893,8 +893,8 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 10 */
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_10) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_11) },
+    { Bad_Opcode },
+    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -907,7 +907,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_1D) },
+    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     /* 20 */
@@ -922,12 +922,12 @@  static const struct dis386 evex_table[][256] = {
     /* 28 */
     { Bad_Opcode },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2A) },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2C) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2D) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2E) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2F) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* 30 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -966,7 +966,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     /* 50 */
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_51) },
+    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -974,15 +974,6 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 58 */
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_58) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_59) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5A) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5B) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5C) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5D) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5E) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5F) },
-    /* 60 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -991,6 +982,15 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
+    /* 60 */
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_60) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_61) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { MOD_TABLE (MOD_EVEX_MAP4_65) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_66) },
+    { Bad_Opcode },
     /* 68 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -998,7 +998,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { "vmovwY", { XMScalar, Edw }, PREFIX_DATA },
+    { Bad_Opcode },
     { Bad_Opcode },
     /* 70 */
     { Bad_Opcode },
@@ -1010,13 +1010,13 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 78 */
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_78) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_79) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7A) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7B) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7C) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7D) },
-    { "vmovw",	  { Edw, XMScalar }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
     { Bad_Opcode },
     /* 80 */
     { Bad_Opcode },
@@ -1113,19 +1113,19 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
+    { "sha1rnds4", { XM, EXxmm, Ib }, 0 },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     /* D8 */
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_D8) },
+    { "sha1msg1", { XM, EXxmm }, 0 },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DA) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DB) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DC) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DD) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DE) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_DF) },
     /* E0 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1145,25 +1145,25 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* F0 */
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_F0) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_F1) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_F2) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     /* F8 */
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_F8) },
+    { MOD_TABLE (MOD_EVEX_MAP4_F9) },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_FC) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
   },
-  /* EVEX_MAP6_ */
+  /* EVEX_MAP5_ */
   {
     /* 00 */
     { Bad_Opcode },
@@ -1184,11 +1184,11 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 10 */
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_10) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_11) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP6_13) },
-    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1198,7 +1198,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_1D) },
     { Bad_Opcode },
     { Bad_Opcode },
     /* 20 */
@@ -1213,12 +1213,12 @@  static const struct dis386 evex_table[][256] = {
     /* 28 */
     { Bad_Opcode },
     { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2A) },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { "vscalefp%XH",      { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vscalefs%XH",      { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2C) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2D) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2E) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_2F) },
     /* 30 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1240,39 +1240,39 @@  static const struct dis386 evex_table[][256] = {
     /* 40 */
     { Bad_Opcode },
     { Bad_Opcode },
-    { "vgetexpp%XH",      { XM, EXxh, EXxEVexS }, PREFIX_DATA },
-    { "vgetexps%XH",      { XMScalar, VexScalar, EXw, EXxEVexS }, PREFIX_DATA },
-    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    /* 48 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
+    /* 48 */
     { Bad_Opcode },
-    { "vrcpp%XH",	  { XM, EXxh }, PREFIX_DATA },
-    { "vrcps%XH",	  { XMScalar, VexScalar, EXw }, PREFIX_DATA },
-    { "vrsqrtp%XH",       { XM, EXxh }, PREFIX_DATA },
-    { "vrsqrts%XH",       { XMScalar, VexScalar, EXw }, PREFIX_DATA },
-    /* 50 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP6_56) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP6_57) },
-    /* 58 */
     { Bad_Opcode },
+    /* 50 */
     { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_51) },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
+    /* 58 */
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_58) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_59) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5A) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5B) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5C) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5D) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5E) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_5F) },
     /* 60 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1289,7 +1289,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
+    { "vmovwY", { XMScalar, Edw }, PREFIX_DATA },
     { Bad_Opcode },
     /* 70 */
     { Bad_Opcode },
@@ -1301,7 +1301,15 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 78 */
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_78) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_79) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7A) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7B) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7C) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP5_7D) },
+    { "vmovw",	  { Edw, XMScalar }, PREFIX_DATA },
     { Bad_Opcode },
+    /* 80 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1309,8 +1317,8 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    /* 80 */
     { Bad_Opcode },
+    /* 88 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1318,8 +1326,8 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    /* 88 */
     { Bad_Opcode },
+    /* 90 */
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1327,24 +1335,16 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    /* 90 */
+    { Bad_Opcode },
+    /* 98 */
+    { Bad_Opcode },
+    { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { "vfmaddsub132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsubadd132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    /* 98 */
-    { "vfmadd132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmadd132s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub132s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd132p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd132s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub132p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub132s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
     /* A0 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1352,17 +1352,17 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { "vfmaddsub213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsubadd213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* A8 */
-    { "vfmadd213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmadd213s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub213s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd213p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd213s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub213p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub213s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* B0 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1370,17 +1370,17 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { "vfmaddsub231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsubadd231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* B8 */
-    { "vfmadd231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmadd231s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfmsub231s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd231p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmadd231s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub231p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
-    { "vfnmsub231s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* C0 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -1406,8 +1406,590 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP6_D6) },
-    { PREFIX_TABLE (PREFIX_EVEX_MAP6_D7) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* D8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* E0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* E8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* F0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* F8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+  },
+  /* EVEX_MAP6_ */
+  {
+    /* 00 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 08 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 10 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP6_13) },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 18 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 20 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 28 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vscalefp%XH",      { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vscalefs%XH",      { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 30 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 38 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 40 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vgetexpp%XH",      { XM, EXxh, EXxEVexS }, PREFIX_DATA },
+    { "vgetexps%XH",      { XMScalar, VexScalar, EXw, EXxEVexS }, PREFIX_DATA },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 48 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vrcpp%XH",	  { XM, EXxh }, PREFIX_DATA },
+    { "vrcps%XH",	  { XMScalar, VexScalar, EXw }, PREFIX_DATA },
+    { "vrsqrtp%XH",       { XM, EXxh }, PREFIX_DATA },
+    { "vrsqrts%XH",       { XMScalar, VexScalar, EXw }, PREFIX_DATA },
+    /* 50 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP6_56) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP6_57) },
+    /* 58 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 60 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 68 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 70 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 78 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 80 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 88 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 90 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vfmaddsub132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsubadd132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    /* 98 */
+    { "vfmadd132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmadd132s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub132p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub132s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd132p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd132s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub132p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub132s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    /* A0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vfmaddsub213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsubadd213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    /* A8 */
+    { "vfmadd213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmadd213s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub213p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub213s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd213p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd213s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub213p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub213s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    /* B0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { "vfmaddsub231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsubadd231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    /* B8 */
+    { "vfmadd231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmadd231s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub231p%XH",  { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfmsub231s%XH",  { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd231p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmadd231s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub231p%XH", { XM, Vex, EXxh, EXxEVexR }, PREFIX_DATA },
+    { "vfnmsub231s%XH", { XMScalar, VexScalar, EXw, EXxEVexR }, PREFIX_DATA },
+    /* C0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* C8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* D0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP6_D6) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP6_D7) },
+    /* D8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* E0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* E8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* F0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* F8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+  },
+  /* EVEX_MAP7_ */
+  {
+    /* 00 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 08 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 10 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 18 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 20 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 28 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 30 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 38 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 40 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 48 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 50 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 58 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 60 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 68 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 70 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 78 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 80 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 88 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 90 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* 98 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* A0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* A8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* B0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* B8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* C0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* C8 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    /* D0 */
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
     /* D8 */
     { Bad_Opcode },
     { Bad_Opcode },
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 65bdd6f65db..c8f3cfb8149 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -132,6 +132,13 @@  enum x86_64_isa
   intel64
 };
 
+enum evex_type
+{
+  evex_default = 0,
+  evex_from_legacy,
+  evex_from_vex,
+};
+
 struct instr_info
 {
   enum address_mode address_mode;
@@ -212,7 +219,6 @@  struct instr_info
     int ll;
     bool w;
     bool evex;
-    bool r;
     bool v;
     bool zeroing;
     bool b;
@@ -220,6 +226,8 @@  struct instr_info
   }
   vex;
 
+  enum evex_type evex_type;
+
   /* Remember if the current op is a jump instruction.  */
   bool op_is_jump;
 
@@ -793,6 +801,7 @@  enum
   USE_RM_TABLE,
   USE_PREFIX_TABLE,
   USE_X86_64_TABLE,
+  USE_X86_64_EVEX_FROM_VEX_TABLE,
   USE_3BYTE_TABLE,
   USE_XOP_8F_TABLE,
   USE_VEX_C4_TABLE,
@@ -811,6 +820,8 @@  enum
 #define RM_TABLE(I)		DIS386 (USE_RM_TABLE, (I))
 #define PREFIX_TABLE(I)		DIS386 (USE_PREFIX_TABLE, (I))
 #define X86_64_TABLE(I)		DIS386 (USE_X86_64_TABLE, (I))
+#define X86_64_EVEX_FROM_VEX_TABLE(I) \
+  DIS386 (USE_X86_64_EVEX_FROM_VEX_TABLE, (I))
 #define THREE_BYTE_TABLE(I)	DIS386 (USE_3BYTE_TABLE, (I))
 #define XOP_8F_TABLE()		DIS386 (USE_XOP_8F_TABLE, 0)
 #define VEX_C4_TABLE()		DIS386 (USE_VEX_C4_TABLE, 0)
@@ -870,7 +881,9 @@  enum
   REG_EVEX_0F72,
   REG_EVEX_0F73,
   REG_EVEX_0F38C6_L_2,
-  REG_EVEX_0F38C7_L_2
+  REG_EVEX_0F38C7_L_2,
+  REG_EVEX_0F38F3_L_0,
+  REG_EVEX_MAP4_D8_PREFIX_1
 };
 
 enum
@@ -910,6 +923,19 @@  enum
   MOD_0F38DC_PREFIX_1,
 
   MOD_VEX_0F3849_X86_64_L_0_W_0,
+
+  MOD_EVEX_MAP4_65,
+  MOD_EVEX_MAP4_66_PREFIX_0,
+  MOD_EVEX_MAP4_DA_PREFIX_1,
+  MOD_EVEX_MAP4_DB_PREFIX_1,
+  MOD_EVEX_MAP4_DC_PREFIX_1,
+  MOD_EVEX_MAP4_DD_PREFIX_1,
+  MOD_EVEX_MAP4_DE_PREFIX_1,
+  MOD_EVEX_MAP4_DF_PREFIX_1,
+  MOD_EVEX_MAP4_F8_PREFIX_1,
+  MOD_EVEX_MAP4_F8_PREFIX_2,
+  MOD_EVEX_MAP4_F8_PREFIX_3,
+  MOD_EVEX_MAP4_F9,
 };
 
 enum
@@ -1145,6 +1171,22 @@  enum
   PREFIX_EVEX_0F3A67,
   PREFIX_EVEX_0F3AC2,
 
+  PREFIX_EVEX_MAP4_60,
+  PREFIX_EVEX_MAP4_61,
+  PREFIX_EVEX_MAP4_66,
+  PREFIX_EVEX_MAP4_D8,
+  PREFIX_EVEX_MAP4_DA,
+  PREFIX_EVEX_MAP4_DB,
+  PREFIX_EVEX_MAP4_DC,
+  PREFIX_EVEX_MAP4_DD,
+  PREFIX_EVEX_MAP4_DE,
+  PREFIX_EVEX_MAP4_DF,
+  PREFIX_EVEX_MAP4_F0,
+  PREFIX_EVEX_MAP4_F1,
+  PREFIX_EVEX_MAP4_F2,
+  PREFIX_EVEX_MAP4_F8,
+  PREFIX_EVEX_MAP4_FC,
+
   PREFIX_EVEX_MAP5_10,
   PREFIX_EVEX_MAP5_11,
   PREFIX_EVEX_MAP5_1D,
@@ -1255,6 +1297,35 @@  enum
   X86_64_VEX_0F38ED,
   X86_64_VEX_0F38EE,
   X86_64_VEX_0F38EF,
+
+  X86_64_EVEX_0F90,
+  X86_64_EVEX_0F91,
+  X86_64_EVEX_0F92,
+  X86_64_EVEX_0F93,
+  X86_64_EVEX_0F3849,
+  X86_64_EVEX_0F384B,
+  X86_64_EVEX_0F38E0,
+  X86_64_EVEX_0F38E1,
+  X86_64_EVEX_0F38E2,
+  X86_64_EVEX_0F38E3,
+  X86_64_EVEX_0F38E4,
+  X86_64_EVEX_0F38E5,
+  X86_64_EVEX_0F38E6,
+  X86_64_EVEX_0F38E7,
+  X86_64_EVEX_0F38E8,
+  X86_64_EVEX_0F38E9,
+  X86_64_EVEX_0F38EA,
+  X86_64_EVEX_0F38EB,
+  X86_64_EVEX_0F38EC,
+  X86_64_EVEX_0F38ED,
+  X86_64_EVEX_0F38EE,
+  X86_64_EVEX_0F38EF,
+  X86_64_EVEX_0F38F2,
+  X86_64_EVEX_0F38F3,
+  X86_64_EVEX_0F38F5,
+  X86_64_EVEX_0F38F6,
+  X86_64_EVEX_0F38F7,
+  X86_64_EVEX_0F3AF0,
 };
 
 enum
@@ -1282,8 +1353,10 @@  enum
   EVEX_0F = 0,
   EVEX_0F38,
   EVEX_0F3A,
+  EVEX_MAP4,
   EVEX_MAP5,
   EVEX_MAP6,
+  EVEX_MAP7,
 };
 
 enum
@@ -1436,6 +1509,8 @@  enum
   EVEX_LEN_0F385B,
   EVEX_LEN_0F38C6,
   EVEX_LEN_0F38C7,
+  EVEX_LEN_0F38F2,
+  EVEX_LEN_0F38F3,
   EVEX_LEN_0F3A00,
   EVEX_LEN_0F3A01,
   EVEX_LEN_0F3A18,
@@ -4476,6 +4551,8 @@  static const struct dis386 x86_64_table[][2] = {
     { Bad_Opcode },
     { "cmpnlexadd", { Mdq, Gdq, VexGdq }, PREFIX_DATA },
   },
+
+#include "i386-dis-evex-x86.h"
 };
 
 static const struct dis386 three_byte_table[][256] = {
@@ -8665,6 +8742,9 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
       dp = &prefix_table[dp->op[1].bytemode][vindex];
       break;
 
+    case USE_X86_64_EVEX_FROM_VEX_TABLE:
+      ins->evex_type = evex_from_vex;
+      /* Fall through.  */
     case USE_X86_64_TABLE:
       vindex = ins->address_mode == mode_64bit ? 1 : 0;
       dp = &x86_64_table[dp->op[1].bytemode][vindex];
@@ -8910,9 +8990,13 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
       if (!fetch_code (ins->info, ins->codep + 4))
 	return &err_opcode;
       /* The first byte after 0x62.  */
+      if (*ins->codep & 0x8)
+	ins->rex2 |= REX_B;
+      if (!(*ins->codep & 0x10))
+	ins->rex2 |= REX_R;
+
       ins->rex = ~(*ins->codep >> 5) & 0x7;
-      ins->vex.r = *ins->codep & 0x10;
-      switch ((*ins->codep & 0xf))
+      switch ((*ins->codep & 0x7))
 	{
 	default:
 	  return &bad_opcode;
@@ -8925,12 +9009,19 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
 	case 0x3:
 	  vex_table_index = EVEX_0F3A;
 	  break;
+	case 0x4:
+	  vex_table_index = EVEX_MAP4;
+	  ins->evex_type = evex_from_legacy;
+	  break;
 	case 0x5:
 	  vex_table_index = EVEX_MAP5;
 	  break;
 	case 0x6:
 	  vex_table_index = EVEX_MAP6;
 	  break;
+	case 0x7:
+	  vex_table_index = EVEX_MAP7;
+	  break;
 	}
 
       /* The second byte after 0x62.  */
@@ -8941,9 +9032,8 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
 
       ins->vex.register_specifier = (~(*ins->codep >> 3)) & 0xf;
 
-      /* The U bit.  */
       if (!(*ins->codep & 0x4))
-	return &bad_opcode;
+	ins->rex2 |= REX_X;
 
       switch ((*ins->codep & 0x3))
 	{
@@ -8973,9 +9063,12 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
 
       if (ins->address_mode != mode_64bit)
 	{
+	  if (ins->evex_type != evex_default
+	      || (ins->rex2 & (REX_B | REX_X)))
+	    return &bad_opcode;
 	  /* In 16/32-bit mode silently ignore following bits.  */
 	  ins->rex &= ~REX_B;
-	  ins->vex.r = true;
+	  ins->rex2 &= ~REX_R;
 	}
 
       ins->need_vex = 4;
@@ -9391,6 +9484,13 @@  print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
       dp = get_valid_dis386 (dp, &ins);
       if (dp == &err_opcode)
 	goto fetch_error_out;
+
+      /* For APX instructions promoted from legacy maps 0/1, prefix
+	 0x66 is interpreted as the operand size override.  */
+      if (ins.evex_type == evex_from_legacy
+	  && ins.vex.prefix == DATA_PREFIX_OPCODE)
+	sizeflag ^= DFLAG;
+
       if (dp != NULL && putop (&ins, dp->name, sizeflag) == 0)
 	{
 	  if (!get_sib (&ins, sizeflag))
@@ -10280,7 +10380,7 @@  putop (instr_info *ins, const char *in_template, int sizeflag)
 		{
 		case 'X':
 		  if (!ins->vex.evex || ins->vex.b || ins->vex.ll >= 2
-		      || !ins->vex.r
+		      || (ins->rex2 & REX_R)
 		      || (ins->modrm.mod == 3 && (ins->rex & REX_X))
 		      || !ins->vex.v || ins->vex.mask_register_specifier)
 		    break;
@@ -11174,7 +11274,7 @@  print_register (instr_info *ins, unsigned int reg, unsigned int rexmask,
     case b_swap_mode:
       if (reg & 4)
 	USED_REX (0);
-      if (ins->rex)
+      if (ins->rex || ins->rex2)
 	names = att_names8rex;
       else
 	names = att_names8;
@@ -11390,7 +11490,7 @@  OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
   int riprel = 0;
   int shift;
 
-  if (ins->vex.evex)
+  if (ins->vex.evex && ins->evex_type == evex_default)
     {
 
       /* Zeroing-masking is invalid for memory destinations. Set the flag
@@ -11737,7 +11837,7 @@  OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
 
 	      if (ins->rex & REX_R)
 	        modrm_reg += 8;
-	      if (!ins->vex.r)
+	      if (ins->rex2 & REX_R)
 	        modrm_reg += 16;
 	      if (vindex == modrm_reg)
 		oappend (ins, "/(bad)");
@@ -11939,10 +12039,7 @@  OP_indirE (instr_info *ins, int bytemode, int sizeflag)
 static bool
 OP_G (instr_info *ins, int bytemode, int sizeflag)
 {
-  if (ins->vex.evex && !ins->vex.r && ins->address_mode == mode_64bit)
-    oappend (ins, "(bad)");
-  else
-    print_register (ins, ins->modrm.reg, REX_R, bytemode, sizeflag);
+  print_register (ins, ins->modrm.reg, REX_R, bytemode, sizeflag);
   return true;
 }
 
@@ -12572,7 +12669,7 @@  OP_XMM (instr_info *ins, int bytemode, int sizeflag ATTRIBUTE_UNUSED)
     reg += 8;
   if (ins->vex.evex)
     {
-      if (!ins->vex.r)
+      if (ins->rex2 & REX_R)
 	reg += 16;
     }
 
@@ -13579,7 +13676,7 @@  DistinctDest_Fixup (instr_info *ins, int bytemode, int sizeflag)
   /* Calc destination register number.  */
   if (ins->rex & REX_R)
     modrm_reg += 8;
-  if (!ins->vex.r)
+  if (ins->rex2 & REX_R)
     modrm_reg += 16;
 
   /* Calc src1 register number.  */
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 6b8eb729797..f43cb1ecf7c 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -1023,6 +1023,7 @@  process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
     SPACE(0F),
     SPACE(0F38),
     SPACE(0F3A),
+    SPACE(EVEXMAP4),
     SPACE(EVEXMAP5),
     SPACE(EVEXMAP6),
     SPACE(XOP08),
@@ -1121,6 +1122,15 @@  process_i386_opcode_modifier (FILE *table, char *mod, unsigned int space,
 	fprintf (stderr,
 		 "%s: %d: W modifier without Word/Dword/Qword operand(s)\n",
 		 filename, lineno);
+      if (modifiers[Vex].value
+	  || (space > SPACE_0F
+	      && !(space == SPACE_EVEXMAP4
+		   || modifiers[EVex].value
+		   || modifiers[Disp8MemShift].value
+		   || modifiers[Broadcast].value
+		   || modifiers[Masking].value
+		   || modifiers[SAE].value)))
+	modifiers[No_egpr].value = 1;
     }
 
   if (space >= ARRAY_SIZE (spaces) || !spaces[space])
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index a055db5ce42..9dd5625f54d 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -972,6 +972,7 @@  typedef struct insn_template
      1: 0F opcode prefix / space.
      2: 0F38 opcode prefix / space.
      3: 0F3A opcode prefix / space.
+     4: EVEXMAP4 opcode prefix / space.
      5: EVEXMAP5 opcode prefix / space.
      6: EVEXMAP6 opcode prefix / space.
      8: XOP 08 opcode space.
@@ -982,6 +983,7 @@  typedef struct insn_template
 #define SPACE_0F	1
 #define SPACE_0F38	2
 #define SPACE_0F3A	3
+#define SPACE_EVEXMAP4  4
 #define SPACE_EVEXMAP5	5
 #define SPACE_EVEXMAP6	6
 #define SPACE_XOP08	8
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 80248e5b72c..791a9fe0177 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -109,6 +109,7 @@ 
 #define SpaceXOP09 OpcodeSpace=SPACE_XOP09
 #define SpaceXOP0A OpcodeSpace=SPACE_XOP0A
 
+#define EVexMap4 OpcodeSpace=SPACE_EVEXMAP4
 #define EVexMap5 OpcodeSpace=SPACE_EVEXMAP5
 #define EVexMap6 OpcodeSpace=SPACE_EVEXMAP6
 
@@ -187,6 +188,7 @@  mov, 0xf24, i386|No64, D|RegMem|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|No_qSuf, { Te
 
 // Move after swapping the bytes
 movbe, 0x0f38f0, Movbe, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+movbe, 0x60, Movbe|APX_F|x64, D|Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVex128|EVexMap4, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // Move with sign extend.
 movsb, 0xfbe, i386, Modrm|No_bSuf|No_sSuf, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
@@ -300,6 +302,9 @@  sbb, 0x18, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
 sbb, 0x83/3, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
 sbb, 0x1c, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
 sbb, 0x80/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+sbb, 0x18, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+sbb, 0x83/3, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
+sbb, 0x80/3, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 
 cmp, 0x38, 0, D|W|CheckOperandSize|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 cmp, 0x83/7, 0, Modrm|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
@@ -332,9 +337,14 @@  adc, 0x10, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg
 adc, 0x83/2, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
 adc, 0x14, 0, W|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
 adc, 0x80/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+adc, 0x10, APX_F|x64, D|W|CheckOperandSize|Modrm|EVex128|EVexMap4|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+adc, 0x83/2, APX_F|x64, Modrm|EVex128|EVexMap4|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex }
+adc, 0x80/2, APX_F|x64, W|Modrm|EVex128|EVexMap4|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 
 neg, 0xf6/3, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+
 not, 0xf6/2, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+not, 0xf6/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 
 aaa, 0x37, No64, NoSuf, {}
 aas, 0x3f, No64, NoSuf, {}
@@ -395,11 +405,19 @@  rcl, 0xd0/2, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|
 rcl, 0xc0/2, i186, W|Modrm|No_sSuf, { Imm8, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 rcl, 0xd2/2, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 rcl, 0xd0/2, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcl, 0xd0/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcl, 0xc0/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Imm8, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcl, 0xd2/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcl, 0xd0/2, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 
 rcr, 0xd0/3, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 rcr, 0xc0/3, i186, W|Modrm|No_sSuf, { Imm8, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 rcr, 0xd2/3, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 rcr, 0xd0/3, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcr, 0xd0/3, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcr, 0xc0/3, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Imm8, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcr, 0xd2/3, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
+rcr, 0xd0/3, APX_F|x64, W|Modrm|No_sSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 
 sal, 0xd0/4, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
 sal, 0xc0/4, i186, W|Modrm|No_sSuf, { Imm8, Reg8|Reg16|Reg32|Reg64|Byte|Word|Dword|Qword|Unspecified|BaseIndex }
@@ -1312,13 +1330,16 @@  getsec, 0xf37, SMX, NoSuf, {}
 
 invept, 0x660f3880, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
 invept, 0x660f3880, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
+invept, 0xf3f0, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
 invvpid, 0x660f3881, EPT|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
 invvpid, 0x660f3881, EPT|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
+invvpid, 0xf3f1, APX_F|EPT|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
 
 // INVPCID instruction
 
 invpcid, 0x660f3882, INVPCID|No64, Modrm|IgnoreSize|NoSuf, { Oword|Unspecified|BaseIndex, Reg32 }
 invpcid, 0x660f3882, INVPCID|x64, Modrm|NoSuf|NoRex64, { Oword|Unspecified|BaseIndex, Reg64 }
+invpcid, 0xf3f2, APX_F|INVPCID|x64, Modrm|NoSuf|NoRex64|EVex128|EVexMap4, { Oword|Unspecified|BaseIndex, Reg64 }
 
 // SSSE3 instructions.
 
@@ -1418,7 +1439,9 @@  pcmpestrm, 0x660f3a60, SSE4_2|x64, Modrm|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { I
 pcmpistri<sse42>, 0x660f3a63, <sse42:cpu>, Modrm|<sse42:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
 pcmpistrm<sse42>, 0x660f3a62, <sse42:cpu>, Modrm|<sse42:attr>|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegXMM }
 crc32, 0xf20f38f0, SSE4_2, W|Modrm|No_sSuf|No_qSuf, { Reg8|Reg16|Reg32|Unspecified|BaseIndex, Reg32 }
+crc32, 0xf0, APX_F|x64, W|Modrm|No_sSuf|No_qSuf|EVex128|EVexMap4, { Reg8|Reg16|Reg32|Unspecified|BaseIndex, Reg32 }
 crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }
+crc32, 0xf0, APX_F|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf|EVex128|EVexMap4, { Reg8|Reg64|Unspecified|BaseIndex, Reg64 }
 
 // xsave/xrstor New Instructions.
 
@@ -1822,13 +1845,21 @@  xtest, 0xf01d6, HLE|RTM, NoSuf, {}
 // BMI2 instructions.
 
 bzhi, 0xf5, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+bzhi, 0xf5, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 mulx, 0xf2f6, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+mulx, 0xf2f6, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 pdep, 0xf2f5, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+pdep, 0xf2f5, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 pext, 0xf3f5, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+pext, 0xf3f5, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 rorx, 0xf2f0, BMI2, Modrm|CheckOperandSize|Vex128|Space0F3A|No_bSuf|No_wSuf|No_sSuf, { Imm8|Imm8S, Reg32|Reg64|Dword|Qword|Unspecified|BaseIndex, Reg32|Reg64 }
+rorx, 0xf2f0, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F3A|No_bSuf|No_wSuf|No_sSuf, { Imm8|Imm8S, Reg32|Reg64|Dword|Qword|Unspecified|BaseIndex, Reg32|Reg64 }
 sarx, 0xf3f7, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+sarx, 0xf3f7, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 shlx, 0x66f7, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shlx, 0x66f7, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 shrx, 0xf2f7, BMI2, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+shrx, 0xf2f7, BMI2|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 
 // FMA4 instructions
 
@@ -1899,10 +1930,15 @@  lwpins, 0x12/0, LWP, Modrm|SpaceXOP0A|NoSuf|VexVVVV|Vex, { Imm32|Imm32S, Reg32|U
 // BMI instructions
 
 andn, 0xf2, BMI, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
+andn, 0xf2, BMI|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64, Reg32|Reg64 }
 bextr, 0xf7, BMI, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+bextr, 0xf7, BMI|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|SwapSources|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsi, 0xf3/3, BMI, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsi, 0xf3/3, BMI|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsmsk, 0xf3/2, BMI, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsmsk, 0xf3/2, BMI|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 blsr, 0xf3/1, BMI, Modrm|CheckOperandSize|Vex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+blsr, 0xf3/1, BMI|APX_F, Modrm|CheckOperandSize|EVex128|Space0F38|VexVVVV|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 tzcnt, 0xf30fbc, BMI, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|Word|Dword|Qword|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 
 // TBM instructions
@@ -2007,7 +2043,9 @@  xstore, 0xfa7c0, PadLock, NoSuf|RepPrefixOk, {}
 
 // Multy-precision Add Carry, rdseed instructions.
 adcx, 0x660f38f6, ADX, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+adcx, 0x6666, ADX|APX_F|x64, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 adox, 0xf30f38f6, ADX, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
+adox, 0xf366, ADX|APX_F|x64, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|EVex128|EVexMap4, { Reg32|Reg64|Unspecified|BaseIndex, Reg32|Reg64 }
 rdseed, 0xfc7/7, RdSeed, Modrm|NoSuf, { Reg16|Reg32|Reg64 }
 
 // SMAP instructions.
@@ -2031,13 +2069,20 @@  bndldx, 0x0f1a, MPX, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex, RegBND }
 
 // SHA instructions.
 sha1rnds4, 0xf3acc, SHA, Modrm|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
+sha1rnds4, 0xd4, SHA|APX_F, Modrm|NoSuf|EVex128|EVexMap4, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
 sha1nexte, 0xf38c8, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha1nexte, 0xd8, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha1msg1, 0xf38c9, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha1msg1, 0xd9, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha1msg2, 0xf38ca, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha1msg2, 0xda, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256rnds2, 0xf38cb, SHA, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256rnds2, 0xf38cb, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha256rnds2, 0xdb, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256msg1, 0xf38cc, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha256msg1, 0xdc, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256msg2, 0xf38cd, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+sha256msg2, 0xdd, SHA|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { RegXMM|Unspecified|BaseIndex, RegXMM }
 
 // SHA512 instructions.
 
@@ -2104,8 +2149,11 @@  kxnor<bw>, 0x<bw:kpfx>46, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, {
 kxor<bw>, 0x<bw:kpfx>47, <bw:kcpu>, Modrm|Vex256|Space0F|VexVVVV|VexW0|NoSuf, { RegMask, RegMask, RegMask }
 
 kmov<bw>, 0x<bw:kpfx>90, <bw:kcpu>, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMask|<bw:elem>|Unspecified|BaseIndex, RegMask }
+kmov<bw>, 0x<bw:kpfx>90, <bw:kcpu>|APX_F, Modrm|EVex128|Space0F|VexW0|NoSuf, { RegMask|<bw:elem>|Unspecified|BaseIndex, RegMask }
 kmov<bw>, 0x<bw:kpfx>91, <bw:kcpu>, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMask, <bw:elem>|Unspecified|BaseIndex }
+kmov<bw>, 0x<bw:kpfx>91, <bw:kcpu>|APX_F, Modrm|EVex128|Space0F|VexW0|NoSuf, { RegMask, <bw:elem>|Unspecified|BaseIndex }
 kmov<bw>, 0x<bw:kpfx>92, <bw:kcpu>, D|Modrm|Vex128|Space0F|VexW0|NoSuf, { Reg32, RegMask }
+kmov<bw>, 0x<bw:kpfx>92, <bw:kcpu>|APX_F, D|Modrm|EVex128|Space0F|VexW0|NoSuf, { Reg32, RegMask }
 
 knot<bw>, 0x<bw:kpfx>44, <bw:kcpu>, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMask, RegMask }
 kortest<bw>, 0x<bw:kpfx>98, <bw:kcpu>, Modrm|Vex128|Space0F|VexW0|NoSuf, { RegMask, RegMask }
@@ -2626,8 +2674,11 @@  kadd<dq>, 0x<dq:kpfx>4a, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|
 kand<dq>, 0x<dq:kpfx>41, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
 kandn<dq>, 0x<dq:kpfx>42, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf|Optimize, { RegMask, RegMask, RegMask }
 kmov<dq>, 0x<dq:kpfx>90, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask|<dq:elem>|Unspecified|BaseIndex, RegMask }
+kmov<dq>, 0x<dq:kpfx>90, AVX512BW|APX_F, Modrm|EVex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask|<dq:elem>|Unspecified|BaseIndex, RegMask }
 kmov<dq>, 0x<dq:kpfx>91, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, <dq:elem>|Unspecified|BaseIndex }
+kmov<dq>, 0x<dq:kpfx>91, AVX512BW|APX_F, Modrm|EVex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, <dq:elem>|Unspecified|BaseIndex }
 kmov<dq>, 0xf292, AVX512BW, D|Modrm|Vex128|Space0F|<dq:vexw64>|<dq:kvsz>|NoSuf, { <dq:gpr>, RegMask }
+kmov<dq>, 0xf292, AVX512BW|APX_F, D|Modrm|EVex128|Space0F|<dq:vexw64>|<dq:kvsz>|NoSuf, { <dq:gpr>, RegMask }
 knot<dq>, 0x<dq:kpfx>44, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask }
 kor<dq>, 0x<dq:kpfx>45, AVX512BW, Modrm|Vex256|Space0F|VexVVVV|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask, RegMask }
 kortest<dq>, 0x<dq:kpfx>98, AVX512BW, Modrm|Vex128|Space0F|VexW1|<dq:kvsz>|NoSuf, { RegMask, RegMask }
@@ -3046,9 +3097,13 @@  rdsspq, 0xf30f1e/1, SHSTK|x64, Modrm|NoSuf, { Reg64 }
 saveprevssp, 0xf30f01ea, SHSTK, NoSuf, {}
 rstorssp, 0xf30f01/5, SHSTK, Modrm|NoSuf, { Qword|Unspecified|BaseIndex }
 wrssd, 0x0f38f6, SHSTK, Modrm|IgnoreSize|NoSuf, { Reg32, Dword|Unspecified|BaseIndex }
+wrssd, 0x66, SHSTK|APX_F|x64, Modrm|IgnoreSize|NoSuf|EVex128|EVexMap4, { Reg32, Dword|Unspecified|BaseIndex }
 wrssq, 0x0f38f6, SHSTK|x64, Modrm|NoSuf|Size64, { Reg64, Qword|Unspecified|BaseIndex }
+wrssq, 0x66, APX_F|SHSTK|x64, Modrm|NoSuf|Size64|EVex128|EVexMap4, { Reg64, Qword|Unspecified|BaseIndex }
 wrussd, 0x660f38f5, SHSTK, Modrm|IgnoreSize|NoSuf, { Reg32, Dword|Unspecified|BaseIndex }
+wrussd, 0x6665, SHSTK|APX_F|x64, Modrm|IgnoreSize|NoSuf|EVex128|EVexMap4, { Reg32, Dword|Unspecified|BaseIndex }
 wrussq, 0x660f38f5, SHSTK|x64, Modrm|NoSuf, { Reg64, Qword|Unspecified|BaseIndex }
+wrussq, 0x6665, SHSTK|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Reg64, Qword|Unspecified|BaseIndex }
 setssbsy, 0xf30f01e8, SHSTK, NoSuf, {}
 clrssbsy, 0xf30fae/6, SHSTK, Modrm|NoSuf, { Qword|Unspecified|BaseIndex }
 endbr64, 0xf30f1efa, IBT, NoSuf, {}
@@ -3096,7 +3151,9 @@  cldemote, 0x0f1c/0, CLDEMOTE, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 // MOVDIR[I,64B] instructions.
 
 movdiri, 0xf38f9, MOVDIRI, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+movdiri, 0xf9, MOVDIRI|APX_F|x64, Modrm|CheckOperandSize|IgnoreSize|No_bSuf|No_wSuf|No_sSuf|EVex128|EVexMap4, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 movdir64b, 0x660f38f8, MOVDIR64B, Modrm|AddrPrefixOpReg|NoSuf, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+movdir64b, 0x66f8, MOVDIR64B|APX_F|x64, Modrm|AddrPrefixOpReg|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, Reg32|Reg64 }
 
 // MOVEDIR instructions end.
 
@@ -3125,7 +3182,9 @@  vcvtneps2bf16<Vxy>, 0xf372, AVX_NE_CONVERT, Modrm|<Vxy:vex>|Space0F38|VexW0|NoSu
 // ENQCMD instructions.
 
 enqcmd, 0xf20f38f8, ENQCMD, Modrm|AddrPrefixOpReg|NoSuf, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+enqcmd, 0xf2f8, ENQCMD|APX_F|x64, Modrm|AddrPrefixOpReg|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, Reg32|Reg64 }
 enqcmds, 0xf30f38f8, ENQCMD, Modrm|AddrPrefixOpReg|NoSuf, { Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+enqcmds, 0xf3f8, ENQCMD|APX_F|x64, Modrm|AddrPrefixOpReg|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, Reg32|Reg64 }
 
 // ENQCMD instructions end.
 
@@ -3187,7 +3246,9 @@  xresldtrk, 0xf20f01e9, TSXLDTRK, NoSuf, {}
 // AMX instructions.
 
 ldtilecfg, 0x49/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+ldtilecfg, 0x49/0, AMX_TILE|APX_F|x64, Modrm|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 sttilecfg, 0x6649/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+sttilecfg, 0x6649/0, AMX_TILE|APX_F|x64, Modrm|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 
 tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
@@ -3200,8 +3261,11 @@  tdpbusd, 0x665e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|
 tdpbsud, 0xf35e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
 tileloadd, 0xf24b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tileloadd, 0xf24b, AMX_TILE|APX_F|x64, Sibmem|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
 tileloaddt1, 0x664b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tileloaddt1, 0x664b, AMX_TILE|APX_F|x64, Sibmem|EVex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
 tilestored, 0xf34b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
+tilestored, 0xf34b, AMX_TILE|APX_F|x64, Sibmem|EVex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
 
 tilerelease, 0x49c0, AMX_TILE|x64, Vex128|Space0F38|VexW0|NoSuf, {}
 
@@ -3213,15 +3277,25 @@  tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
 
 loadiwkey, 0xf30f38dc, KL, Load|Modrm|NoSuf, { RegXMM, RegXMM }
 encodekey128, 0xf30f38fa, KL, Modrm|NoSuf, { Reg32, Reg32 }
+encodekey128, 0xf3da, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Reg32, Reg32 }
 encodekey256, 0xf30f38fb, KL, Modrm|NoSuf, { Reg32, Reg32 }
+encodekey256, 0xf3db, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Reg32, Reg32 }
 aesenc128kl, 0xf30f38dc, KL, Modrm|NoSuf, { Unspecified|BaseIndex, RegXMM }
+aesenc128kl, 0xf3dc, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, RegXMM }
 aesdec128kl, 0xf30f38dd, KL, Modrm|NoSuf, { Unspecified|BaseIndex, RegXMM }
+aesdec128kl, 0xf3dd, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, RegXMM }
 aesenc256kl, 0xf30f38de, KL, Modrm|NoSuf, { Unspecified|BaseIndex, RegXMM }
+aesenc256kl, 0xf3de, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, RegXMM }
 aesdec256kl, 0xf30f38df, KL, Modrm|NoSuf, { Unspecified|BaseIndex, RegXMM }
+aesdec256kl, 0xf3df, KL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex, RegXMM }
 aesencwide128kl, 0xf30f38d8/0, WideKL, Modrm|NoSuf, { Unspecified|BaseIndex }
+aesencwide128kl, 0xf3d8/0, WideKL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex }
 aesdecwide128kl, 0xf30f38d8/1, WideKL, Modrm|NoSuf, { Unspecified|BaseIndex }
+aesdecwide128kl, 0xf3d8/1, WideKL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex }
 aesencwide256kl, 0xf30f38d8/2, WideKL, Modrm|NoSuf, { Unspecified|BaseIndex }
+aesencwide256kl, 0xf3d8/2, WideKL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex }
 aesdecwide256kl, 0xf30f38d8/3, WideKL, Modrm|NoSuf, { Unspecified|BaseIndex }
+aesdecwide256kl, 0xf3d8/3, WideKL|APX_F|x64, Modrm|NoSuf|EVex128|EVexMap4, { Unspecified|BaseIndex }
 
 // KEYLOCKER instructions end.
 
@@ -3370,6 +3444,7 @@  prefetchit1, 0xf18/6, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex
 // CMPCCXADD instructions.
 
 cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD|x64, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD|x64|APX_F, Modrm|EVex128|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // CMPCCXADD instructions end.
 
@@ -3389,9 +3464,13 @@  wrmsrlist, 0xf30f01c6, MSRLIST|x64, NoSuf, {}
 // RAO-INT instructions.
 
 aadd, 0xf38fc, RAO_INT, Modrm|IgnoreSize|CheckOperandSize|NoSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+aadd, 0xfc, RAO_INT|APX_F|x64, Modrm|IgnoreSize|CheckOperandSize|NoSuf|EVex128|EVexMap4, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 aand, 0x660f38fc, RAO_INT, Modrm|IgnoreSize|CheckOperandSize|NoSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+aand, 0x66fc, RAO_INT|APX_F|x64, Modrm|IgnoreSize|CheckOperandSize|NoSuf|EVex128|EVexMap4, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 aor, 0xf20f38fc, RAO_INT, Modrm|IgnoreSize|CheckOperandSize|NoSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+aor, 0xf2fc, RAO_INT|APX_F|x64, Modrm|IgnoreSize|CheckOperandSize|NoSuf|EVex128|EVexMap4, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 axor, 0xf30f38fc, RAO_INT, Modrm|IgnoreSize|CheckOperandSize|NoSuf, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+axor, 0xf3fc, RAO_INT|APX_F|x64, Modrm|IgnoreSize|CheckOperandSize|NoSuf|EVex128|EVexMap4, { Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // RAO-INT instructions end.
 
@@ -3408,3 +3487,4 @@  erets, 0xf20f01ca, FRED|x64, NoSuf, {}
 eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
 
 // FRED instructions end.
+
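
For illustration only (not part of the diff): the EVEX map 4 and extended-EVEX
table entries above are what let these instructions name the new GPRs r16-r31,
since the legacy/VEX encodings cannot reach them. A minimal sketch of the kind
of GAS input the new entries are meant to accept; the register choices below
are illustrative assumptions, not taken from the patch's testsuite:

	# RAO-INT: legacy 0f38 form promoted to EVEX map 4
	aadd	%r16d, (%r31)
	# SHSTK shadow-stack write, promoted to EVEX map 4
	wrssq	%r22, (%r29)
	# KEYLOCKER, promoted to EVEX map 4
	encodekey256	%r18d, %r19d
	# existing opmask insn re-encoded with the extended EVEX prefix so its
	# GPR operand can be an eGPR
	kmovq	%r20, %k1

In each case the assembler has to pick the EVEX form added here whenever an
eGPR operand appears; with only legacy GPRs the pre-existing encodings remain
usable.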