Support APX zero-upper

Message ID 20240428105424.2428135-1-lili.cui@intel.com
State New
Headers
Series Support APX zero-upper |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Cui, Lili April 28, 2024, 10:54 a.m. UTC
  gas/ChangeLog:

        * config/tc-i386.c (need_evex_encoding): Added ZU.
        (is_apx_evex_encoding): Ditto.
        (build_apx_evex_prefix): Ditto.
        (process_suffix): ZU prefer second source register type.
        * testsuite/gas/i386/x86-64.exp: Added new tests for ZU.
        * testsuite/gas/i386/x86-64-apx-zu-intel.d: New test.
        * testsuite/gas/i386/x86-64-apx-zu-inval.l: Ditto.
        * testsuite/gas/i386/x86-64-apx-zu-inval.s: Ditto.
        * testsuite/gas/i386/x86-64-apx-zu.d: Ditto.
        * testsuite/gas/i386/x86-64-apx-zu.s: Ditto.

opcodes/ChangeLog:

        * i386-dis-evex-prefix.h: Handle PREFIX_EVEX_MAP4_40 ~
	PREFIX_EVEX_MAP4_4F.
        * i386-dis-evex.h: Ditto.
        * i386-dis.c (IMUL_Fixup): New function for ZU.
        * i386-gen.c: Added ZU.
        * i386-opc.h: Ditto.
        * i386-opc.tbl: Added new templates to support ZU.
---
 gas/config/tc-i386.c                         |  12 +-
 gas/testsuite/gas/i386/x86-64-apx-zu-intel.d |  58 ++++++++++
 gas/testsuite/gas/i386/x86-64-apx-zu-inval.l |  24 ++++
 gas/testsuite/gas/i386/x86-64-apx-zu-inval.s |  28 +++++
 gas/testsuite/gas/i386/x86-64-apx-zu.d       |  58 ++++++++++
 gas/testsuite/gas/i386/x86-64-apx-zu.s       |  53 +++++++++
 gas/testsuite/gas/i386/x86-64.exp            |   3 +
 opcodes/i386-dis-evex-prefix.h               | 112 +++++++++++++++++++
 opcodes/i386-dis-evex.h                      |  36 +++---
 opcodes/i386-dis.c                           |  29 +++++
 opcodes/i386-gen.c                           |   1 +
 opcodes/i386-opc.h                           |   4 +
 opcodes/i386-opc.tbl                         |   3 +
 13 files changed, 398 insertions(+), 23 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-zu-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-zu-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-zu-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-zu.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-zu.s
  

Comments

Jan Beulich April 30, 2024, 4:26 p.m. UTC | #1
On 28.04.2024 12:54, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1920,7 +1920,7 @@ static INLINE bool need_evex_encoding (const insn_template *t)
>    return i.encoding == encoding_evex
>  	|| i.encoding == encoding_evex512
>  	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
> -	|| i.mask.reg;
> +	|| i.mask.reg || t->opcode_modifier.zu;
>  }

I wonder if this is really needed. Can you clarify why/how you found a need
to add this?

> @@ -3980,7 +3980,8 @@ is_apx_evex_encoding (void)
>  {
>    return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4 || i.has_nf
>      || (i.vex.register_specifier
> -	&& (i.vex.register_specifier->reg_flags & RegRex2));
> +	&& (i.vex.register_specifier->reg_flags & RegRex2))
> +    || i.tm.opcode_modifier.zu;

This isn't needed - "i.tm.opcode_space == SPACE_EVEXMAP4" already covers
all you need.

> @@ -4285,8 +4286,9 @@ build_apx_evex_prefix (void)
>      i.vex.bytes[3] &= ~0x08;
>  
>    /* Encode the NDD bit of the instruction promoted from the legacy
> -     space.  */
> -  if (i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
> +     space. ZU shares the same bit with NDD.  */
> +  if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
> +      || i.tm.opcode_modifier.zu)
>      i.vex.bytes[3] |= 0x10;
>  
>    /* Encode the NF bit.  */
> @@ -9204,7 +9206,7 @@ match_template (char mnem_suffix)
>    /* APX insns acting on byte operands are WIG, yet that can't be expressed
>       in the templates (they're also covering word/dword/qword operands).  */
>    if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw &&
> -      i.types[i.operands - 1].bitfield.byte)
> +      i.types[i.operands - 1].bitfield.byte && !t->opcode_modifier.zu)

With a change request at the bottom this won't be needed anymore either,
I think.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.s
> @@ -0,0 +1,28 @@
> +# Check illegal APX-ZU instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	imulzub $0xa,%bl,%al
> +	imulzud $0xa,%ebx,%eax
> +	imulzu $0xa,%rbx,%rax
> +	imulzub $0xaaaa,%bl,%al
> +	imulzud $0xaaaa,%ebx,%eax
> +	imulzu $0xaaaa,%rbx,%rax
> +	imulzu $0xaaaa,%ebx,%rax
> +	imulzu $0xaaaa,%ebx,%rax
> +	setzuno  %eax
> +	setzub   %bx
> +	setzuae  %r8w
> +	setzue   %r9w
> +	setzune  %r10d
> +	setzube  %eax
> +	setzua   %bx
> +	setzus   %r18w
> +	setzuns  %r19w
> +	setzup   %r20d
> +	setzunp  %r21w
> +	setzul   %r22w
> +	setzuge  %r23d
> +	setzule  %r24w
> +	setzug   %r25w

How about having at least one case with a 64-bit register here, too?
Further perhaps better also have one use of %ah, %ch, %dh, or %bh here.

> @@ -14060,3 +14077,15 @@ JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag)
>      return OP_IMREG (ins, bytemode, sizeflag);
>    return OP_OFF64 (ins, bytemode, sizeflag);
>  }
> +
> +static bool
> +IMUL_Fixup (instr_info *ins, int bytemode, int sizeflag)
> +{
> +  /* Although imul do not support NDD, the EVEX.ND bit is used to control
> +     whether its destination register has its upper bits zeroed when OSIZE
> +     is 16b.  */
> +  if (ins->vex.nd)
> +    ins->mnemonicendp = stpcpy (ins->obuf, "imulzu");

Despite the comment this handling isn't restricted to 16-bit operand size.

> +  return OP_G (ins, bytemode, sizeflag);
> +}

Further for SETZUcc I can't even spot how you check that EVEX.NDD=1. With
EVEX.NDD=0 aiui this is ordinary SETcc, just EVEX-encoded.

> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -753,6 +753,9 @@ enum
>    /* Instrucion requires REX2 prefix.  */
>    Rex2,
>  
> +  /* Support zero upper */
> +  ZU,
> +
>    /* The last bitfield in i386_opcode_modifier.  */
>    Opcode_Modifier_Num
>  };
> @@ -800,6 +803,7 @@ typedef struct i386_opcode_modifier
>    unsigned int noegpr:1;
>    unsigned int nf:1;
>    unsigned int rex2:1;
> +  unsigned int zu:1;
>  } i386_opcode_modifier;

Does this really need to be a new attribute? I would have expected a new
OperandConstraint value would suffice.

> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -399,8 +399,10 @@ imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|U
>  imul, 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  imul, 0x6b, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> +imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 }
>  imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
> +imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 }
>  // imul with 2 operands mimics imul with 3 by putting the register in
>  // both i.rm.reg & i.rm.regmem fields.  RegKludge enables this
>  // transformation.

There's (once again) another adjustment wanted below here.

> @@ -528,6 +530,7 @@ loopne, 0xe0, x64, JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
>  
>  // Set byte on flag instructions.
>  set<cc>, 0xf9<cc:opc>/0, i386, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf, { Reg8|Unspecified|BaseIndex }
> +setzu<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }

Didn't we kind of agree to also permit

set<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|ZU, { Reg32|Reg64 }

? This then also makes more noticeable the question regarding EVEX.W: In the
latter form, the register used selects it. In the form you add it ought to be
EVexWIG, though, I would say (matching the .IGNORED in the spec).

Plus, as per one of the comments on the disassembler, don't we also need yet
another line permitting "{evex} setz %dl" and alike to be used?

Jan
  
Jan Beulich May 2, 2024, 5:58 a.m. UTC | #2
On 30.04.2024 18:26, Jan Beulich wrote:
> On 28.04.2024 12:54, Cui, Lili wrote:
>> @@ -14060,3 +14077,15 @@ JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag)
>>      return OP_IMREG (ins, bytemode, sizeflag);
>>    return OP_OFF64 (ins, bytemode, sizeflag);
>>  }
>> +
>> +static bool
>> +IMUL_Fixup (instr_info *ins, int bytemode, int sizeflag)
>> +{
>> +  /* Although imul do not support NDD, the EVEX.ND bit is used to control
>> +     whether its destination register has its upper bits zeroed when OSIZE
>> +     is 16b.  */
>> +  if (ins->vex.nd)
>> +    ins->mnemonicendp = stpcpy (ins->obuf, "imulzu");
> 
> Despite the comment this handling isn't restricted to 16-bit operand size.

Plus in suffix-always mode no w suffix would be output. Together with

>> +  return OP_G (ins, bytemode, sizeflag);
>> +}
> 
> Further for SETZUcc I can't even spot how you check that EVEX.NDD=1. With
> EVEX.NDD=0 aiui this is ordinary SETcc, just EVEX-encoded.

... this and the disassembler implication from ...

>> @@ -528,6 +530,7 @@ loopne, 0xe0, x64, JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
>>  
>>  // Set byte on flag instructions.
>>  set<cc>, 0xf9<cc:opc>/0, i386, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf, { Reg8|Unspecified|BaseIndex }
>> +setzu<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }
> 
> Didn't we kind of agree to also permit
> 
> set<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|ZU, { Reg32|Reg64 }
> 
> ? This then also makes more noticeable the question regarding EVEX.W: In the
> latter form, the register used selects it. In the form you add it ought to be
> EVexWIG, though, I would say (matching the .IGNORED in the spec).
> 
> Plus, as per one of the comments on the disassembler, don't we also need yet
> another line permitting "{evex} setz %dl" and alike to be used?

... this, perhaps instead of a new fixup routine we want a %ZU macro,
emitting "zu" when EVEX.nd is set.

Jan
  
Cui, Lili May 8, 2024, 3:53 a.m. UTC | #3
> On 28.04.2024 12:54, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1920,7 +1920,7 @@ static INLINE bool need_evex_encoding (const
> insn_template *t)
> >    return i.encoding == encoding_evex
> >  	|| i.encoding == encoding_evex512
> >  	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
> > -	|| i.mask.reg;
> > +	|| i.mask.reg || t->opcode_modifier.zu;
> >  }
> 
> I wonder if this is really needed. Can you clarify why/how you found a need
> to add this?
> 

Dropped.

> > @@ -3980,7 +3980,8 @@ is_apx_evex_encoding (void)  {
> >    return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4 || i.has_nf
> >      || (i.vex.register_specifier
> > -	&& (i.vex.register_specifier->reg_flags & RegRex2));
> > +	&& (i.vex.register_specifier->reg_flags & RegRex2))
> > +    || i.tm.opcode_modifier.zu;
> 
> This isn't needed - "i.tm.opcode_space == SPACE_EVEXMAP4" already covers
> all you need.
> 

Dropped.

> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.s
> > @@ -0,0 +1,28 @@
> > +# Check illegal APX-ZU instructions
> > +
> > +	.allow_index_reg
> > +	.text
> > +_start:
> > +	imulzub $0xa,%bl,%al
> > +	imulzud $0xa,%ebx,%eax
> > +	imulzu $0xa,%rbx,%rax
> > +	imulzub $0xaaaa,%bl,%al
> > +	imulzud $0xaaaa,%ebx,%eax
> > +	imulzu $0xaaaa,%rbx,%rax
> > +	imulzu $0xaaaa,%ebx,%rax
> > +	imulzu $0xaaaa,%ebx,%rax
> > +	setzuno  %eax
> > +	setzub   %bx
> > +	setzuae  %r8w
> > +	setzue   %r9w
> > +	setzune  %r10d
> > +	setzube  %eax
> > +	setzua   %bx
> > +	setzus   %r18w
> > +	setzuns  %r19w
> > +	setzup   %r20d
> > +	setzunp  %r21w
> > +	setzul   %r22w
> > +	setzuge  %r23d
> > +	setzule  %r24w
> > +	setzug   %r25w
> 
> How about having at least one case with a 64-bit register here, too?
> Further perhaps better also have one use of %ah, %ch, %dh, or %bh here.
> 

Added.

> > --- a/opcodes/i386-opc.h
> > +++ b/opcodes/i386-opc.h
> > @@ -753,6 +753,9 @@ enum
> >    /* Instrucion requires REX2 prefix.  */
> >    Rex2,
> >
> > +  /* Support zero upper */
> > +  ZU,
> > +
> >    /* The last bitfield in i386_opcode_modifier.  */
> >    Opcode_Modifier_Num
> >  };
> > @@ -800,6 +803,7 @@ typedef struct i386_opcode_modifier
> >    unsigned int noegpr:1;
> >    unsigned int nf:1;
> >    unsigned int rex2:1;
> > +  unsigned int zu:1;
> >  } i386_opcode_modifier;
> 
> Does this really need to be a new attribute? I would have expected a new
> OperandConstraint value would suffice.
> 
I added ZU to OperandConstraint, but...

> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -399,8 +399,10 @@ imul, 0xfaf, i386,
> > Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|U
> imul,
> > 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF,
> {
> > Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }  imul,
> > 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S,
> > Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }  imul,
> > 0x6b, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF,
> {
> > Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex,
> Reg16|Reg32|Reg64 }
> > +imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU,
> { Imm8S,
> > +Reg16|Unspecified|BaseIndex, Reg16 }
> >  imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, {
> > Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex,
> > Reg16|Reg32|Reg64 }  imul, 0x69, APX_F,
> > Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, {
> > Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex,
> > Reg16|Reg32|Reg64 }
> > +imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU,
> { Imm16,
> > +Reg16|Unspecified|BaseIndex, Reg16 }
> >  // imul with 2 operands mimics imul with 3 by putting the register in
> > // both i.rm.reg & i.rm.regmem fields.  RegKludge enables this  //
> > transformation.
> 
> There's (once again) another adjustment wanted below here.
> 

I found it conflicts with RegKludge, when adding these templates. 

+imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Zu, { Imm8S, Reg16 }
+imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Zu, { Imm16, Reg16 }

I think we need a new attribute for it.

Thanks,
Lili.
  
Cui, Lili May 9, 2024, 7:56 a.m. UTC | #4
> On 28.04.2024 12:54, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1920,7 +1920,7 @@ static INLINE bool need_evex_encoding (const
> insn_template *t)
> >    return i.encoding == encoding_evex
> >  	|| i.encoding == encoding_evex512
> >  	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
> > @@ -4285,8 +4286,9 @@ build_apx_evex_prefix (void)
> >      i.vex.bytes[3] &= ~0x08;
> >
> >    /* Encode the NDD bit of the instruction promoted from the legacy
> > -     space.  */
> > -  if (i.vex.register_specifier && i.tm.opcode_space ==
> > SPACE_EVEXMAP4)
> > +     space. ZU shares the same bit with NDD.  */  if
> > + ((i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
> > +      || i.tm.opcode_modifier.zu)
> >      i.vex.bytes[3] |= 0x10;
> >
> >    /* Encode the NF bit.  */
> > @@ -9204,7 +9206,7 @@ match_template (char mnem_suffix)
> >    /* APX insns acting on byte operands are WIG, yet that can't be expressed
> >       in the templates (they're also covering word/dword/qword operands).
> */
> >    if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw
> &&
> > -      i.types[i.operands - 1].bitfield.byte)
> > +      i.types[i.operands - 1].bitfield.byte &&
> > + !t->opcode_modifier.zu)
> 
> With a change request at the bottom this won't be needed anymore either, I
> think.
> 

That's a good idea.

> > @@ -14060,3 +14077,15 @@ JMPABS_Fixup (instr_info *ins, int bytemode,
> int sizeflag)
> >      return OP_IMREG (ins, bytemode, sizeflag);
> >    return OP_OFF64 (ins, bytemode, sizeflag);  }
> > +
> > +static bool
> > +IMUL_Fixup (instr_info *ins, int bytemode, int sizeflag) {
> > +  /* Although imul do not support NDD, the EVEX.ND bit is used to control
> > +     whether its destination register has its upper bits zeroed when OSIZE
> > +     is 16b.  */
> > +  if (ins->vex.nd)
> > +    ins->mnemonicendp = stpcpy (ins->obuf, "imulzu");
> 
> Despite the comment this handling isn't restricted to 16-bit operand size.
> 
> > +  return OP_G (ins, bytemode, sizeflag); }
> 
> Further for SETZUcc I can't even spot how you check that EVEX.NDD=1. With
> EVEX.NDD=0 aiui this is ordinary SETcc, just EVEX-encoded.
> 

Good point. I also found other issues with "{nf} imulzu" ({nf} was flushed), so I added a %ZU macro for them and dropped IMUL_Fixup. I also added more test cases for them.

+       case 'U':
+         if (l == 1 && (last[0] == 'Z'))
+           {
+             /* Although IMUL/SETcc does not support NDD, the EVEX.ND bit is
+                used to control whether its destination register has its upper
+                bits zeroed when OSIZE is 16b/8b.  */
+             if (ins->vex.nd)
+               {
+                 oappend (ins, "zu");
+                 /* When we print zu for the EVEX instruction, we no longer
+                    need prefix {evex}. */
+                 if (evex_printed == true && startswith (ins->obufp, "{evex}"))
+                   ins->obufp += 6;
+               }
+           }
+         else
+           abort ();
+         break;

> > @@ -528,6 +530,7 @@ loopne, 0xe0, x64,
> > JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
> >
> >  // Set byte on flag instructions.
> >  set<cc>, 0xf9<cc:opc>/0, i386,
> Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf,
> > { Reg8|Unspecified|BaseIndex }
> > +setzu<cc>, 0xf24<cc:opc>/0, APX_F,
> > +Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }
> 
> Didn't we kind of agree to also permit
> 
> set<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|ZU,
> { Reg32|Reg64 }
> 

We discussed this internally, and the spec folks thought that adding two SETZU formats to the spec was a bit redundant and might confuse users. Therefore, the spec will not be updated, and it would be a bit strange for binutils to add a separate format on its own.

Thanks,
Lili.
  
Jan Beulich May 14, 2024, 7:29 a.m. UTC | #5
On 09.05.2024 09:56, Cui, Lili wrote:
>> On 28.04.2024 12:54, Cui, Lili wrote:
>>> @@ -528,6 +530,7 @@ loopne, 0xe0, x64,
>>> JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
>>>
>>>  // Set byte on flag instructions.
>>>  set<cc>, 0xf9<cc:opc>/0, i386,
>> Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf,
>>> { Reg8|Unspecified|BaseIndex }
>>> +setzu<cc>, 0xf24<cc:opc>/0, APX_F,
>>> +Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }
>>
>> Didn't we kind of agree to also permit
>>
>> set<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|ZU,
>> { Reg32|Reg64 }
>>
> 
> We discussed this internally, and the spec folks thought that adding two SETZU formats to the spec was a bit redundant and might confuse users. Therefore, the spec will not be updated, it's a bit strange that binutils adds a separate format.

People who feel confused can use SETZU. But why make life more difficult
for people like me, who don't feel confused? If I feel confused by
anything, then by the odd ZU infix in especially SETZUNZ and SETZUZ. If
they want to avoid confusion, they can limit ZU as a suffix (Intel) or
infix (AT&T) to IMUL, while leaving SETcc to remain entirely without.

Jan
  
Jan Beulich May 14, 2024, 7:30 a.m. UTC | #6
On 08.05.2024 05:53, Cui, Lili wrote:
> I found it conflicts with RegKludge, when adding these templates. 
> 
> +imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Zu, { Imm8S, Reg16 }
> +imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|RegKludge|EVexMap4|NF|Zu, { Imm16, Reg16 }
> 
> I think we need a new attribute for it.

Hmm, yes, that makes it pretty much unavoidable then.

Jan
  
Cui, Lili May 15, 2024, 2:26 a.m. UTC | #7
> On 09.05.2024 09:56, Cui, Lili wrote:
> >> On 28.04.2024 12:54, Cui, Lili wrote:
> >>> @@ -528,6 +530,7 @@ loopne, 0xe0, x64,
> >>> JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
> >>>
> >>>  // Set byte on flag instructions.
> >>>  set<cc>, 0xf9<cc:opc>/0, i386,
> >> Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf,
> >>> { Reg8|Unspecified|BaseIndex }
> >>> +setzu<cc>, 0xf24<cc:opc>/0, APX_F,
> >>> +Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }
> >>
> >> Didn't we kind of agree to also permit
> >>
> >> set<cc>, 0xf24<cc:opc>/0, APX_F,
> Modrm|No_bSuf|No_sSuf|EVexMap4|ZU,
> >> { Reg32|Reg64 }
> >>
> >
> > We discussed this internally, and the spec folks thought that adding two
> SETZU formats to the spec was a bit redundant and might confuse users.
> Therefore, the spec will not be updated, it's a bit strange that binutils adds a
> separate format.
> 
> People who feel confused can use SETZU. But why make life more difficult for
> people like me, who don't feel confused? If I feel confused by anything, then
> by the odd ZU infix in especially SETZUNZ and SETZUZ. If they want to avoid
> confusion, they can limit ZU as a suffix (Intel) or infix (AT&T) to IMUL, while
> leaving SETcc to remain entirely without.
> 

Personally, I think you always have a good understanding of command formats and names. But I don't have the final say, sorry about that.

Lili.
  

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index b03746852d6..76a1a354392 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1920,7 +1920,7 @@  static INLINE bool need_evex_encoding (const insn_template *t)
   return i.encoding == encoding_evex
 	|| i.encoding == encoding_evex512
 	|| (t->opcode_modifier.vex && i.encoding == encoding_egpr)
-	|| i.mask.reg;
+	|| i.mask.reg || t->opcode_modifier.zu;
 }
 
 #define CPU_FLAGS_ARCH_MATCH		0x1
@@ -3980,7 +3980,8 @@  is_apx_evex_encoding (void)
 {
   return i.rex2 || i.tm.opcode_space == SPACE_EVEXMAP4 || i.has_nf
     || (i.vex.register_specifier
-	&& (i.vex.register_specifier->reg_flags & RegRex2));
+	&& (i.vex.register_specifier->reg_flags & RegRex2))
+    || i.tm.opcode_modifier.zu;
 }
 
 static INLINE bool
@@ -4285,8 +4286,9 @@  build_apx_evex_prefix (void)
     i.vex.bytes[3] &= ~0x08;
 
   /* Encode the NDD bit of the instruction promoted from the legacy
-     space.  */
-  if (i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
+     space. ZU shares the same bit with NDD.  */
+  if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_EVEXMAP4)
+      || i.tm.opcode_modifier.zu)
     i.vex.bytes[3] |= 0x10;
 
   /* Encode the NF bit.  */
@@ -9204,7 +9206,7 @@  match_template (char mnem_suffix)
   /* APX insns acting on byte operands are WIG, yet that can't be expressed
      in the templates (they're also covering word/dword/qword operands).  */
   if (t->opcode_space == SPACE_EVEXMAP4 && !t->opcode_modifier.vexw &&
-      i.types[i.operands - 1].bitfield.byte)
+      i.types[i.operands - 1].bitfield.byte && !t->opcode_modifier.zu)
     {
       gas_assert (t->opcode_modifier.w);
       i.tm.opcode_modifier.vexw = VEXWIG;
diff --git a/gas/testsuite/gas/i386/x86-64-apx-zu-intel.d b/gas/testsuite/gas/i386/x86-64-apx-zu-intel.d
new file mode 100644
index 00000000000..63247de6689
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-zu-intel.d
@@ -0,0 +1,58 @@ 
+#as:
+#objdump: -dw -Mintel
+#name: x86-64 APX ZU instructions with evex prefix encoding(Intel disassembly)
+#source: x86-64-apx-zu.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f4 7d 18 6b c3 0a[ 	]+imulzu ax,bx,0xa
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 6b 00 0a[ 	]+imulzu ax,WORD PTR \[eax\],0xa
+\s*[a-f0-9]+:\s*62 ec 7d 18 6b c1 0a[ 	]+imulzu r16w,r17w,0xa
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 6b 38 0a[ 	]+imulzu r31w,WORD PTR \[r16d\],0xa
+\s*[a-f0-9]+:\s*62 f4 7d 18 69 c3 82 23[ 	]+imulzu ax,bx,0x2382
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 69 00 82 23[ 	]+imulzu ax,WORD PTR \[eax\],0x2382
+\s*[a-f0-9]+:\s*62 ec 7d 18 69 c1 82 23[ 	]+imulzu r16w,r17w,0x2382
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 69 38 82 23[ 	]+imulzu r31w,WORD PTR \[r16d\],0x2382
+\s*[a-f0-9]+:\s*62 f4 7f 18 41 c0[ 	]+setzuno al
+\s*[a-f0-9]+:\s*62 f4 7f 18 42 c3[ 	]+setzub bl
+\s*[a-f0-9]+:\s*62 d4 7f 18 43 c0[ 	]+setzuae r8b
+\s*[a-f0-9]+:\s*62 d4 7f 18 44 c1[ 	]+setzue r9b
+\s*[a-f0-9]+:\s*62 d4 7f 18 45 c2[ 	]+setzune r10b
+\s*[a-f0-9]+:\s*62 fc 7f 18 46 c0[ 	]+setzube r16b
+\s*[a-f0-9]+:\s*62 fc 7f 18 47 c1[ 	]+setzua r17b
+\s*[a-f0-9]+:\s*62 fc 7f 18 48 c2[ 	]+setzus r18b
+\s*[a-f0-9]+:\s*62 fc 7f 18 49 c3[ 	]+setzuns r19b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4a c4[ 	]+setzup r20b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4b c5[ 	]+setzunp r21b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4c c6[ 	]+setzul r22b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4d c7[ 	]+setzuge r23b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4e c0[ 	]+setzule r24b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4f c1[ 	]+setzug r25b
+\s*[a-f0-9]+:\s*62 f4 7d 18 6b c3 0a[ 	]+imulzu ax,bx,0xa
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 6b 00 0a[ 	]+imulzu ax,WORD PTR \[eax\],0xa
+\s*[a-f0-9]+:\s*62 ec 7d 18 6b c1 0a[ 	]+imulzu r16w,r17w,0xa
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 6b 38 0a[ 	]+imulzu r31w,WORD PTR \[r16d\],0xa
+\s*[a-f0-9]+:\s*62 f4 7d 18 69 c3 82 23[ 	]+imulzu ax,bx,0x2382
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 69 00 82 23[ 	]+imulzu ax,WORD PTR \[eax\],0x2382
+\s*[a-f0-9]+:\s*62 ec 7d 18 69 c1 82 23[ 	]+imulzu r16w,r17w,0x2382
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 69 38 82 23[ 	]+imulzu r31w,WORD PTR \[r16d\],0x2382
+\s*[a-f0-9]+:\s*62 f4 7f 18 41 c0[ 	]+setzuno al
+\s*[a-f0-9]+:\s*62 f4 7f 18 42 c3[ 	]+setzub bl
+\s*[a-f0-9]+:\s*62 d4 7f 18 43 c0[ 	]+setzuae r8b
+\s*[a-f0-9]+:\s*62 d4 7f 18 44 c1[ 	]+setzue r9b
+\s*[a-f0-9]+:\s*62 d4 7f 18 45 c2[ 	]+setzune r10b
+\s*[a-f0-9]+:\s*62 fc 7f 18 46 c0[ 	]+setzube r16b
+\s*[a-f0-9]+:\s*62 fc 7f 18 47 c1[ 	]+setzua r17b
+\s*[a-f0-9]+:\s*62 fc 7f 18 48 c2[ 	]+setzus r18b
+\s*[a-f0-9]+:\s*62 fc 7f 18 49 c3[ 	]+setzuns r19b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4a c4[ 	]+setzup r20b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4b c5[ 	]+setzunp r21b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4c c6[ 	]+setzul r22b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4d c7[ 	]+setzuge r23b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4e c0[ 	]+setzule r24b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4f c1[ 	]+setzug r25b
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-apx-zu-inval.l b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.l
new file mode 100644
index 00000000000..b4fcba8558f
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.l
@@ -0,0 +1,24 @@ 
+.* Assembler messages:
+.*:6: Error: invalid instruction suffix for `imulzu'
+.*:7: Error: no such instruction: `imulzud \$0xa,%ebx,%eax'
+.*:8: Error: operand size mismatch for `imulzu'
+.*:9: Error: invalid instruction suffix for `imulzu'
+.*:10: Error: no such instruction: `imulzud \$0xaaaa,%ebx,%eax'
+.*:11: Error: operand size mismatch for `imulzu'
+.*:12: Error: operand size mismatch for `imulzu'
+.*:13: Error: operand size mismatch for `imulzu'
+.*:14: Error: operand size mismatch for `setzuno'
+.*:15: Error: operand size mismatch for `setzub'
+.*:16: Error: operand size mismatch for `setzuae'
+.*:17: Error: operand size mismatch for `setzue'
+.*:18: Error: operand size mismatch for `setzune'
+.*:19: Error: operand size mismatch for `setzube'
+.*:20: Error: operand size mismatch for `setzua'
+.*:21: Error: operand size mismatch for `setzus'
+.*:22: Error: operand size mismatch for `setzuns'
+.*:23: Error: operand size mismatch for `setzup'
+.*:24: Error: operand size mismatch for `setzunp'
+.*:25: Error: operand size mismatch for `setzul'
+.*:26: Error: operand size mismatch for `setzuge'
+.*:27: Error: operand size mismatch for `setzule'
+.*:28: Error: operand size mismatch for `setzug'
diff --git a/gas/testsuite/gas/i386/x86-64-apx-zu-inval.s b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.s
new file mode 100644
index 00000000000..7e6df944223
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-zu-inval.s
@@ -0,0 +1,28 @@ 
+# Check illegal APX-ZU instructions
+
+	.allow_index_reg
+	.text
+_start:
+	imulzub $0xa,%bl,%al
+	imulzud $0xa,%ebx,%eax
+	imulzu $0xa,%rbx,%rax
+	imulzub $0xaaaa,%bl,%al
+	imulzud $0xaaaa,%ebx,%eax
+	imulzu $0xaaaa,%rbx,%rax
+	imulzu $0xaaaa,%ebx,%rax
+	imulzu $0xaaaa,%ebx,%rax
+	setzuno  %eax
+	setzub   %bx
+	setzuae  %r8w
+	setzue   %r9w
+	setzune  %r10d
+	setzube  %eax
+	setzua   %bx
+	setzus   %r18w
+	setzuns  %r19w
+	setzup   %r20d
+	setzunp  %r21w
+	setzul   %r22w
+	setzuge  %r23d
+	setzule  %r24w
+	setzug   %r25w
diff --git a/gas/testsuite/gas/i386/x86-64-apx-zu.d b/gas/testsuite/gas/i386/x86-64-apx-zu.d
new file mode 100644
index 00000000000..badab2a35c8
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-zu.d
@@ -0,0 +1,58 @@ 
+#as:
+#objdump: -dw
+#name: x86-64 APX ZU instructions with evex prefix encoding
+#source: x86-64-apx-zu.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f4 7d 18 6b c3 0a[ 	]+imulzu \$0xa,%bx,%ax
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 6b 00 0a[ 	]+imulzu \$0xa,\(%eax\),%ax
+\s*[a-f0-9]+:\s*62 ec 7d 18 6b c1 0a[ 	]+imulzu \$0xa,%r17w,%r16w
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 6b 38 0a[ 	]+imulzu \$0xa,\(%r16d\),%r31w
+\s*[a-f0-9]+:\s*62 f4 7d 18 69 c3 82 23[ 	]+imulzu \$0x2382,%bx,%ax
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 69 00 82 23[ 	]+imulzu \$0x2382,\(%eax\),%ax
+\s*[a-f0-9]+:\s*62 ec 7d 18 69 c1 82 23[ 	]+imulzu \$0x2382,%r17w,%r16w
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 69 38 82 23[ 	]+imulzu \$0x2382,\(%r16d\),%r31w
+\s*[a-f0-9]+:\s*62 f4 7f 18 41 c0[ 	]+setzuno %al
+\s*[a-f0-9]+:\s*62 f4 7f 18 42 c3[ 	]+setzub %bl
+\s*[a-f0-9]+:\s*62 d4 7f 18 43 c0[ 	]+setzuae %r8b
+\s*[a-f0-9]+:\s*62 d4 7f 18 44 c1[ 	]+setzue %r9b
+\s*[a-f0-9]+:\s*62 d4 7f 18 45 c2[ 	]+setzune %r10b
+\s*[a-f0-9]+:\s*62 fc 7f 18 46 c0[ 	]+setzube %r16b
+\s*[a-f0-9]+:\s*62 fc 7f 18 47 c1[ 	]+setzua %r17b
+\s*[a-f0-9]+:\s*62 fc 7f 18 48 c2[ 	]+setzus %r18b
+\s*[a-f0-9]+:\s*62 fc 7f 18 49 c3[ 	]+setzuns %r19b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4a c4[ 	]+setzup %r20b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4b c5[ 	]+setzunp %r21b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4c c6[ 	]+setzul %r22b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4d c7[ 	]+setzuge %r23b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4e c0[ 	]+setzule %r24b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4f c1[ 	]+setzug %r25b
+\s*[a-f0-9]+:\s*62 f4 7d 18 6b c3 0a[ 	]+imulzu \$0xa,%bx,%ax
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 6b 00 0a[ 	]+imulzu \$0xa,\(%eax\),%ax
+\s*[a-f0-9]+:\s*62 ec 7d 18 6b c1 0a[ 	]+imulzu \$0xa,%r17w,%r16w
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 6b 38 0a[ 	]+imulzu \$0xa,\(%r16d\),%r31w
+\s*[a-f0-9]+:\s*62 f4 7d 18 69 c3 82 23[ 	]+imulzu \$0x2382,%bx,%ax
+\s*[a-f0-9]+:\s*67 62 f4 7d 18 69 00 82 23[ 	]+imulzu \$0x2382,\(%eax\),%ax
+\s*[a-f0-9]+:\s*62 ec 7d 18 69 c1 82 23[ 	]+imulzu \$0x2382,%r17w,%r16w
+\s*[a-f0-9]+:\s*67 62 6c 7d 18 69 38 82 23[ 	]+imulzu \$0x2382,\(%r16d\),%r31w
+\s*[a-f0-9]+:\s*62 f4 7f 18 41 c0[ 	]+setzuno %al
+\s*[a-f0-9]+:\s*62 f4 7f 18 42 c3[ 	]+setzub %bl
+\s*[a-f0-9]+:\s*62 d4 7f 18 43 c0[ 	]+setzuae %r8b
+\s*[a-f0-9]+:\s*62 d4 7f 18 44 c1[ 	]+setzue %r9b
+\s*[a-f0-9]+:\s*62 d4 7f 18 45 c2[ 	]+setzune %r10b
+\s*[a-f0-9]+:\s*62 fc 7f 18 46 c0[ 	]+setzube %r16b
+\s*[a-f0-9]+:\s*62 fc 7f 18 47 c1[ 	]+setzua %r17b
+\s*[a-f0-9]+:\s*62 fc 7f 18 48 c2[ 	]+setzus %r18b
+\s*[a-f0-9]+:\s*62 fc 7f 18 49 c3[ 	]+setzuns %r19b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4a c4[ 	]+setzup %r20b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4b c5[ 	]+setzunp %r21b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4c c6[ 	]+setzul %r22b
+\s*[a-f0-9]+:\s*62 fc 7f 18 4d c7[ 	]+setzuge %r23b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4e c0[ 	]+setzule %r24b
+\s*[a-f0-9]+:\s*62 dc 7f 18 4f c1[ 	]+setzug %r25b
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-apx-zu.s b/gas/testsuite/gas/i386/x86-64-apx-zu.s
new file mode 100644
index 00000000000..84f6d64e1c1
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-zu.s
@@ -0,0 +1,53 @@ 
+# Check 64-bit APX ZU instructions with EVEX prefix encoding
+
+	.allow_index_reg
+	.text
+_start:
+	imulzu $10, %bx, %ax
+	imulzu $10, (%eax), %ax
+	imulzu $10, %r17w, %r16w
+	imulzu $10, (%r16d), %r31w
+	imulzu $9090, %bx, %ax
+	imulzu $9090, (%eax), %ax
+	imulzu $9090, %r17w, %r16w
+	imulzu $9090, (%r16d), %r31w
+	setzuno  %al
+	setzub   %bl
+	setzuae  %r8b
+	setzue   %r9b
+	setzune  %r10b
+	setzube  %r16b
+	setzua   %r17b
+	setzus   %r18b
+	setzuns  %r19b
+	setzup   %r20b
+	setzunp  %r21b
+	setzul   %r22b
+	setzuge  %r23b
+	setzule  %r24b
+	setzug   %r25b
+
+	.intel_syntax noprefix
+	imulzu ax,bx, 10
+	imulzu ax,WORD PTR [eax],10
+	imulzu r16w,r17w,10
+	imulzu r31w,WORD PTR [r16d],10
+	imulzu ax,bx,9090
+	imulzu ax,WORD PTR [eax],9090
+	imulzu r16w,r17w,9090
+	imulzu r31w,WORD PTR [r16d],9090
+	setzuno  al
+	setzub  bl
+	setzuae  r8b
+	setzue  r9b
+	setzune  r10b
+	setzube  r16b
+	setzua   r17b
+	setzus   r18b
+	setzuns  r19b
+	setzup   r20b
+	setzunp  r21b
+	setzul   r22b
+	setzuge  r23b
+	setzule  r24b
+	setzug   r25b
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index e5e469cfd46..bbea1b3a2ac 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -389,6 +389,9 @@  run_dump_test "x86-64-apx-jmpabs-intel"
 run_dump_test "x86-64-apx-jmpabs-inval"
 run_dump_test "x86-64-apx-nf"
 run_dump_test "x86-64-apx-nf-intel"
+run_dump_test "x86-64-apx-zu"
+run_dump_test "x86-64-apx-zu-intel"
+run_list_test "x86-64-apx-zu-inval"
 run_dump_test "x86-64-apx_f-evex"
 run_dump_test "x86-64-avx512f-rcigrz-intel"
 run_dump_test "x86-64-avx512f-rcigrz"
diff --git a/opcodes/i386-dis-evex-prefix.h b/opcodes/i386-dis-evex-prefix.h
index 5e6423790b5..ca8926249c7 100644
--- a/opcodes/i386-dis-evex-prefix.h
+++ b/opcodes/i386-dis-evex-prefix.h
@@ -338,6 +338,118 @@ 
     { "vcmpp%XH", { MaskG, Vex, EXxh, EXxEVexS, CMP }, 0 },
     { "vcmps%XH", { MaskG, VexScalar, EXw, EXxEVexS, CMP }, 0 },
   },
+  /* PREFIX_EVEX_MAP4_40 */
+  {
+    { "%CFcmovoS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovoS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzuo",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_41 */
+  {
+    { "%CFcmovnoS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovnoS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzuno",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_42 */
+  {
+    { "%CFcmovbS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovbS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzub",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_43 */
+  {
+    { "%CFcmovaeS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovaeS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzuae",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_44 */
+  {
+    { "%CFcmoveS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmoveS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzue",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_45 */
+  {
+    { "%CFcmovneS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovneS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzune",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_46 */
+  {
+    { "%CFcmovbeS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovbeS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzube",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_47 */
+  {
+    { "%CFcmovaS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovaS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzua",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_48 */
+  {
+    { "%CFcmovsS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovsS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzus",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_49 */
+  {
+    { "%CFcmovnsS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovnsS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzuns",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4A */
+  {
+    { "%CFcmovpS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovpS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzup",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4B */
+  {
+    { "%CFcmovnpS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovnpS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzunp",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4C */
+  {
+    { "%CFcmovlS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovlS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzul",		{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4D */
+  {
+    { "%CFcmovgeS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovgeS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzuge",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4E */
+  {
+    { "%CFcmovleS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovleS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzule",	{ Eb }, 0 },
+  },
+  /* PREFIX_EVEX_MAP4_4F */
+  {
+    { "%CFcmovgS",	{ VexGv, Gv, Ev }, 0 },
+    { Bad_Opcode },
+    { "%CFcmovgS",	{ VexGv, Gv, Ev }, 0 },
+    { "setzug",		{ Eb }, 0 },
+  },
   /* PREFIX_EVEX_MAP4_F0 */
   {
     { "crc32A", { Gdq, Eb }, 0 },
diff --git a/opcodes/i386-dis-evex.h b/opcodes/i386-dis-evex.h
index b158f2b0b89..79b9ebbb0e7 100644
--- a/opcodes/i386-dis-evex.h
+++ b/opcodes/i386-dis-evex.h
@@ -947,23 +947,23 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     /* 40 */
-    { "%CFcmovoS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovnoS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovbS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovaeS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmoveS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovneS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovbeS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovaS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_40) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_41) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_42) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_43) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_44) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_45) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_46) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_47) },
     /* 48 */
-    { "%CFcmovsS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovnsS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovpS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovnpS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovlS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovgeS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovleS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
-    { "%CFcmovgS",	{ VexGv, Gv, Ev }, PREFIX_NP_OR_DATA },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_48) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_49) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4A) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4B) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4C) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4D) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4E) },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_4F) },
     /* 50 */
     { Bad_Opcode },
     { Bad_Opcode },
@@ -993,9 +993,9 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     /* 68 */
     { Bad_Opcode },
-    { "%NFimulS",	{ Gv, Ev, Iv }, PREFIX_NP_OR_DATA },
+    { "%NFimulS",	{ { IMUL_Fixup, v_mode }, Ev, Iv }, PREFIX_NP_OR_DATA },
     { Bad_Opcode },
-    { "%NFimulS",	{ Gv, Ev, sIb }, PREFIX_NP_OR_DATA },
+    { "%NFimulS",	{ { IMUL_Fixup, v_mode }, Ev, sIb }, PREFIX_NP_OR_DATA },
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 53f2d6cf6d8..c6eeff02103 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -107,6 +107,7 @@  static bool DistinctDest_Fixup (instr_info *, int, int);
 static bool PREFETCHI_Fixup (instr_info *, int, int);
 static bool PUSH2_POP2_Fixup (instr_info *, int, int);
 static bool JMPABS_Fixup (instr_info *, int, int);
+static bool IMUL_Fixup (instr_info *, int, int);
 
 static void ATTRIBUTE_PRINTF_3 i386_dis_printf (const disassemble_info *,
 						enum disassembler_style,
@@ -1199,6 +1200,22 @@  enum
   PREFIX_EVEX_0F3A67,
   PREFIX_EVEX_0F3AC2,
 
+  PREFIX_EVEX_MAP4_40,
+  PREFIX_EVEX_MAP4_41,
+  PREFIX_EVEX_MAP4_42,
+  PREFIX_EVEX_MAP4_43,
+  PREFIX_EVEX_MAP4_44,
+  PREFIX_EVEX_MAP4_45,
+  PREFIX_EVEX_MAP4_46,
+  PREFIX_EVEX_MAP4_47,
+  PREFIX_EVEX_MAP4_48,
+  PREFIX_EVEX_MAP4_49,
+  PREFIX_EVEX_MAP4_4A,
+  PREFIX_EVEX_MAP4_4B,
+  PREFIX_EVEX_MAP4_4C,
+  PREFIX_EVEX_MAP4_4D,
+  PREFIX_EVEX_MAP4_4E,
+  PREFIX_EVEX_MAP4_4F,
   PREFIX_EVEX_MAP4_F0,
   PREFIX_EVEX_MAP4_F1,
   PREFIX_EVEX_MAP4_F2,
@@ -14060,3 +14077,32 @@  JMPABS_Fixup (instr_info *ins, int bytemode, int sizeflag)
     return OP_IMREG (ins, bytemode, sizeflag);
   return OP_OFF64 (ins, bytemode, sizeflag);
 }
+
+/* Operand handler for the destination of the EVEX-encoded IMUL forms with
+   an immediate (the "%NFimulS" entries).  Turns the mnemonic into "imulzu"
+   when EVEX.ND is set, then defers to OP_G for the operand itself.  */
+
+static bool
+IMUL_Fixup (instr_info *ins, int bytemode, int sizeflag)
+{
+  /* Although imul does not support NDD, the EVEX.ND bit is used to control
+     whether its destination register has its upper bits zeroed when OSIZE
+     is 16b.  */
+  if (ins->vex.nd)
+    {
+      /* Splice "zu" in right after "imul" rather than overwriting the whole
+	 buffer, so that whatever the "%NF" and "S" parts of the template
+	 already produced around the mnemonic is preserved.  */
+      char *p = strstr (ins->obuf, "imul");
+
+      if (p != NULL)
+	{
+	  p += 4;
+	  memmove (p + 2, p, strlen (p) + 1);
+	  memcpy (p, "zu", 2);
+	  ins->mnemonicendp += 2;
+	}
+    }
+
+  return OP_G (ins, bytemode, sizeflag);
+}
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 3f63d157df6..a5e8ae540ea 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -491,6 +491,7 @@  static bitfield opcode_modifiers[] =
   BITFIELD (NoEgpr),
   BITFIELD (NF),
   BITFIELD (Rex2),
+  BITFIELD (ZU),
 };
 
 #define CLASS(n) #n, n
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index ce54c9d8d26..b08fbfabf7e 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -753,6 +753,9 @@  enum
   /* Instrucion requires REX2 prefix.  */
   Rex2,
 
+  /* Instruction supports APX zero-upper (EVEX.ND used as ZU).  */
+  ZU,
+
   /* The last bitfield in i386_opcode_modifier.  */
   Opcode_Modifier_Num
 };
@@ -800,6 +803,7 @@  typedef struct i386_opcode_modifier
   unsigned int noegpr:1;
   unsigned int nf:1;
   unsigned int rex2:1;
+  unsigned int zu:1;
 } i386_opcode_modifier;
 
 /* Operand classes.  */
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 7d248f64bbd..83b25d1d8b8 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -399,8 +399,10 @@  imul, 0xfaf, i386, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Reg16|Reg32|Reg64|U
 imul, 0xaf, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 imul, 0x6b, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 imul, 0x6b, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+imulzu, 0x6b, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm8S, Reg16|Unspecified|BaseIndex, Reg16 }
 imul, 0x69, i186, Modrm|CheckOperandSize|No_bSuf|No_sSuf, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 imul, 0x69, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|EVexMap4|NF, { Imm16|Imm32|Imm32S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
+imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sSuf|EVexMap4|NF|ZU, { Imm16, Reg16|Unspecified|BaseIndex, Reg16 }
 // imul with 2 operands mimics imul with 3 by putting the register in
 // both i.rm.reg & i.rm.regmem fields.  RegKludge enables this
 // transformation.
@@ -528,6 +530,7 @@  loopne, 0xe0, x64, JumpByte|No_bSuf|No_wSuf|No_sSuf|NoRex64, { Disp8 }
 
 // Set byte on flag instructions.
 set<cc>, 0xf9<cc:opc>/0, i386, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf, { Reg8|Unspecified|BaseIndex }
+setzu<cc>, 0xf24<cc:opc>/0, APX_F, Modrm|No_wSuf|No_lSuf|No_sSuf|No_qSuf|EVexMap4|ZU, { Reg8 }
 
 // String manipulation.
 cmps, 0xa6, 0, W|No_sSuf|RepPrefixOk, {}