[6/8] Support APX Push2/Pop2

Message ID 20230919152527.497773-7-lili.cui@intel.com
State New
Headers
Series Support Intel APX EGPR |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Frager, Neal via Binutils Sept. 19, 2023, 3:25 p.m. UTC
  From: "Mo, Zewei" <zewei.mo@intel.com>

gas/ChangeLog:

	* config/tc-i386.c: (is_any_apx_encoding):
	Add handler for APX-Push2/Pop2 when no EGPR32 is used,
	(md_assemble): Add handler for APX-Push2/Pop2.
	(build_modrm_byte): Add handler for operands order,
	vvvv encoding and modrm encoding of APX-Push2/Pop2.
	* testsuite/gas/i386/i386.exp: Add apx-push2pop2 tests.
	* testsuite/gas/i386/x86-64-apx-push2pop2.d: New test.
	* testsuite/gas/i386/x86-64-apx-push2pop2.s: Ditto.
	* testsuite/gas/i386/x86-64-apx-push2pop2-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-apx-push2pop2-inval.l: Ditto.
	* testsuite/gas/i386/x86-64-apx-push2pop2-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d: Ditto.
	* testsuite/gas/i386/apx-push2pop2-inval.s: Ditto.
	* testsuite/gas/i386/apx-push2pop2-inval.d: Ditto.

opcodes/ChangeLog:

	* i386-dis-evex.h: Add EVEX_MAP4_8F.
	* i386-dis-evex-w.h: Add EVEX_W_MAP4_8F_X86_64_L_0_M_1_R_0_P_0,
	EVEX_W_MAP4_FF_R_6_X86_64_L_0_M_1_P_0.
	* i386-dis-evex-len.h: Add EVEX_LEN_MAP4_8F_X86_64,
	EVEX_LEN_MAP4_FF_R_6_X86_64.
	* i386-dis-evex-mod.h: Add MOD_EVEX_MAP4_8F_X86_64_L_0,
	MOD_EVEX_MAP4_FF_R_6_X86_64_L_0.
	* i386-dis-evex-prefix.h:
	Add PREFIX_EVEX_MAP4_8F_X86_64_L_0_M_1_R_0,
	PREFIX_EVEX_MAP4_FF_R_6_X86_64_L_0_M_1.
	* i386-dis-evex-reg.h: Add REG_EVEX_MAP4_8F_X86_64_L_0_M_1.
	* i386-dis-evex-x86.h: Add X86_64_EVEX_MAP4_8F_X86_64,
	X86_64_EVEX_MAP4_FF_R_6_X86_64.
	* i386-dis.c: (OP) Add VexGq.
	(MOD_EVEX_MAP4_8F_X86_64_L_0): New.
	(MOD_EVEX_MAP4_FF_R_6_X86_64_L_0): Ditto.
	(PREFIX_EVEX_MAP4_8F_X86_64_L_0_M_1_R_0): Ditto.
	(PREFIX_EVEX_MAP4_FF_R_6_X86_64_L_0_M_1): Ditto.
	(EVEX_LEN_MAP4_8F_X86_64): Ditto.
	(EVEX_LEN_MAP4_FF_R_6_X86_64): Ditto.
	(EVEX_W_MAP4_8F_X86_64_L_0_M_1_R_0_P_0): Ditto.
	(EVEX_W_MAP4_FF_R_6_X86_64_L_0_M_1_P_0): Ditto.
	(REG_EVEX_MAP4_8F_X86_64_L_0_M_1): Ditto.
	(X86_64_EVEX_MAP4_8F): Ditto.
	(X86_64_EVEX_MAP4_FF_R_6): Ditto.
	(get_valid_dis386): Add handler of setting vector length for
	APX-Push2/Pop2 insn.
	(print_insn): Add handler of rounding control and cleaning
	vex.mask_register_specifier for APX-Push2/Pop2.
	(OP_VEX): Add handler of 64-bit vvvv register for APX-Push2/Pop2
	insn.
	* i386-opc.tbl: Add APX-Push2/Pop2 instructions.
	* i386-mnem.h: Regenerated.
	* i386-tbl.h: Regenerated.
---
 gas/config/tc-i386.c                          | 30 ++++++++++++-
 gas/testsuite/gas/i386/apx-push2pop2-inval.l  |  5 +++
 gas/testsuite/gas/i386/apx-push2pop2-inval.s  |  9 ++++
 gas/testsuite/gas/i386/i386.exp               |  1 +
 .../i386/x86-64-apx-push2pop2-decode-inval.d  | 29 ++++++++++++
 .../i386/x86-64-apx-push2pop2-decode-inval.s  | 19 ++++++++
 .../gas/i386/x86-64-apx-push2pop2-intel.d     | 42 ++++++++++++++++++
 .../gas/i386/x86-64-apx-push2pop2-inval.l     |  9 ++++
 .../gas/i386/x86-64-apx-push2pop2-inval.s     | 13 ++++++
 gas/testsuite/gas/i386/x86-64-apx-push2pop2.d | 42 ++++++++++++++++++
 gas/testsuite/gas/i386/x86-64-apx-push2pop2.s | 39 ++++++++++++++++
 gas/testsuite/gas/i386/x86-64.exp             |  4 ++
 opcodes/i386-dis-evex-len.h                   | 10 +++++
 opcodes/i386-dis-evex-mod.h                   | 10 +++++
 opcodes/i386-dis-evex-prefix.h                |  8 ++++
 opcodes/i386-dis-evex-reg.h                   |  9 ++++
 opcodes/i386-dis-evex-w.h                     | 10 +++++
 opcodes/i386-dis-evex-x86.h                   | 10 +++++
 opcodes/i386-dis-evex.h                       |  2 +-
 opcodes/i386-dis.c                            | 44 ++++++++++++++++++-
 opcodes/i386-gen.c                            |  1 +
 opcodes/i386-opc.h                            |  4 ++
 opcodes/i386-opc.tbl                          |  6 +++
 23 files changed, 351 insertions(+), 5 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/apx-push2pop2-inval.l
 create mode 100644 gas/testsuite/gas/i386/apx-push2pop2-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-apx-push2pop2.s
  

Comments

Jan Beulich Sept. 28, 2023, 11:37 a.m. UTC | #1
On 19.09.2023 17:25, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -5667,6 +5667,22 @@ md_assemble (char *line)
>        i.rex &= REX_OPCODE;
>      }
>  
> +  if (i.tm.opcode_modifier.push2pop2)
> +    {
> +      i.imm_operands = 0;

Why?

> +      unsigned int reg1 = register_number (i.op[0].regs);
> +      unsigned int reg2 = register_number (i.op[1].regs);
> +
> +      /* Push2/Pop2 cannot use RSP and Pop2 cannot pop two same registers.  */
> +      if (reg1 == 0x4 || reg2 == 0x4)
> +	as_bad (_("%s for `%s'"), _("invalid register operand"),
> +		insn_name (current_templates->start));
> +
> +      if ((i.tm.mnem_off == MN_pop2 || i.tm.mnem_off == MN_pop2p) && reg1 == reg2)

This would be easier with a single opcode check on the lhs of the &&.

> +	as_bad (_("%s for `%s'"), _("invalid register operand"),
> +		insn_name (current_templates->start));
> +    }

Both error messages want to be more specific, such that it's clear
what exactly is wrong. Also a string literal for %s is slightly odd,
and may pose issues to translators.

Furthermore there are pre-existing insns with restrictions on register
operands. I wonder whether these new checks wouldn't better be put
close to those.

> @@ -7100,7 +7116,11 @@ optimize_NDD_to_nonNDD (const insn_template *t)
>        && t->opcode_space == SPACE_EVEXMAP4
>        && i.reg_operands >= 2
>        && (i.types[i.operands - 1].bitfield.dword
> -	  || i.types[i.operands - 1].bitfield.qword))
> +	  || i.types[i.operands - 1].bitfield.qword)
> +      && (t->mnem_off != MN_pop2
> +	  && t->mnem_off != MN_pop2p
> +	  && t->mnem_off != MN_push2
> +	  && t->mnem_off != MN_push2p))

If an explicit check is needed here, why not use the push2pop2 attribute?

> @@ -8912,7 +8932,13 @@ build_modrm_byte (void)
>        dest = ~0;

This already sets dest ...

>      }
>    gas_assert (source < dest);
> -  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> +  if (i.tm.opcode_modifier.push2pop2)
> +    {
> +      v = 1;
> +      dest = (unsigned int) ~0;

... to the intended value. Furthermore, doesn't ...

> +      source = 0;
> +    }
> +  else if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
>        && source != op)
>      {
>        unsigned int tmp = source;

... this logic already take care of the wanted swapping, provided the
templates get SwapSources added? Hmm, odd - you already have that
attribute on the two POP2 templates, but not the two PUSH2 ones. Yet both
use identical operand (encoding) order.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d
> @@ -0,0 +1,29 @@
> +#as: --64
> +#objdump: -dw
> +#name: illegal decoding of APX-push2pop2 insns

Decoding cannot be illegal. Either you mean encoding, or you mean
"decoding of illegal APX-push2pop2 insn forms".

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s
> @@ -0,0 +1,19 @@
> +# Check illegal bytecode of APX-Push2Pop2 instructions
> +# pop2 %rax, %rbx

What's illegal here? From ...

> +# pop2 %rax, %rsp
> +# push2 %rsp, %r17
> +# pop2 %r12, %r12
> +# pop2 %r31, %r31
> +
> +	.allow_index_reg
> +	.text
> +popnd0:
> +	.byte 0x62,0xF4,0x64,0x08,0x8F,0xC0

... the label name I guess EVEX.nd=0, but that needs saying. (As mentioned
earlier, the comments want moving next to the code emission anyway, and
.byte wants avoiding it at all possible.)

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s
> @@ -0,0 +1,13 @@
> +# Check illegal APX-Push2Pop2 instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	push2 %eax, %ebx
> +	pop2 %rax, %rsp
> +	push2 %rsp, %r17
> +	pop2 %r12, %r12
> +	push2p %eax, %ebx
> +	pop2p %rax, %rsp
> +	push2p %rsp, %r17
> +	pop2p %r12, %r12

Please make sure that at least one of the forms uses %rsp once as first
and once as second operand for both push and pop.

> --- a/opcodes/i386-dis-evex-x86.h
> +++ b/opcodes/i386-dis-evex-x86.h
> @@ -138,3 +138,13 @@
>      { Bad_Opcode },
>      { VEX_LEN_TABLE (VEX_LEN_0F3AF0) },
>    },
> +  /* X86_64_EVEX_MAP4_8F*/

Nit: Please make well-formed comments (...

> +  {
> +    { Bad_Opcode },
> +    { EVEX_LEN_TABLE (EVEX_LEN_MAP4_8F_X86_64) },
> +  },
> +  /* X86_64_EVEX_MAP4_FF_R_6*/

... i.e. also here and possibly elsewhere).

> @@ -1341,6 +1354,9 @@ enum
>    X86_64_EVEX_0F38F6,
>    X86_64_EVEX_0F38F7,
>    X86_64_EVEX_0F3AF0,
> +
> +  X86_64_EVEX_MAP4_8F,
> +  X86_64_EVEX_MAP4_FF_R_6,
>  };

This might indicate a problem in patch 4: Why is the x86-64 decode step
entirely missing there? Or, if correct there, why is it needed here? For
push and pop here I'd expect decode order and hence enumerators to be
entirely consistent.

> @@ -1537,7 +1553,10 @@ enum
>    EVEX_LEN_0F3A39,
>    EVEX_LEN_0F3A3A,
>    EVEX_LEN_0F3A3B,
> -  EVEX_LEN_0F3A43
> +  EVEX_LEN_0F3A43,
> +
> +  EVEX_LEN_MAP4_8F_X86_64,
> +  EVEX_LEN_MAP4_FF_R_6_X86_64,
>  };

Prior changes didn't find it necessary to handle EVEX.l through a table
lookup - why is this needed here? Map4 has uniform requirements, and an
earlier patch added respective checking, iirc.


> @@ -8757,10 +8779,24 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
>        dp = &prefix_table[dp->op[1].bytemode][vindex];
>        break;
>  
> +    case USE_X86_64_EVEX_PUSH2_TABLE:
> +    case USE_X86_64_EVEX_POP2_TABLE:
> +	ins->evex_type = evex_push2_pop2;
> +      unsigned int vvvv_reg = ins->vex.register_specifier
> +			      | !ins->vex.v << 4;
> +      unsigned int rm_reg = ins->modrm.rm + (ins->rex & REX_B ? 8 : 0)
> +			    + (ins->rex2 & REX_B ? 16 : 0);
> +      if (!ins->vex.b || vvvv_reg == 0x4 || rm_reg == 0x4
> +	  || (dp->op[0].bytemode == USE_X86_64_EVEX_POP2_TABLE
> +	      && vvvv_reg == rm_reg))
> +	  return &bad_opcode;
> +      goto use_x86_64_table;

I don't think this is the way to handle such restrictions. Since this
likely can't very well be handled in existing operand handlers, so far
the approach was to introduce new ...Fixup() ones. That'll also
produce more helpful output, e.g. "push2 (bad)", rather than ".byte ..."
with no hint at approximately what insn this was. New USE_..._TABLE
should imo really only appear when truly new tables are introduced.

>      case USE_X86_64_EVEX_FROM_VEX_TABLE:
>        ins->evex_type = evex_from_vex;
>        /* Fall through.  */
>      case USE_X86_64_TABLE:
> +use_x86_64_table:

While this is going to go away with the comment above, as a general
remark: Within a switch(), please align labels used by "goto" from one
case to another with the adjacent case label(s). Elsewhere, for
"diff -p", please indent labels by at least one blank.

> @@ -9570,7 +9606,8 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
>  	  /* Check whether rounding control was enabled for an insn not
>  	     supporting it.  */
>  	  if (ins.modrm.mod == 3 && ins.vex.b
> -	      && !(ins.evex_used & EVEX_b_used))
> +	      && !(ins.evex_used & EVEX_b_used)
> +	      && ins.evex_type != evex_push2_pop2)
>  	    {

Looks like addressing a comment on an earlier patch will render this
change (and hence the new evex_push2_pop2 enumerator) unnecessary. If
not, I'd ask whether you wouldn't better set EVEX_b_used at an
appropriate point.

> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3555,3 +3555,9 @@ eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
>  
>  // FRED instructions end.
>  
> +// APX Push2/Pop2 instruction.
> +
> +push2, 0xff/6, APX_F|x64, Modrm|VexW0|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +push2p, 0xff/6, APX_F|x64, Modrm|VexW1|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +pop2, 0x8f/0, APX_F|x64, Modrm|VexW0|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
> +pop2p, 0x8f/0, APX_F|x64, Modrm|VexW1|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }

Missing No_wSuf in all 4 entries?

As to the suffixing 'p' - may I ask in how far that's aligned with other
assemblers? The doc doesn't even give a hint as to what mnemonic
representation should be used (personally I had been taking .x into
consideration). Furthermore the earlier REX2 patch also didn't deal with
the PPX functionality for PUSH/POP, unless I missed something.

Jan
  
Cui, Lili Oct. 30, 2023, 3:21 p.m. UTC | #2
> Subject: Re: [PATCH 6/8] Support APX Push2/Pop2
> 
> On 19.09.2023 17:25, Cui, Lili wrote:
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -5667,6 +5667,22 @@ md_assemble (char *line)
> >        i.rex &= REX_OPCODE;
> >      }
> >
> > +  if (i.tm.opcode_modifier.push2pop2)
> > +    {
> > +      i.imm_operands = 0;
> 
> Why?

Removed, it is redundant.

> 
> > +      unsigned int reg1 = register_number (i.op[0].regs);
> > +      unsigned int reg2 = register_number (i.op[1].regs);
> > +
> > +      /* Push2/Pop2 cannot use RSP and Pop2 cannot pop two same
> registers.  */
> > +      if (reg1 == 0x4 || reg2 == 0x4)
> > +	as_bad (_("%s for `%s'"), _("invalid register operand"),
> > +		insn_name (current_templates->start));
> > +
> > +      if ((i.tm.mnem_off == MN_pop2 || i.tm.mnem_off == MN_pop2p) &&
> > + reg1 == reg2)
> 
> This would be easier with a single opcode check on the lhs of the &&.
> 

Done.

> > +	as_bad (_("%s for `%s'"), _("invalid register operand"),
> > +		insn_name (current_templates->start));
> > +    }
> 

> Both error messages want to be more specific, such that it's clear what exactly
> is wrong. Also a string literal for %s is slightly odd, and may pose issues to
> translators.
> 
> Furthermore there are pre-existing insns with restrictions on register
> operands. I wonder whether these new checks wouldn't better be put close to
> those.
> 

Done.

> > @@ -7100,7 +7116,11 @@ optimize_NDD_to_nonNDD (const insn_template
> *t)
> >        && t->opcode_space == SPACE_EVEXMAP4
> >        && i.reg_operands >= 2
> >        && (i.types[i.operands - 1].bitfield.dword
> > -	  || i.types[i.operands - 1].bitfield.qword))
> > +	  || i.types[i.operands - 1].bitfield.qword)
> > +      && (t->mnem_off != MN_pop2
> > +	  && t->mnem_off != MN_pop2p
> > +	  && t->mnem_off != MN_push2
> > +	  && t->mnem_off != MN_push2p))
> 
> If an explicit check is needed here, why not use the push2pop2 attribute?
> 

We will move it to NDD optimization encoding patch. and use i.tm.opcode_modifier.push2pop2 instead.

> > @@ -8912,7 +8932,13 @@ build_modrm_byte (void)
> >        dest = ~0;
> 
> This already sets dest ...
> 
> >      }
> >    gas_assert (source < dest);
> > -  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> > +  if (i.tm.opcode_modifier.push2pop2)
> > +    {
> > +      v = 1;
> > +      dest = (unsigned int) ~0;
> 
> ... to the intended value. Furthermore, doesn't ...
> 
> > +      source = 0;
> > +    }
> > +  else if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
> >        && source != op)
> >      {
> >        unsigned int tmp = source;
> 
> ... this logic already take care of the wanted swapping, provided the templates
> get SwapSources added? Hmm, odd - you already have that attribute on the
> two POP2 templates, but not the two PUSH2 ones. Yet both use identical
> operand (encoding) order.

Push2 and Pop2 have extension opcode,  so they don't need to change anything in this function, and removed SWAP_SOURCES for POP2.

      if (i.tm.extension_opcode != None)
        {
          if (dest != source)
            v = dest;
          dest = ~0;

> 
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d
> > @@ -0,0 +1,29 @@
> > +#as: --64
> > +#objdump: -dw
> > +#name: illegal decoding of APX-push2pop2 insns
> 
> Decoding cannot be illegal. Either you mean encoding, or you mean "decoding
> of illegal APX-push2pop2 insn forms".
> 

It should be "illegal encoding of APX-push2pop2 insns".

> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s
> > @@ -0,0 +1,19 @@
> > +# Check illegal bytecode of APX-Push2Pop2 instructions # pop2 %rax,
> > +%rbx
> 
> What's illegal here? From ...
> 

> > +# pop2 %rax, %rsp
> > +# push2 %rsp, %r17
> > +# pop2 %r12, %r12
> > +# pop2 %r31, %r31
> > +
> > +	.allow_index_reg
> > +	.text
> > +popnd0:
> > +	.byte 0x62,0xF4,0x64,0x08,0x8F,0xC0
> 
> ... the label name I guess EVEX.nd=0, but that needs saying. (As mentioned
> earlier, the comments want moving next to the code emission anyway,
> and .byte wants avoiding it at all possible.)
> 

Removed this invalid test file, some instructions were duplicated with tx86-64-apx-push2pop2-inval.s and moved some .byte (unavoidable but maybe not necessary) tests to x86-64-apx-evex-promoted-bad.s

> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s
> > @@ -0,0 +1,13 @@
> > +# Check illegal APX-Push2Pop2 instructions
> > +
> > +	.allow_index_reg
> > +	.text
> > +_start:
> > +	push2 %eax, %ebx
> > +	pop2 %rax, %rsp
> > +	push2 %rsp, %r17
> > +	pop2 %r12, %r12
> > +	push2p %eax, %ebx
> > +	pop2p %rax, %rsp
> > +	push2p %rsp, %r17
> > +	pop2p %r12, %r12
> 
> Please make sure that at least one of the forms uses %rsp once as first and
> once as second operand for both push and pop.
> 

Done.

> > --- a/opcodes/i386-dis-evex-x86.h
> > +++ b/opcodes/i386-dis-evex-x86.h
> > @@ -138,3 +138,13 @@
> >      { Bad_Opcode },
> >      { VEX_LEN_TABLE (VEX_LEN_0F3AF0) },
> >    },
> > +  /* X86_64_EVEX_MAP4_8F*/
> 
> Nit: Please make well-formed comments (...
> 

Done.

> > +  {
> > +    { Bad_Opcode },
> > +    { EVEX_LEN_TABLE (EVEX_LEN_MAP4_8F_X86_64) },  },
> > +  /* X86_64_EVEX_MAP4_FF_R_6*/
> 
> ... i.e. also here and possibly elsewhere).
> 

Done.

> > @@ -1341,6 +1354,9 @@ enum
> >    X86_64_EVEX_0F38F6,
> >    X86_64_EVEX_0F38F7,
> >    X86_64_EVEX_0F3AF0,
> > +
> > +  X86_64_EVEX_MAP4_8F,
> > +  X86_64_EVEX_MAP4_FF_R_6,
> >  };
> 
> This might indicate a problem in patch 4: Why is the x86-64 decode step
> entirely missing there? Or, if correct there, why is it needed here? For push
> and pop here I'd expect decode order and hence enumerators to be entirely
> consistent.
> 

Confirmed with patch 2/8 and 4/8, all MAP4 are missing in x86-64 table. Added x86-64 check for all MAP4 in get_valid_dis386. Deleted push2 and  pop2 here.

        case 0x4:
          vex_table_index = EVEX_MAP4;
          ins->evex_type = evex_from_legacy;+
 +       if (ins->address_mode != mode_64bit)
 +         return &bad_opcode;
          break;

> > @@ -1537,7 +1553,10 @@ enum
> >    EVEX_LEN_0F3A39,
> >    EVEX_LEN_0F3A3A,
> >    EVEX_LEN_0F3A3B,
> > -  EVEX_LEN_0F3A43
> > +  EVEX_LEN_0F3A43,
> > +
> > +  EVEX_LEN_MAP4_8F_X86_64,
> > +  EVEX_LEN_MAP4_FF_R_6_X86_64,
> >  };
> 
> Prior changes didn't find it necessary to handle EVEX.l through a table lookup
> - why is this needed here? Map4 has uniform requirements, and an earlier
> patch added respective checking, iirc.
> 

Removed EVEX.l , and added check for it.

> > @@ -8757,10 +8779,24 @@ get_valid_dis386 (const struct dis386 *dp,
> instr_info *ins)
> >        dp = &prefix_table[dp->op[1].bytemode][vindex];
> >        break;
> >
> > +    case USE_X86_64_EVEX_PUSH2_TABLE:
> > +    case USE_X86_64_EVEX_POP2_TABLE:
> > +	ins->evex_type = evex_push2_pop2;
> > +      unsigned int vvvv_reg = ins->vex.register_specifier
> > +			      | !ins->vex.v << 4;
> > +      unsigned int rm_reg = ins->modrm.rm + (ins->rex & REX_B ? 8 : 0)
> > +			    + (ins->rex2 & REX_B ? 16 : 0);
> > +      if (!ins->vex.b || vvvv_reg == 0x4 || rm_reg == 0x4
> > +	  || (dp->op[0].bytemode == USE_X86_64_EVEX_POP2_TABLE
> > +	      && vvvv_reg == rm_reg))
> > +	  return &bad_opcode;
> > +      goto use_x86_64_table;
> 
> I don't think this is the way to handle such restrictions. Since this likely can't
> very well be handled in existing operand handlers, so far the approach was to
> introduce new ...Fixup() ones. That'll also produce more helpful output, e.g.
> "push2 (bad)", rather than ".byte ..."
> with no hint at approximately what insn this was. New USE_..._TABLE should
> imo really only appear when truly new tables are introduced.
> 

Put them in PUSH2_POP2_Fixup and removed USE_X86_64_EVEX_PUSH2_TABLE and USE_X86_64_EVEX_POP2_TABLE.

> >      case USE_X86_64_EVEX_FROM_VEX_TABLE:
> >        ins->evex_type = evex_from_vex;
> >        /* Fall through.  */
> >      case USE_X86_64_TABLE:
> > +use_x86_64_table:
> 
> While this is going to go away with the comment above, as a general
> remark: Within a switch(), please align labels used by "goto" from one case to
> another with the adjacent case label(s). Elsewhere, for "diff -p", please indent
> labels by at least one blank.
> 

Yes, they were deleted.

> > @@ -9570,7 +9606,8 @@ print_insn (bfd_vma pc, disassemble_info *info,
> int intel_syntax)
> >  	  /* Check whether rounding control was enabled for an insn not
> >  	     supporting it.  */
> >  	  if (ins.modrm.mod == 3 && ins.vex.b
> > -	      && !(ins.evex_used & EVEX_b_used))
> > +	      && !(ins.evex_used & EVEX_b_used)
> > +	      && ins.evex_type != evex_push2_pop2)
> >  	    {
> 
> Looks like addressing a comment on an earlier patch will render this change
> (and hence the new evex_push2_pop2 enumerator) unnecessary. If not, I'd
> ask whether you wouldn't better set EVEX_b_used at an appropriate point.
> 

Yes, it is redundant here, removed it. 

> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -3555,3 +3555,9 @@ eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
> >
> >  // FRED instructions end.
> >
> > +// APX Push2/Pop2 instruction.
> > +
> > +push2, 0xff/6, APX_F|x64,
> >
> +Modrm|VexW0|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf
> |No_sSu
> > +f, { Reg64, Reg64 } push2p, 0xff/6, APX_F|x64,
> >
> +Modrm|VexW1|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf
> |No_sSu
> > +f, { Reg64, Reg64 } pop2, 0x8f/0, APX_F|x64,
> >
> +Modrm|VexW0|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_
> bSuf|No
> > +_lSuf|No_sSuf, { Reg64, Reg64 } pop2p, 0x8f/0, APX_F|x64,
> >
> +Modrm|VexW1|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_
> bSuf|No
> > +_lSuf|No_sSuf, { Reg64, Reg64 }
> 
> Missing No_wSuf in all 4 entries?
>
 
Added.

> As to the suffixing 'p' - may I ask in how far that's aligned with other
> assemblers? The doc doesn't even give a hint as to what mnemonic
> representation should be used (personally I had been taking .x into
> consideration). Furthermore the earlier REX2 patch also didn't deal with the
> PPX functionality for PUSH/POP, unless I missed something.
>

Pop2p and Push2p are listed in sections 9.1 and 9.2 of the doc. PPX functionality for PUSH/POP is not implemented in this patch, It will be implemented separately later. I'll note it in the Push2/pop2 commit log.

EVEX.LLZ.NP.MAP4.W1 8F 11:000:bbb
POP2P {NF=0} {ND=1} r64, r64

EVEX.LLZ.NP.MAP4.W1 FF 11:110:bbb
PUSH2P {NF=0} {ND=1} r64, r64

Thanks,
Lili.
  
Jan Beulich Oct. 30, 2023, 3:31 p.m. UTC | #3
On 30.10.2023 16:21, Cui, Lili wrote:
>> As to the suffixing 'p' - may I ask in how far that's aligned with other
>> assemblers? The doc doesn't even give a hint as to what mnemonic
>> representation should be used (personally I had been taking .x into
>> consideration). Furthermore the earlier REX2 patch also didn't deal with the
>> PPX functionality for PUSH/POP, unless I missed something.
>>
> 
> Pop2p and Push2p are listed in sections 9.1 and 9.2 of the doc. PPX functionality for PUSH/POP is not implemented in this patch, It will be implemented separately later. I'll note it in the Push2/pop2 commit log.
> 
> EVEX.LLZ.NP.MAP4.W1 8F 11:000:bbb
> POP2P {NF=0} {ND=1} r64, r64
> 
> EVEX.LLZ.NP.MAP4.W1 FF 11:110:bbb
> PUSH2P {NF=0} {ND=1} r64, r64

Oh, I see - somehow I managed to overlook that "P" there. Does that mean the
PPX forms of PUSH/POP and then going to be PUSHP/POPP (there's no mention of
such mnemonics in the doc, afaics)?

Jan
  
Cui, Lili Nov. 20, 2023, 1:05 p.m. UTC | #4
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, October 30, 2023 11:31 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: Lu, Hongjiu <hongjiu.lu@intel.com>; Mo, Zewei <zewei.mo@intel.com>;
> binutils@sourceware.org
> Subject: Re: [PATCH 6/8] Support APX Push2/Pop2
> 
> On 30.10.2023 16:21, Cui, Lili wrote:
> >> As to the suffixing 'p' - may I ask in how far that's aligned with
> >> other assemblers? The doc doesn't even give a hint as to what
> >> mnemonic representation should be used (personally I had been taking
> >> .x into consideration). Furthermore the earlier REX2 patch also
> >> didn't deal with the PPX functionality for PUSH/POP, unless I missed
> something.
> >>
> >
> > Pop2p and Push2p are listed in sections 9.1 and 9.2 of the doc. PPX
> functionality for PUSH/POP is not implemented in this patch, It will be
> implemented separately later. I'll note it in the Push2/pop2 commit log.
> >
> > EVEX.LLZ.NP.MAP4.W1 8F 11:000:bbb
> > POP2P {NF=0} {ND=1} r64, r64
> >
> > EVEX.LLZ.NP.MAP4.W1 FF 11:110:bbb
> > PUSH2P {NF=0} {ND=1} r64, r64
> 
> Oh, I see - somehow I managed to overlook that "P" there. Does that mean
> the PPX forms of PUSH/POP and then going to be PUSHP/POPP (there's no
> mention of such mnemonics in the doc, afaics)?
> 
Yes, the PPX form of PUSH/POP is PUSHP/POPP and it will be included in the doc, maybe next release.

Thanks,
Lili.
  

Patch

diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index fba97ae37d8..dd3af5dd2d5 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5667,6 +5667,22 @@  md_assemble (char *line)
       i.rex &= REX_OPCODE;
     }
 
+  if (i.tm.opcode_modifier.push2pop2)
+    {
+      i.imm_operands = 0;
+      unsigned int reg1 = register_number (i.op[0].regs);
+      unsigned int reg2 = register_number (i.op[1].regs);
+
+      /* Push2/Pop2 cannot use RSP and Pop2 cannot pop two same registers.  */
+      if (reg1 == 0x4 || reg2 == 0x4)
+	as_bad (_("%s for `%s'"), _("invalid register operand"),
+		insn_name (current_templates->start));
+
+      if ((i.tm.mnem_off == MN_pop2 || i.tm.mnem_off == MN_pop2p) && reg1 == reg2)
+	as_bad (_("%s for `%s'"), _("invalid register operand"),
+		insn_name (current_templates->start));
+    }
+
   /* Handle conversion of 'int $3' --> special int3 insn.  */
   if (i.tm.mnem_off == MN_int
       && i.op[0].imms->X_add_number == 3)
@@ -7100,7 +7116,11 @@  optimize_NDD_to_nonNDD (const insn_template *t)
       && t->opcode_space == SPACE_EVEXMAP4
       && i.reg_operands >= 2
       && (i.types[i.operands - 1].bitfield.dword
-	  || i.types[i.operands - 1].bitfield.qword))
+	  || i.types[i.operands - 1].bitfield.qword)
+      && (t->mnem_off != MN_pop2
+	  && t->mnem_off != MN_pop2p
+	  && t->mnem_off != MN_push2
+	  && t->mnem_off != MN_push2p))
     {
       int tmp_flag = -1;
       int dest = i.operands - 1;
@@ -8912,7 +8932,13 @@  build_modrm_byte (void)
       dest = ~0;
     }
   gas_assert (source < dest);
-  if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
+  if (i.tm.opcode_modifier.push2pop2)
+    {
+      v = 1;
+      dest = (unsigned int) ~0;
+      source = 0;
+    }
+  else if (i.tm.opcode_modifier.operandconstraint == SWAP_SOURCES
       && source != op)
     {
       unsigned int tmp = source;
diff --git a/gas/testsuite/gas/i386/apx-push2pop2-inval.l b/gas/testsuite/gas/i386/apx-push2pop2-inval.l
new file mode 100644
index 00000000000..a55a71520c8
--- /dev/null
+++ b/gas/testsuite/gas/i386/apx-push2pop2-inval.l
@@ -0,0 +1,5 @@ 
+.* Assembler messages:
+.*:6: Error: `push2' is only supported in 64-bit mode
+.*:7: Error: `push2p' is only supported in 64-bit mode
+.*:8: Error: `pop2' is only supported in 64-bit mode
+.*:9: Error: `pop2p' is only supported in 64-bit mode
diff --git a/gas/testsuite/gas/i386/apx-push2pop2-inval.s b/gas/testsuite/gas/i386/apx-push2pop2-inval.s
new file mode 100644
index 00000000000..77166327ed1
--- /dev/null
+++ b/gas/testsuite/gas/i386/apx-push2pop2-inval.s
@@ -0,0 +1,9 @@ 
+# Check 32bit APX-PUSH2/POP2 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	push2 %rax, %rbx
+	push2p %rax, %rbx
+	pop2 %rax, %rbx
+	pop2p %rax, %rbx
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index d16eb888f24..7e0ad339141 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -507,6 +507,7 @@  if [gas_32_check] then {
     run_dump_test "sm4"
     run_dump_test "sm4-intel"
     run_list_test "pbndkb-inval"
+    run_list_test "apx-push2pop2-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d
new file mode 100644
index 00000000000..e63a44554a0
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.d
@@ -0,0 +1,29 @@ 
+#as: --64
+#objdump: -dw
+#name: illegal decoding of APX-push2pop2 insns
+#source: x86-64-apx-push2pop2-decode-inval.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <popnd0>:
+[ 	]*[a-f0-9]+:	62 f4 64             	\(bad\)
+[ 	]*[a-f0-9]+:	08                   	.byte 0x8
+[ 	]*[a-f0-9]+:	8f c0                	pop    %rax
+
+0+6 <poprspvvvv>:
+[ 	]*[a-f0-9]+:	62 f4 5c 18 8f       	\(bad\)
+[ 	]*[a-f0-9]+:	c0                   	.byte 0xc0
+
+0+c <pushrsprm>:
+[ 	]*[a-f0-9]+:	62 f4 74 10 ff       	\(bad\)
+[ 	]*[a-f0-9]+:	f4                   	hlt
+
+0+12 <popsamereg>:
+[ 	]*[a-f0-9]+:	62 d4 1c 18 8f       	\(bad\)
+[ 	]*[a-f0-9]+:	c4                   	.byte 0xc4
+
+0+18 <popsameegpr32>:
+[ 	]*[a-f0-9]+:	62 dc 04 10 8f       	\(bad\)
+[ 	]*[a-f0-9]+:	c7                   	.byte 0xc7
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s
new file mode 100644
index 00000000000..ac7296bc94d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-decode-inval.s
@@ -0,0 +1,19 @@ 
+# Check illegal bytecode of APX-Push2Pop2 instructions
+# pop2 %rax, %rbx
+# pop2 %rax, %rsp
+# push2 %rsp, %r17
+# pop2 %r12, %r12
+# pop2 %r31, %r31
+
+	.allow_index_reg
+	.text
+popnd0:
+	.byte 0x62,0xF4,0x64,0x08,0x8F,0xC0
+poprspvvvv:
+	.byte 0x62,0xF4,0x5C,0x18,0x8F,0xC0
+pushrsprm:
+	.byte 0x62,0xF4,0x74,0x10,0xFF,0xF4
+popsamereg:
+	.byte 0x62,0xD4,0x1C,0x18,0x8F,0xC4
+popsameegpr32:
+	.byte 0x62,0xDC,0x04,0x10,0x8F,0xC7
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2-intel.d b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-intel.d
new file mode 100644
index 00000000000..46b21219582
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-intel.d
@@ -0,0 +1,42 @@ 
+#as: --64
+#objdump: -dw -Mintel
+#name: i386 APX-push2pop2 insns (Intel disassembly)
+#source: x86-64-apx-push2pop2.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f4 7c 18 ff f3\s+push2\s+rax,rbx
+\s*[a-f0-9]+:\s*62 fc 3c 18 ff f1\s+push2\s+r8,r17
+\s*[a-f0-9]+:\s*62 d4 04 10 ff f1\s+push2\s+r31,r9
+\s*[a-f0-9]+:\s*62 dc 3c 10 ff f7\s+push2\s+r24,r31
+\s*[a-f0-9]+:\s*62 f4 fc 18 ff f3\s+push2p\s+rax,rbx
+\s*[a-f0-9]+:\s*62 fc bc 18 ff f1\s+push2p\s+r8,r17
+\s*[a-f0-9]+:\s*62 d4 84 10 ff f1\s+push2p\s+r31,r9
+\s*[a-f0-9]+:\s*62 dc bc 10 ff f7\s+push2p\s+r24,r31
+\s*[a-f0-9]+:\s*62 f4 64 18 8f c0\s+pop2\s+rbx,rax
+\s*[a-f0-9]+:\s*62 d4 74 10 8f c0\s+pop2\s+r17,r8
+\s*[a-f0-9]+:\s*62 dc 34 18 8f c7\s+pop2\s+r9,r31
+\s*[a-f0-9]+:\s*62 dc 04 10 8f c0\s+pop2\s+r31,r24
+\s*[a-f0-9]+:\s*62 f4 e4 18 8f c0\s+pop2p\s+rbx,rax
+\s*[a-f0-9]+:\s*62 d4 f4 10 8f c0\s+pop2p\s+r17,r8
+\s*[a-f0-9]+:\s*62 dc b4 18 8f c7\s+pop2p\s+r9,r31
+\s*[a-f0-9]+:\s*62 dc 84 10 8f c0\s+pop2p\s+r31,r24
+\s*[a-f0-9]+:\s*62 f4 7c 18 ff f3\s+push2\s+rax,rbx
+\s*[a-f0-9]+:\s*62 fc 3c 18 ff f1\s+push2\s+r8,r17
+\s*[a-f0-9]+:\s*62 d4 04 10 ff f1\s+push2\s+r31,r9
+\s*[a-f0-9]+:\s*62 dc 3c 10 ff f7\s+push2\s+r24,r31
+\s*[a-f0-9]+:\s*62 f4 fc 18 ff f3\s+push2p\s+rax,rbx
+\s*[a-f0-9]+:\s*62 fc bc 18 ff f1\s+push2p\s+r8,r17
+\s*[a-f0-9]+:\s*62 d4 84 10 ff f1\s+push2p\s+r31,r9
+\s*[a-f0-9]+:\s*62 dc bc 10 ff f7\s+push2p\s+r24,r31
+\s*[a-f0-9]+:\s*62 f4 64 18 8f c0\s+pop2\s+rbx,rax
+\s*[a-f0-9]+:\s*62 d4 74 10 8f c0\s+pop2\s+r17,r8
+\s*[a-f0-9]+:\s*62 dc 34 18 8f c7\s+pop2\s+r9,r31
+\s*[a-f0-9]+:\s*62 dc 04 10 8f c0\s+pop2\s+r31,r24
+\s*[a-f0-9]+:\s*62 f4 e4 18 8f c0\s+pop2p\s+rbx,rax
+\s*[a-f0-9]+:\s*62 d4 f4 10 8f c0\s+pop2p\s+r17,r8
+\s*[a-f0-9]+:\s*62 dc b4 18 8f c7\s+pop2p\s+r9,r31
+\s*[a-f0-9]+:\s*62 dc 84 10 8f c0\s+pop2p\s+r31,r24
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.l b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.l
new file mode 100644
index 00000000000..61a9672edcd
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.l
@@ -0,0 +1,9 @@ 
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `push2'
+.*:7: Error: invalid register operand for `pop2'
+.*:8: Error: invalid register operand for `push2'
+.*:9: Error: invalid register operand for `pop2'
+.*:10: Error: operand size mismatch for `push2p'
+.*:11: Error: invalid register operand for `pop2p'
+.*:12: Error: invalid register operand for `push2p'
+.*:13: Error: invalid register operand for `pop2p'
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s
new file mode 100644
index 00000000000..c36bd5df88b
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2-inval.s
@@ -0,0 +1,13 @@ 
+# Check illegal APX-Push2Pop2 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	push2 %eax, %ebx
+	pop2 %rax, %rsp
+	push2 %rsp, %r17
+	pop2 %r12, %r12
+	push2p %eax, %ebx
+	pop2p %rax, %rsp
+	push2p %rsp, %r17
+	pop2p %r12, %r12
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2.d b/gas/testsuite/gas/i386/x86-64-apx-push2pop2.d
new file mode 100644
index 00000000000..54f22a7f94e
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2.d
@@ -0,0 +1,42 @@ 
+#as: --64
+#objdump: -dw
+#name: x86_64 APX-push2pop2 insns
+#source: x86-64-apx-push2pop2.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*62 f4 7c 18 ff f3\s+push2\s+%rbx,%rax
+\s*[a-f0-9]+:\s*62 fc 3c 18 ff f1\s+push2\s+%r17,%r8
+\s*[a-f0-9]+:\s*62 d4 04 10 ff f1\s+push2\s+%r9,%r31
+\s*[a-f0-9]+:\s*62 dc 3c 10 ff f7\s+push2\s+%r31,%r24
+\s*[a-f0-9]+:\s*62 f4 fc 18 ff f3\s+push2p\s+%rbx,%rax
+\s*[a-f0-9]+:\s*62 fc bc 18 ff f1\s+push2p\s+%r17,%r8
+\s*[a-f0-9]+:\s*62 d4 84 10 ff f1\s+push2p\s+%r9,%r31
+\s*[a-f0-9]+:\s*62 dc bc 10 ff f7\s+push2p\s+%r31,%r24
+\s*[a-f0-9]+:\s*62 f4 64 18 8f c0\s+pop2\s+%rax,%rbx
+\s*[a-f0-9]+:\s*62 d4 74 10 8f c0\s+pop2\s+%r8,%r17
+\s*[a-f0-9]+:\s*62 dc 34 18 8f c7\s+pop2\s+%r31,%r9
+\s*[a-f0-9]+:\s*62 dc 04 10 8f c0\s+pop2\s+%r24,%r31
+\s*[a-f0-9]+:\s*62 f4 e4 18 8f c0\s+pop2p\s+%rax,%rbx
+\s*[a-f0-9]+:\s*62 d4 f4 10 8f c0\s+pop2p\s+%r8,%r17
+\s*[a-f0-9]+:\s*62 dc b4 18 8f c7\s+pop2p\s+%r31,%r9
+\s*[a-f0-9]+:\s*62 dc 84 10 8f c0\s+pop2p\s+%r24,%r31
+\s*[a-f0-9]+:\s*62 f4 7c 18 ff f3\s+push2\s+%rbx,%rax
+\s*[a-f0-9]+:\s*62 fc 3c 18 ff f1\s+push2\s+%r17,%r8
+\s*[a-f0-9]+:\s*62 d4 04 10 ff f1\s+push2\s+%r9,%r31
+\s*[a-f0-9]+:\s*62 dc 3c 10 ff f7\s+push2\s+%r31,%r24
+\s*[a-f0-9]+:\s*62 f4 fc 18 ff f3\s+push2p\s+%rbx,%rax
+\s*[a-f0-9]+:\s*62 fc bc 18 ff f1\s+push2p\s+%r17,%r8
+\s*[a-f0-9]+:\s*62 d4 84 10 ff f1\s+push2p\s+%r9,%r31
+\s*[a-f0-9]+:\s*62 dc bc 10 ff f7\s+push2p\s+%r31,%r24
+\s*[a-f0-9]+:\s*62 f4 64 18 8f c0\s+pop2\s+%rax,%rbx
+\s*[a-f0-9]+:\s*62 d4 74 10 8f c0\s+pop2\s+%r8,%r17
+\s*[a-f0-9]+:\s*62 dc 34 18 8f c7\s+pop2\s+%r31,%r9
+\s*[a-f0-9]+:\s*62 dc 04 10 8f c0\s+pop2\s+%r24,%r31
+\s*[a-f0-9]+:\s*62 f4 e4 18 8f c0\s+pop2p\s+%rax,%rbx
+\s*[a-f0-9]+:\s*62 d4 f4 10 8f c0\s+pop2p\s+%r8,%r17
+\s*[a-f0-9]+:\s*62 dc b4 18 8f c7\s+pop2p\s+%r31,%r9
+\s*[a-f0-9]+:\s*62 dc 84 10 8f c0\s+pop2p\s+%r24,%r31
diff --git a/gas/testsuite/gas/i386/x86-64-apx-push2pop2.s b/gas/testsuite/gas/i386/x86-64-apx-push2pop2.s
new file mode 100644
index 00000000000..4cfc0a2185f
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-apx-push2pop2.s
@@ -0,0 +1,39 @@ 
+# Check 64bit APX-Push2Pop2 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	push2 %rbx, %rax
+	push2 %r17, %r8
+	push2 %r9, %r31
+	push2 %r31, %r24
+	push2p %rbx, %rax
+	push2p %r17, %r8
+	push2p %r9, %r31
+	push2p %r31, %r24
+	pop2 %rax, %rbx
+	pop2 %r8, %r17
+	pop2 %r31, %r9
+	pop2 %r24, %r31
+	pop2p %rax, %rbx
+	pop2p %r8, %r17
+	pop2p %r31, %r9
+	pop2p %r24, %r31
+
+.intel_syntax noprefix
+	push2 rax, rbx
+	push2 r8, r17
+	push2 r31, r9
+	push2 r24, r31
+	push2p rax, rbx
+	push2p r8, r17
+	push2p r31, r9
+	push2p r24, r31
+	pop2 rbx, rax
+	pop2 r17, r8
+	pop2 r9, r31
+	pop2 r31, r24
+	pop2p rbx, rax
+	pop2p r17, r8
+	pop2p r9, r31
+	pop2p r31, r24
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index c48430ca7cb..2d3a0387497 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -342,6 +342,10 @@  run_dump_test "x86-64-avx512dq-rcigrd-intel"
 run_dump_test "x86-64-avx512dq-rcigrd"
 run_dump_test "x86-64-avx512dq-rcigrne-intel"
 run_dump_test "x86-64-avx512dq-rcigrne"
+run_dump_test "x86-64-apx-push2pop2"
+run_dump_test "x86-64-apx-push2pop2-intel"
+run_list_test "x86-64-apx-push2pop2-inval"
+run_dump_test "x86-64-apx-push2pop2-decode-inval"
 run_dump_test "x86-64-avx512dq-rcigru-intel"
 run_dump_test "x86-64-avx512dq-rcigru"
 run_dump_test "x86-64-avx512dq-rcigrz-intel"
diff --git a/opcodes/i386-dis-evex-len.h b/opcodes/i386-dis-evex-len.h
index 1933a045822..7c9b921bf6b 100644
--- a/opcodes/i386-dis-evex-len.h
+++ b/opcodes/i386-dis-evex-len.h
@@ -155,4 +155,14 @@  static const struct dis386 evex_len_table[][3] = {
     { VEX_W_TABLE (EVEX_W_0F3A43_L_n) },
     { VEX_W_TABLE (EVEX_W_0F3A43_L_n) },
   },
+
+  /* EVEX_LEN_MAP4_8F_X86_64 */
+  {
+    { MOD_TABLE (MOD_EVEX_MAP4_8F_X86_64_L_0) },
+  },
+
+  /* EVEX_LEN_MAP4_FF_R_6_X86_64 */
+  {
+    { MOD_TABLE (MOD_EVEX_MAP4_FF_R_6_X86_64_L_0) },
+  },
 };
diff --git a/opcodes/i386-dis-evex-mod.h b/opcodes/i386-dis-evex-mod.h
index 5a1326a1b73..ad7b0514720 100644
--- a/opcodes/i386-dis-evex-mod.h
+++ b/opcodes/i386-dis-evex-mod.h
@@ -7,6 +7,11 @@ 
   {
     { "wrssK",		{ M, Gdq }, 0 },
   },
+  /* MOD_EVEX_MAP4_8F_X86_64_L_0 */
+  {
+    { Bad_Opcode },
+    { REG_TABLE (REG_EVEX_MAP4_8F_X86_64_L_0_M_1) },
+  },
   /* MOD_EVEX_MAP4_DA_PREFIX_1 */
   {
     { Bad_Opcode },
@@ -49,3 +54,8 @@ 
   {
     { "movdiri",	{ Edq, Gdq }, 0 },
   },
+  /* MOD_EVEX_MAP4_FF_R_6_X86_64_L_0 */
+  {
+    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_FF_R_6_X86_64_L_0_M_1) },
+  },
diff --git a/opcodes/i386-dis-evex-prefix.h b/opcodes/i386-dis-evex-prefix.h
index 210783d7e88..5b2dff379bd 100644
--- a/opcodes/i386-dis-evex-prefix.h
+++ b/opcodes/i386-dis-evex-prefix.h
@@ -356,6 +356,10 @@ 
     { "adoxS",	{ VexGdq, Gdq, Edq }, 0 },
     { "adcxS",	{ VexGdq, Gdq, Edq }, 0 },
   },
+  /* PREFIX_EVEX_MAP4_8F_X86_64_L_0_M_1_R_0 */
+  {
+    { VEX_W_TABLE (EVEX_W_MAP4_8F_X86_64_L_0_M_1_R_0_P_0) }
+  },
   /* PREFIX_EVEX_MAP4_D8 */
   {
     { "sha1nexte", { XM, EXxmm }, 0 },
@@ -421,6 +425,10 @@ 
     { "aand",	{ Mdq, Gdq }, 0 },
     { "aor",	{ Mdq, Gdq }, 0 },
   },
+  /* PREFIX_EVEX_MAP4_FF_R_6_X86_64_L_0_M_1 */
+  {
+    { VEX_W_TABLE (EVEX_W_MAP4_FF_R_6_X86_64_L_0_M_1_P_0) },
+  },
   /* PREFIX_EVEX_MAP5_10 */
   {
     { Bad_Opcode },
diff --git a/opcodes/i386-dis-evex-reg.h b/opcodes/i386-dis-evex-reg.h
index d00c2843e12..f9d313a78e1 100644
--- a/opcodes/i386-dis-evex-reg.h
+++ b/opcodes/i386-dis-evex-reg.h
@@ -89,6 +89,10 @@ 
     { "xorQ",	{ VexGv, Ev, sIb }, 0 },
     { Bad_Opcode },
   },
+  /* REG_EVEX_MAP4_8F_X86_64_L_0_M_1 */
+  {
+    { PREFIX_TABLE (PREFIX_EVEX_MAP4_8F_X86_64_L_0_M_1_R_0) },
+  },
   /* REG_EVEX_MAP4_C0 */
   {
     { "rolA",	{ VexGb, Eb, Ib }, 0 },
@@ -185,4 +189,9 @@ 
   {
     { "incQ",   { VexGv ,Ev }, 0 },
     { "decQ",   { VexGv ,Ev }, 0 },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { X86_64_EVEX_PUSH2_TABLE (X86_64_EVEX_MAP4_FF_R_6) },
   },
diff --git a/opcodes/i386-dis-evex-w.h b/opcodes/i386-dis-evex-w.h
index b828277d413..da238764d59 100644
--- a/opcodes/i386-dis-evex-w.h
+++ b/opcodes/i386-dis-evex-w.h
@@ -442,6 +442,16 @@ 
     { Bad_Opcode },
     { "vpshrdw",   { XM, Vex, EXx, Ib }, 0 },
   },
+  /* EVEX_W_MAP4_8F_X86_64_L_0_M_1_R_0_P_0 */
+  {
+    { "pop2", { VexGq, Eq }, 0 },
+    { "pop2p", { VexGq, Eq }, 0 },
+  },
+  /* EVEX_W_MAP4_FF_R_6_X86_64_L_0_M_1_P_0 */
+  {
+    { "push2", { VexGq, Eq }, 0 },
+    { "push2p", { VexGq, Eq }, 0 },
+  },
   /* EVEX_W_MAP5_5B_P_0 */
   {
     { "vcvtdq2ph%XY",	{ XMxmmq, EXx, EXxEVexR }, 0 },
diff --git a/opcodes/i386-dis-evex-x86.h b/opcodes/i386-dis-evex-x86.h
index 1121223d877..b603f1af882 100644
--- a/opcodes/i386-dis-evex-x86.h
+++ b/opcodes/i386-dis-evex-x86.h
@@ -138,3 +138,13 @@ 
     { Bad_Opcode },
     { VEX_LEN_TABLE (VEX_LEN_0F3AF0) },
   },
+  /* X86_64_EVEX_MAP4_8F*/
+  {
+    { Bad_Opcode },
+    { EVEX_LEN_TABLE (EVEX_LEN_MAP4_8F_X86_64) },
+  },
+  /* X86_64_EVEX_MAP4_FF_R_6*/
+  {
+    { Bad_Opcode },
+    { EVEX_LEN_TABLE (EVEX_LEN_MAP4_FF_R_6_X86_64) },
+  },
diff --git a/opcodes/i386-dis-evex.h b/opcodes/i386-dis-evex.h
index 1787be6dbf0..22fa9b2b067 100644
--- a/opcodes/i386-dis-evex.h
+++ b/opcodes/i386-dis-evex.h
@@ -1035,7 +1035,7 @@  static const struct dis386 evex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
+    { X86_64_EVEX_POP2_TABLE (X86_64_EVEX_MAP4_8F) },
     /* 90 */
     { Bad_Opcode },
     { Bad_Opcode },
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index c702fd9e756..4671d2e4b0e 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -137,6 +137,7 @@  enum evex_type
   evex_default = 0,
   evex_from_legacy,
   evex_from_vex,
+  evex_push2_pop2,
 };
 
 struct instr_info
@@ -571,6 +572,7 @@  fetch_error (const instr_info *ins)
 #define VexGatherD { OP_VEX, vex_vsib_d_w_dq_mode }
 #define VexGatherQ { OP_VEX, vex_vsib_q_w_dq_mode }
 #define VexGdq { OP_VEX, dq_mode }
+#define VexGq { OP_VEX, q_mode }
 #define VexGb { OP_VEX, b_mode }
 #define VexGv { OP_VEX, v_mode }
 #define VexTmm { OP_VEX, tmm_mode }
@@ -804,6 +806,8 @@  enum
   USE_PREFIX_TABLE,
   USE_X86_64_TABLE,
   USE_X86_64_EVEX_FROM_VEX_TABLE,
+  USE_X86_64_EVEX_PUSH2_TABLE,
+  USE_X86_64_EVEX_POP2_TABLE,
   USE_3BYTE_TABLE,
   USE_XOP_8F_TABLE,
   USE_VEX_C4_TABLE,
@@ -824,6 +828,10 @@  enum
 #define X86_64_TABLE(I)		DIS386 (USE_X86_64_TABLE, (I))
 #define X86_64_EVEX_FROM_VEX_TABLE(I) \
   DIS386 (USE_X86_64_EVEX_FROM_VEX_TABLE, (I))
+#define X86_64_EVEX_PUSH2_TABLE(I) \
+  DIS386 (USE_X86_64_EVEX_PUSH2_TABLE, (I))
+#define X86_64_EVEX_POP2_TABLE(I) \
+  DIS386 (USE_X86_64_EVEX_POP2_TABLE, (I))
 #define THREE_BYTE_TABLE(I)	DIS386 (USE_3BYTE_TABLE, (I))
 #define XOP_8F_TABLE()		DIS386 (USE_XOP_8F_TABLE, 0)
 #define VEX_C4_TABLE()		DIS386 (USE_VEX_C4_TABLE, 0)
@@ -888,6 +896,7 @@  enum
   REG_EVEX_MAP4_80,
   REG_EVEX_MAP4_81,
   REG_EVEX_MAP4_83,
+  REG_EVEX_MAP4_8F_X86_64_L_0_M_1,
   REG_EVEX_MAP4_C0,
   REG_EVEX_MAP4_C1,
   REG_EVEX_MAP4_D0,
@@ -941,6 +950,7 @@  enum
 
   MOD_EVEX_MAP4_65,
   MOD_EVEX_MAP4_66_PREFIX_0,
+  MOD_EVEX_MAP4_8F_X86_64_L_0,
   MOD_EVEX_MAP4_DA_PREFIX_1,
   MOD_EVEX_MAP4_DB_PREFIX_1,
   MOD_EVEX_MAP4_DC_PREFIX_1,
@@ -951,6 +961,7 @@  enum
   MOD_EVEX_MAP4_F8_PREFIX_2,
   MOD_EVEX_MAP4_F8_PREFIX_3,
   MOD_EVEX_MAP4_F9,
+  MOD_EVEX_MAP4_FF_R_6_X86_64_L_0,
 };
 
 enum
@@ -1189,6 +1200,7 @@  enum
   PREFIX_EVEX_MAP4_60,
   PREFIX_EVEX_MAP4_61,
   PREFIX_EVEX_MAP4_66,
+  PREFIX_EVEX_MAP4_8F_X86_64_L_0_M_1_R_0,
   PREFIX_EVEX_MAP4_D8,
   PREFIX_EVEX_MAP4_DA,
   PREFIX_EVEX_MAP4_DB,
@@ -1201,6 +1213,7 @@  enum
   PREFIX_EVEX_MAP4_F2,
   PREFIX_EVEX_MAP4_F8,
   PREFIX_EVEX_MAP4_FC,
+  PREFIX_EVEX_MAP4_FF_R_6_X86_64_L_0_M_1,
 
   PREFIX_EVEX_MAP5_10,
   PREFIX_EVEX_MAP5_11,
@@ -1341,6 +1354,9 @@  enum
   X86_64_EVEX_0F38F6,
   X86_64_EVEX_0F38F7,
   X86_64_EVEX_0F3AF0,
+
+  X86_64_EVEX_MAP4_8F,
+  X86_64_EVEX_MAP4_FF_R_6,
 };
 
 enum
@@ -1537,7 +1553,10 @@  enum
   EVEX_LEN_0F3A39,
   EVEX_LEN_0F3A3A,
   EVEX_LEN_0F3A3B,
-  EVEX_LEN_0F3A43
+  EVEX_LEN_0F3A43,
+
+  EVEX_LEN_MAP4_8F_X86_64,
+  EVEX_LEN_MAP4_FF_R_6_X86_64,
 };
 
 enum
@@ -1761,6 +1780,9 @@  enum
   EVEX_W_0F3A70,
   EVEX_W_0F3A72,
 
+  EVEX_W_MAP4_8F_X86_64_L_0_M_1_R_0_P_0,
+  EVEX_W_MAP4_FF_R_6_X86_64_L_0_M_1_P_0,
+
   EVEX_W_MAP5_5B_P_0,
   EVEX_W_MAP5_7A_P_3,
 };
@@ -8757,10 +8779,24 @@  get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
       dp = &prefix_table[dp->op[1].bytemode][vindex];
       break;
 
+    case USE_X86_64_EVEX_PUSH2_TABLE:
+    case USE_X86_64_EVEX_POP2_TABLE:
+	ins->evex_type = evex_push2_pop2;
+      unsigned int vvvv_reg = ins->vex.register_specifier
+			      | !ins->vex.v << 4;
+      unsigned int rm_reg = ins->modrm.rm + (ins->rex & REX_B ? 8 : 0)
+			    + (ins->rex2 & REX_B ? 16 : 0);
+      if (!ins->vex.b || vvvv_reg == 0x4 || rm_reg == 0x4
+	  || (dp->op[0].bytemode == USE_X86_64_EVEX_POP2_TABLE
+	      && vvvv_reg == rm_reg))
+	  return &bad_opcode;
+      goto use_x86_64_table;
+
     case USE_X86_64_EVEX_FROM_VEX_TABLE:
       ins->evex_type = evex_from_vex;
       /* Fall through.  */
     case USE_X86_64_TABLE:
+use_x86_64_table:
       vindex = ins->address_mode == mode_64bit ? 1 : 0;
       dp = &x86_64_table[dp->op[1].bytemode][vindex];
       break;
@@ -9570,7 +9606,8 @@  print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
 	  /* Check whether rounding control was enabled for an insn not
 	     supporting it.  */
 	  if (ins.modrm.mod == 3 && ins.vex.b
-	      && !(ins.evex_used & EVEX_b_used))
+	      && !(ins.evex_used & EVEX_b_used)
+	      && ins.evex_type != evex_push2_pop2)
 	    {
 	      for (i = 0; i < MAX_OPERANDS; ++i)
 		{
@@ -13416,6 +13453,9 @@  OP_VEX (instr_info *ins, int bytemode, int sizeflag ATTRIBUTE_UNUSED)
 	case b_mode:
 	  names = att_names8rex;
 	  break;
+	case q_mode:
+	  names = att_names64;
+	  break;
 	case mask_bd_mode:
 	case mask_mode:
 	  if (reg > 0x7)
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index f43cb1ecf7c..f951c452cc3 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -471,6 +471,7 @@  static bitfield opcode_modifiers[] =
   BITFIELD (IntelSyntax),
   BITFIELD (ISA64),
   BITFIELD (No_egpr),
+  BITFIELD (Push2Pop2),
 };
 
 #define CLASS(n) #n, n
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index f36a8da5cbe..1663cc74937 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -749,6 +749,9 @@  enum
   /* egprs (r16-r31) on instruction illegal.  */
   No_egpr,
 
+  /* APX Push2Pop2 bit  */
+  Push2Pop2,
+
   /* The last bitfield in i386_opcode_modifier.  */
   Opcode_Modifier_Num
 };
@@ -797,6 +800,7 @@  typedef struct i386_opcode_modifier
   unsigned int intelsyntax:1;
   unsigned int isa64:2;
   unsigned int no_egpr:1;
+  unsigned int push2pop2:1;
 } i386_opcode_modifier;
 
 /* Operand classes.  */
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 4bb0c9f4906..583b6676b0e 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3555,3 +3555,9 @@  eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
 
 // FRED instructions end.
 
+// APX Push2/Pop2 instruction.
+
+push2, 0xff/6, APX_F|x64, Modrm|VexW0|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+push2p, 0xff/6, APX_F|x64, Modrm|VexW1|EVex128|Push2Pop2|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2, 0x8f/0, APX_F|x64, Modrm|VexW0|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }
+pop2p, 0x8f/0, APX_F|x64, Modrm|VexW1|EVex128|Push2Pop2|SwapSources|EVexMap4|VexVVVV|No_bSuf|No_lSuf|No_sSuf, { Reg64, Reg64 }