On 19.09.2023 17:25, Cui, Lili wrote:
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -247,6 +247,7 @@ enum i386_error
> invalid_vector_register_set,
> invalid_tmm_register_set,
> invalid_dest_and_src_register_set,
> + invalid_pseudo_prefix,
> unsupported_vector_index_register,
> unsupported_broadcast,
> broadcast_needed,
> @@ -353,6 +354,7 @@ struct _i386_insn
> modrm_byte rm;
> rex_byte rex;
> rex_byte vrex;
> + rex_byte rex2; // for extends gpr32 r16-r31
Malformed comment. I'm not convinced one needs to be here in the first place.
> @@ -405,6 +407,11 @@ struct _i386_insn
> /* Compressed disp8*N attribute. */
> unsigned int memshift;
>
> + /* No CSPAZO flags update.*/
> + bool has_nf;
> +
> + bool has_zero_upper;
> +
> /* Prefer load or store in encoding. */
> enum
> {
> @@ -426,6 +433,9 @@ struct _i386_insn
> /* Prefer the REX byte in encoding. */
> bool rex_encoding;
>
> + /* Prefer the REX2 byte in encoding. */
> + bool rex2_encoding;
What is "the REX2 byte"? There are two bytes involved there ...
> @@ -1165,6 +1175,7 @@ static const arch_entry cpu_arch[] =
> VECARCH (sm4, SM4, ANY_SM4, reset),
> SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
> VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
> + SUBARCH (apx_f, APX_F, APX_F, false),
> };
>
> #undef SUBARCH
> @@ -1694,6 +1705,7 @@ is_cpu (const insn_template *t, enum i386_cpu cpu)
> case CpuHLE: return t->cpu.bitfield.cpuhle;
> case CpuAVX512F: return t->cpu.bitfield.cpuavx512f;
> case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
> + case CpuAPX_F: return t->cpu.bitfield.cpuapx_f;
Nit: Please get padding right.
> case Cpu64: return t->cpu.bitfield.cpu64;
> case CpuNo64: return t->cpu.bitfield.cpuno64;
> default:
> @@ -2332,6 +2344,9 @@ register_number (const reg_entry *r)
> if (r->reg_flags & RegRex)
> nr += 8;
>
> + if (r->reg_flags & RegRex2)
> + nr += 16;
> +
> if (r->reg_flags & RegVRex)
> nr += 16;
>
> @@ -3832,6 +3847,18 @@ is_any_vex_encoding (const insn_template *t)
> return t->opcode_modifier.vex || is_evex_encoding (t);
> }
>
> +static INLINE bool
> +is_any_apx_encoding (void)
> +{
> + return i.rex2 || i.rex2_encoding;
> +}
> +
> +static INLINE bool
> +is_any_apx_rex2_encoding (void)
> +{
> + return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
> +}
There's no particularly good place to make this remark: I was expecting
REX2 handling to rather follow REX handling, not the VEX/EVEX one. I certainly
consider at least the first helper's name misleading (APX also includes
various EVEX encodings, after all), and I also don't really like you
(ab)using i.vex.length for REX2 handling.
> @@ -4089,6 +4116,19 @@ build_evex_prefix (void)
> i.vex.bytes[3] |= i.mask.reg->reg_num;
> }
>
> +/* Build (2 bytes) rex2 prefix.
> + | D5h |
> + | m | R4 X4 B4 | W R X B |
> +*/
> +static void
> +build_rex2_prefix (void)
> +{
> + i.vex.length = 2;
> + i.vex.bytes[0] = 0xd5;
> + i.vex.bytes[1] = ((i.tm.opcode_space << 7)
> + | (i.rex2 << 4) | i.rex);
> +}
> +
> static void
> process_immext (void)
> {
> @@ -4354,12 +4394,12 @@ optimize_encoding (void)
> i.suffix = 0;
> /* Convert to byte registers. */
> if (i.types[1].bitfield.word)
> - j = 16;
> + j = 16 + 16; // new 16 apx additional gprs.
> else if (i.types[1].bitfield.dword)
> - j = 32;
> + j = 32 + 16 * 2; // new 16 apx additional gprs
> else
> - j = 48;
> - if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
> + j = 48 + 16 * 3; // new 16 apx additional gprs
> + if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
> j += 8;
This is getting unwieldy: There are too many hard-coded literal numbers
here, and there continues to be zero indication in i386-reg.tbl that the
order of entries is actually relevant.
Also again, please write well-formed comments (when such are useful).
> @@ -5269,6 +5309,9 @@ md_assemble (char *line)
> case invalid_dest_and_src_register_set:
> err_msg = _("destination and source registers must be distinct");
> break;
> + case invalid_pseudo_prefix:
> + err_msg = _("unsupport rex2 pseudo prefix");
If at all, "unsupported". Maybe better "cannot be used here"?
> @@ -5498,7 +5541,17 @@ md_assemble (char *line)
> as_warn (_("translating to `%sp'"), insn_name (&i.tm));
> }
>
> - if (is_any_vex_encoding (&i.tm))
> + if (is_any_apx_encoding ())
> + {
> + if (!is_any_vex_encoding (&i.tm)
I think you should be able to use a cheaper predicate here. No VEX-
encoded APX insns exist, aiui.
> + && i.tm.opcode_space <= SPACE_0F
> + && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
Is the i.vex.register_specifier check really needed here? Any such template
would be an EVEX one, wouldn't it (so the earlier check already covered
those)?
> + build_rex2_prefix ();
> +
> + /* The individual REX.RXBW bits got consumed. */
> + i.rex &= REX_OPCODE;
As to my earlier naming remark - much of course depends on what the further
plans here are.
> + }
> + else if (is_any_vex_encoding (&i.tm))
> {
> if (!cpu_arch_flags.bitfield.cpui286)
> {
> @@ -5514,6 +5567,13 @@ md_assemble (char *line)
> return;
> }
>
> + /* Check for explicit REX2 prefix. */
> + if (i.rex2 || i.rex2_encoding)
> + {
> + as_bad (_("REX2 prefix invalid with `%s'"), insn_name (&i.tm));
> + return;
> + }
> +
> if (i.tm.opcode_modifier.vex)
> build_vex_prefix (t);
> else
> @@ -5553,11 +5613,11 @@ md_assemble (char *line)
> && (i.op[1].regs->reg_flags & RegRex64) != 0)
> || (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
> || (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
> - && i.rex != 0))
> + && (i.rex != 0 || i.rex2!=0)))
Nit: Please get coding style right (also elsewhere).
> {
> int x;
> -
> - i.rex |= REX_OPCODE;
> + if (!i.rex2)
> + i.rex |= REX_OPCODE;
> for (x = 0; x < 2; x++)
> {
> /* Look for 8 bit operand that uses old registers. */
> @@ -5567,9 +5627,16 @@ md_assemble (char *line)
> gas_assert (!(i.op[x].regs->reg_flags & RegRex));
> /* In case it is "hi" register, give up. */
> if (i.op[x].regs->reg_num > 3)
> - as_bad (_("can't encode register '%s%s' in an "
> - "instruction requiring REX prefix."),
> - register_prefix, i.op[x].regs->reg_name);
> + {
> + if (i.rex)
> + as_bad (_("can't encode register '%s%s' in an "
> + "instruction requiring REX prefix."),
> + register_prefix, i.op[x].regs->reg_name);
> + else
> + as_bad (_("can't encode register '%s%s' in an "
> + "instruction requiring REX2 prefix."),
> + register_prefix, i.op[x].regs->reg_name);
> + }
I don't think separate messages are needed here. Just alter the
existing one to say "... REX/REX2 ...".
> @@ -5580,7 +5647,7 @@ md_assemble (char *line)
> }
> }
>
> - if (i.rex == 0 && i.rex_encoding)
> + if ((i.rex == 0 && i.rex_encoding) || (i.rex2 == 0 && i.rex2_encoding))
> {
> /* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
> that uses legacy register. If it is "hi" register, don't add
I think this comment wants updating as well, so there's no question of
it having gone stale (by mentioning only REX_OPCODE).
> @@ -6899,6 +6971,42 @@ VEX_check_encoding (const insn_template *t)
> return 0;
> }
>
> +/* Check if Egprs operands are valid for the instruction. */
> +
> +static int
> +check_EgprOperands (const insn_template *t)
> +{
> + if (t->opcode_modifier.no_egpr)
> + {
> + for (unsigned int op = 0; op < i.operands; op++)
> + {
> + if (i.types[op].bitfield.class != Reg)
> + continue;
> +
> + if (i.op[op].regs->reg_flags & RegRex2)
> + {
> + i.error = register_type_mismatch;
Already here I wonder if re-using this error indicator (and hence
issuing the same error message as is issued for other reasons) is
going to be helpful. However, ...
> + return 1;
> + }
> + }
> +
> + if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
> + || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
> + {
> + i.error = register_type_mismatch;
... here I'm certain it needs to be a different one. It should be
made obvious that the register used is part of the address for the
memory operand, not a register one.
> + return 1;
> + }
> +
> + /* Check pseudo prefix {rex2} are valid. */
> + if (i.rex2_encoding)
> + {
> + i.error = invalid_pseudo_prefix;
> + return 1;
> + }
> + }
> + return 0;
> +}
Transiently, until more checking is added (like patch 2 in the second
series you've sent), you'll break all kinds of insns which don't
have No_egpr set, but which aren't valid to be used with the extended
registers (e.g. all VEX encodings, to name one large group).
> @@ -13985,6 +14115,14 @@ static bool check_register (const reg_entry *r)
> i.vec_encoding = vex_encoding_error;
> }
>
> + if (r->reg_flags & RegRex2)
> + {
> + if (!cpu_arch_flags.bitfield.cpuapx_f
> + || flag_code != CODE_64BIT
> + || i.rex_encoding)
I'm not sure i.rex_encoding is valid to check (already) here. Or else
you'd also need to check i.vec_encoding.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-egpr-inval.s
> @@ -0,0 +1,18 @@
> +# Check Illegal 64bit APX instructions
> + .text
> + .arch .noapx_f
> + test $0x7, %r17d
> + .arch .apx_f
> + test $0x7, %r17d
> + xsave (%r16, %rbx)
> + xsave64 (%r16, %rbx)
> + xrstor (%r16, %rbx)
> + xrstor64 (%r16, %rbx)
> + xsaves (%r16, %rbx)
> + xsaves64 (%r16, %rbx)
> + xrstors (%r16, %rbx)
> + xrstors64 (%r16, %rbx)
> + xsaveopt (%r16, %rbx)
> + xsaveopt64 (%r16, %rbx)
> + xsavec (%r16, %rbx)
> + xsavec64 (%r16, %rbx)
Don't you also want to check the index register?
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.d
> @@ -0,0 +1,29 @@
> +#as:
> +#objdump: -dw
> +#name: x86-64 APX use gpr32 with rex2 prefix illegal check
> +#source: x86-64-apx-rex2-inval.s
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section .text:
> +
> +0+ <_start>:
> +\s*[a-f0-9]+:\s*d5 f0 d5 f0\s+{rex2} pmullw %mm0,%mm6
> +\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 88 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 f7 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 80 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*66 d5 f9 d5 f9\s+{rex2} pmullw %xmm9,%xmm7
These all look valid, yet the test name says "invalid" and the title says
"illegal". Can you clarify what this is about?
Also may I ask that you use [ ] instead of \s, to aid readability?
> +\s*[a-f0-9]+:\s*66 41\s+data16 rex.B
> +\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
> +\s*[a-f0-9]+:\s*d5 ff 21 f8\s+{rex2} mov %db15,%r24
> +\s*[a-f0-9]+:\s*d5 01 21 00\s+{rex2} and %eax,\(%r8\)
> +\s*[a-f0-9]+:\s*d5 00 00 f7\s+{rex2} add %sil,%dil
> +\s*[a-f0-9]+:\s*d5 ff 20 f8\s+{rex2} mov %cr15,%r24
> +\s*[a-f0-9]+:\s*d5 81 ae\s+\(bad\)
> +\s*[a-f0-9]+:\s*27\s+\(bad\)
> +\s*[a-f0-9]+:\s*d5 c1 38\s+\(bad\)
> +\s*[a-f0-9]+:\s*f6\s+.byte 0xf6
> +\s*[a-f0-9]+:\s*07\s+\(bad\)
> +#pass
> diff --git a/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s
> new file mode 100644
> index 00000000000..51dd8df79d6
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2-inval.s
> @@ -0,0 +1,25 @@
> +# Check 64bit instructions with rex2 prefix bad encoding
> +
> + .allow_index_reg
> + .text
> +_start:
> +# check {rex2} pseudo prefix to force REX2 encoding.
> +.byte 0xd5, 0xf0, 0xd5, 0xf0
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0xd5, 0x88, 0xd5, 0xf9
> +.byte 0xd5, 0xf7, 0xd5, 0xf9
> +.byte 0xd5, 0x80, 0xd5, 0xf9
> +
> +.byte 0x66
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0x66, 0x41
> +.byte 0xd5, 0xf9, 0xd5, 0xf9
> +.byte 0xd5, 0xff, 0x21, 0xf8
> +.byte 0xd5, 0x01, 0x21, 0x00
> +.byte 0xd5, 0x00, 0x00, 0xf7
> +.byte 0xd5, 0xff, 0x20, 0xf8
> +# check xsave/xstore are not allowed to use rex2.
> +.byte 0xd5, 0x81, 0xae, 0x27
> +# check rex2 only use for map0/1
> +.byte 0xd5, 0xc1, 0x38, 0xf6, 0x07
Please try to limit .byte use in source as much as possible. Emitting
bogus prefixes may require its use, but that should be about it.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-apx-rex2.s
> @@ -0,0 +1,175 @@
> +# Check 64bit instructions with rex2 prefix encoding
> +
> + .allow_index_reg
> + .text
> +_start:
> + test $0x7, %r24b
> + test $0x7, %r24d
> + test $0x7, %r24
> + test $0x7, %r24w
> +## R bit
> + leal (%rax), %r16d
> + leal (%rax), %r17d
> + leal (%rax), %r18d
> + leal (%rax), %r19d
> + leal (%rax), %r20d
> + leal (%rax), %r21d
> + leal (%rax), %r22d
> + leal (%rax), %r23d
> + leal (%rax), %r24d
> + leal (%rax), %r25d
> + leal (%rax), %r26d
> + leal (%rax), %r27d
> + leal (%rax), %r28d
> + leal (%rax), %r29d
> + leal (%rax), %r30d
> + leal (%rax), %r31d
> +## X bit
> + leal (,%r16), %eax
> + leal (,%r17), %eax
> + leal (,%r18), %eax
> + leal (,%r19), %eax
> + leal (,%r20), %eax
> + leal (,%r21), %eax
> + leal (,%r22), %eax
> + leal (,%r23), %eax
> + leal (,%r24), %eax
> + leal (,%r25), %eax
> + leal (,%r26), %eax
> + leal (,%r27), %eax
> + leal (,%r28), %eax
> + leal (,%r29), %eax
> + leal (,%r30), %eax
> + leal (,%r31), %eax
> +## B bit
> + leal (%r16), %eax
> + leal (%r17), %eax
> + leal (%r18), %eax
> + leal (%r19), %eax
> + leal (%r20), %eax
> + leal (%r21), %eax
> + leal (%r22), %eax
> + leal (%r23), %eax
> + leal (%r24), %eax
> + leal (%r25), %eax
> + leal (%r26), %eax
> + leal (%r27), %eax
> + leal (%r28), %eax
> + leal (%r29), %eax
> + leal (%r30), %eax
> + leal (%r31), %eax
> +## SIB
> + leal 1(%r20), %eax
> + leal 1(%r28), %eax
> + leal 129(%r20), %eax
> + leal 129(%r28), %eax
I don't see why the comment says "SIB" for these.
> +## W bit
> + leaq (%rax), %r15
> + leaq (%rax), %r16
> + leaq (%r15), %rax
> + leaq (%r16), %rax
> + leaq (,%r15), %rax
> + leaq (,%r16), %rax
> +## M bit
> + imull %eax, %r15d
> + imull %eax, %r16d
> + punpckldq (%r18), %mm2 #D5906212
Please ensure consistent indentation, and please omit meaningless comments.
> +## AddRegFrm
> + movl $1, %r16d
From here onwards I'm afraid I can't decipher any of the comments. In many
cases the choice of what to test (and what not) looks pretty random.
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
(I'll look at the disassembler parts separately. This and the other patches
are quite a bit too large anyway.)
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -380,6 +380,7 @@ static bitfield cpu_flags[] =
> BITFIELD (RAO_INT),
> BITFIELD (FRED),
> BITFIELD (LKGS),
> + BITFIELD (APX_F),
> BITFIELD (MWAITX),
> BITFIELD (CLZERO),
> BITFIELD (OSPKE),
> @@ -469,6 +470,7 @@ static bitfield opcode_modifiers[] =
> BITFIELD (ATTSyntax),
> BITFIELD (IntelSyntax),
> BITFIELD (ISA64),
> + BITFIELD (No_egpr),
> };
>
Additionally a dependency of APX_F on XSAVE needs introducing.
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -317,6 +317,8 @@ enum i386_cpu
> CpuAVX512F,
> /* Intel AVX-512 VL Instructions support required. */
> CpuAVX512VL,
> + /* Intel APX Instructions support required. */
> + CpuAPX_F,
The comment kind of misses the F in the feature identifier.
> @@ -742,6 +745,10 @@ enum
> #define INTEL64 2
> #define INTEL64ONLY 3
> ISA64,
> +
> + /* egprs (r16-r31) on instruction illegal. */
> + No_egpr,
I'm not overly happy with the name and spelling. How about NoEgpr? That's
more in line with the majority of the attributes.
> @@ -789,6 +796,7 @@ typedef struct i386_opcode_modifier
> unsigned int attsyntax:1;
> unsigned int intelsyntax:1;
> unsigned int isa64:2;
> + unsigned int no_egpr:1;
> } i386_opcode_modifier;
>
> /* Operand classes. */
> @@ -988,7 +996,7 @@ typedef struct insn_template
> AMD 3DNow! instructions.
> If this template has no extension opcode (the usual case) use None
> Instructions */
> - signed int extension_opcode:9;
> + signed int extension_opcode:0xA;
Why?
> @@ -1001,7 +1009,8 @@ typedef struct insn_template
> #define Prefix_VEX3 6 /* {vex3} */
> #define Prefix_EVEX 7 /* {evex} */
> #define Prefix_REX 8 /* {rex} */
> -#define Prefix_NoOptimize 9 /* {nooptimize} */
> +#define Prefix_REX2 9 /* {rex2} */
> +#define Prefix_NoOptimize 0xA /* {nooptimize} */
Any reason to use a hex number here?
> @@ -1028,6 +1037,7 @@ typedef struct
> #define RegRex 0x1 /* Extended register. */
> #define RegRex64 0x2 /* Extended 8 bit register. */
> #define RegVRex 0x4 /* Extended vector register. */
> +#define RegRex2 0x8 /* Extended rex2 interge register. */
Since I expect / hope the bit will be reused for extended EVEX encodings,
I don't think "rex2" should be mentioned here. Also "integer" please.
> @@ -93,6 +141,22 @@ r12, Class=Reg|Qword|BaseIndex, RegRex, 4, Dw2Inval, 12
> r13, Class=Reg|Qword|BaseIndex, RegRex, 5, Dw2Inval, 13
> r14, Class=Reg|Qword|BaseIndex, RegRex, 6, Dw2Inval, 14
> r15, Class=Reg|Qword|BaseIndex, RegRex, 7, Dw2Inval, 15
> +r16, Class=Reg|Qword|BaseIndex, RegRex2, 0, Dw2Inval, 130
> +r17, Class=Reg|Qword|BaseIndex, RegRex2, 1, Dw2Inval, 131
> +r18, Class=Reg|Qword|BaseIndex, RegRex2, 2, Dw2Inval, 132
> +r19, Class=Reg|Qword|BaseIndex, RegRex2, 3, Dw2Inval, 133
> +r20, Class=Reg|Qword|BaseIndex, RegRex2, 4, Dw2Inval, 134
> +r21, Class=Reg|Qword|BaseIndex, RegRex2, 5, Dw2Inval, 135
> +r22, Class=Reg|Qword|BaseIndex, RegRex2, 6, Dw2Inval, 136
> +r23, Class=Reg|Qword|BaseIndex, RegRex2, 7, Dw2Inval, 137
> +r24, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 0, Dw2Inval, 138
> +r25, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 1, Dw2Inval, 139
> +r26, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 2, Dw2Inval, 140
> +r27, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 3, Dw2Inval, 141
> +r28, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 4, Dw2Inval, 142
> +r29, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 5, Dw2Inval, 143
> +r30, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 6, Dw2Inval, 144
> +r31, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 7, Dw2Inval, 145
I wonder how the DWARF register numbers were chosen ...
Jan
@@ -1,5 +1,8 @@
-*- text -*-
+
+* Add support for Intel APX instructions.
+
* Add support for Intel AVX10.1.
* Add support for Intel PBNDKB instructions.
@@ -247,6 +247,7 @@ enum i386_error
invalid_vector_register_set,
invalid_tmm_register_set,
invalid_dest_and_src_register_set,
+ invalid_pseudo_prefix,
unsupported_vector_index_register,
unsupported_broadcast,
broadcast_needed,
@@ -353,6 +354,7 @@ struct _i386_insn
modrm_byte rm;
rex_byte rex;
rex_byte vrex;
+ rex_byte rex2; // for extends gpr32 r16-r31
sib_byte sib;
vex_prefix vex;
@@ -405,6 +407,11 @@ struct _i386_insn
/* Compressed disp8*N attribute. */
unsigned int memshift;
+ /* No CSPAZO flags update.*/
+ bool has_nf;
+
+ bool has_zero_upper;
+
/* Prefer load or store in encoding. */
enum
{
@@ -426,6 +433,9 @@ struct _i386_insn
/* Prefer the REX byte in encoding. */
bool rex_encoding;
+ /* Prefer the REX2 byte in encoding. */
+ bool rex2_encoding;
+
/* Disable instruction size optimization. */
bool no_optimize;
@@ -1165,6 +1175,7 @@ static const arch_entry cpu_arch[] =
VECARCH (sm4, SM4, ANY_SM4, reset),
SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
+ SUBARCH (apx_f, APX_F, APX_F, false),
};
#undef SUBARCH
@@ -1694,6 +1705,7 @@ is_cpu (const insn_template *t, enum i386_cpu cpu)
case CpuHLE: return t->cpu.bitfield.cpuhle;
case CpuAVX512F: return t->cpu.bitfield.cpuavx512f;
case CpuAVX512VL: return t->cpu.bitfield.cpuavx512vl;
+ case CpuAPX_F: return t->cpu.bitfield.cpuapx_f;
case Cpu64: return t->cpu.bitfield.cpu64;
case CpuNo64: return t->cpu.bitfield.cpuno64;
default:
@@ -2332,6 +2344,9 @@ register_number (const reg_entry *r)
if (r->reg_flags & RegRex)
nr += 8;
+ if (r->reg_flags & RegRex2)
+ nr += 16;
+
if (r->reg_flags & RegVRex)
nr += 16;
@@ -3832,6 +3847,18 @@ is_any_vex_encoding (const insn_template *t)
return t->opcode_modifier.vex || is_evex_encoding (t);
}
+static INLINE bool
+is_any_apx_encoding (void)
+{
+ return i.rex2 || i.rex2_encoding;
+}
+
+static INLINE bool
+is_any_apx_rex2_encoding (void)
+{
+ return (i.rex2 && i.vex.length == 2) || i.rex2_encoding;
+}
+
static unsigned int
get_broadcast_bytes (const insn_template *t, bool diag)
{
@@ -4089,6 +4116,19 @@ build_evex_prefix (void)
i.vex.bytes[3] |= i.mask.reg->reg_num;
}
+/* Build (2 bytes) rex2 prefix.
+ | D5h |
+ | m | R4 X4 B4 | W R X B |
+*/
+static void
+build_rex2_prefix (void)
+{
+ i.vex.length = 2;
+ i.vex.bytes[0] = 0xd5;
+ i.vex.bytes[1] = ((i.tm.opcode_space << 7)
+ | (i.rex2 << 4) | i.rex);
+}
+
static void
process_immext (void)
{
@@ -4354,12 +4394,12 @@ optimize_encoding (void)
i.suffix = 0;
/* Convert to byte registers. */
if (i.types[1].bitfield.word)
- j = 16;
+ j = 16 + 16; // new 16 apx additional gprs.
else if (i.types[1].bitfield.dword)
- j = 32;
+ j = 32 + 16 * 2; // new 16 apx additional gprs
else
- j = 48;
- if (!(i.op[1].regs->reg_flags & RegRex) && base_regnum < 4)
+ j = 48 + 16 * 3; // new 16 apx additional gprs
+ if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
j += 8;
i.op[1].regs -= j;
}
@@ -5269,6 +5309,9 @@ md_assemble (char *line)
case invalid_dest_and_src_register_set:
err_msg = _("destination and source registers must be distinct");
break;
+ case invalid_pseudo_prefix:
+ err_msg = _("unsupport rex2 pseudo prefix");
+ break;
case unsupported_vector_index_register:
err_msg = _("unsupported vector index register");
break;
@@ -5498,7 +5541,17 @@ md_assemble (char *line)
as_warn (_("translating to `%sp'"), insn_name (&i.tm));
}
- if (is_any_vex_encoding (&i.tm))
+ if (is_any_apx_encoding ())
+ {
+ if (!is_any_vex_encoding (&i.tm)
+ && i.tm.opcode_space <= SPACE_0F
+ && !i.vex.register_specifier && !i.has_nf && !i.has_zero_upper)
+ build_rex2_prefix ();
+
+ /* The individual REX.RXBW bits got consumed. */
+ i.rex &= REX_OPCODE;
+ }
+ else if (is_any_vex_encoding (&i.tm))
{
if (!cpu_arch_flags.bitfield.cpui286)
{
@@ -5514,6 +5567,13 @@ md_assemble (char *line)
return;
}
+ /* Check for explicit REX2 prefix. */
+ if (i.rex2 || i.rex2_encoding)
+ {
+ as_bad (_("REX2 prefix invalid with `%s'"), insn_name (&i.tm));
+ return;
+ }
+
if (i.tm.opcode_modifier.vex)
build_vex_prefix (t);
else
@@ -5553,11 +5613,11 @@ md_assemble (char *line)
&& (i.op[1].regs->reg_flags & RegRex64) != 0)
|| (((i.types[0].bitfield.class == Reg && i.types[0].bitfield.byte)
|| (i.types[1].bitfield.class == Reg && i.types[1].bitfield.byte))
- && i.rex != 0))
+ && (i.rex != 0 || i.rex2!=0)))
{
int x;
-
- i.rex |= REX_OPCODE;
+ if (!i.rex2)
+ i.rex |= REX_OPCODE;
for (x = 0; x < 2; x++)
{
/* Look for 8 bit operand that uses old registers. */
@@ -5567,9 +5627,16 @@ md_assemble (char *line)
gas_assert (!(i.op[x].regs->reg_flags & RegRex));
/* In case it is "hi" register, give up. */
if (i.op[x].regs->reg_num > 3)
- as_bad (_("can't encode register '%s%s' in an "
- "instruction requiring REX prefix."),
- register_prefix, i.op[x].regs->reg_name);
+ {
+ if (i.rex)
+ as_bad (_("can't encode register '%s%s' in an "
+ "instruction requiring REX prefix."),
+ register_prefix, i.op[x].regs->reg_name);
+ else
+ as_bad (_("can't encode register '%s%s' in an "
+ "instruction requiring REX2 prefix."),
+ register_prefix, i.op[x].regs->reg_name);
+ }
/* Otherwise it is equivalent to the extended register.
Since the encoding doesn't change this is merely
@@ -5580,7 +5647,7 @@ md_assemble (char *line)
}
}
- if (i.rex == 0 && i.rex_encoding)
+ if ((i.rex == 0 && i.rex_encoding) || (i.rex2 == 0 && i.rex2_encoding))
{
/* Check if we can add a REX_OPCODE byte. Look for 8 bit operand
that uses legacy register. If it is "hi" register, don't add
@@ -5594,6 +5661,7 @@ md_assemble (char *line)
{
gas_assert (!(i.op[x].regs->reg_flags & RegRex));
i.rex_encoding = false;
+ i.rex2_encoding = false;
break;
}
@@ -5772,6 +5840,10 @@ parse_insn (const char *line, char *mnemonic, bool prefix_only)
/* {rex} */
i.rex_encoding = true;
break;
+ case Prefix_REX2:
+ /* {rex2} */
+ i.rex2_encoding = true;
+ break;
case Prefix_NoOptimize:
/* {nooptimize} */
i.no_optimize = true;
@@ -6899,6 +6971,42 @@ VEX_check_encoding (const insn_template *t)
return 0;
}
+/* Check if Egprs operands are valid for the instruction. */
+
+static int
+check_EgprOperands (const insn_template *t)
+{
+ if (t->opcode_modifier.no_egpr)
+ {
+ for (unsigned int op = 0; op < i.operands; op++)
+ {
+ if (i.types[op].bitfield.class != Reg)
+ continue;
+
+ if (i.op[op].regs->reg_flags & RegRex2)
+ {
+ i.error = register_type_mismatch;
+ return 1;
+ }
+ }
+
+ if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
+ || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
+ {
+ i.error = register_type_mismatch;
+ return 1;
+ }
+
+ /* Check pseudo prefix {rex2} are valid. */
+ if (i.rex2_encoding)
+ {
+ i.error = invalid_pseudo_prefix;
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Helper function for the progress() macro in match_template(). */
static INLINE enum i386_error progress (enum i386_error new,
enum i386_error last,
@@ -7371,6 +7479,13 @@ match_template (char mnem_suffix)
continue;
}
+ /* Check if EGRPS operands(r16-r31) are valid. */
+ if (check_EgprOperands (t))
+ {
+ specific_error = progress (i.error);
+ continue;
+ }
+
/* We've found a match; break out of loop. */
break;
}
@@ -8245,6 +8360,18 @@ static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
if (r->reg_flags & RegVRex)
i.vrex |= rex_bit;
+
+ if (r->reg_flags & RegRex2)
+ i.rex2 |= rex_bit;
+}
+
+static INLINE void
+set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
+{
+ if ((r->reg_flags & RegRex) != 0)
+ i.rex |= rex_bit;
+ if ((r->reg_flags & RegRex2) != 0)
+ i.rex2 |= rex_bit;
}
static int
@@ -8728,8 +8855,7 @@ build_modrm_byte (void)
i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
i.types[op] = operand_type_and_not (i.types[op], anydisp);
i.types[op].bitfield.disp32 = 1;
- if ((i.index_reg->reg_flags & RegRex) != 0)
- i.rex |= REX_X;
+ set_rex_rex2 (i.index_reg, REX_X);
}
}
/* RIP addressing for 64bit mode. */
@@ -8800,8 +8926,7 @@ build_modrm_byte (void)
if (!i.tm.opcode_modifier.sib)
i.rm.regmem = i.base_reg->reg_num;
- if ((i.base_reg->reg_flags & RegRex) != 0)
- i.rex |= REX_B;
+ set_rex_rex2 (i.base_reg, REX_B);
i.sib.base = i.base_reg->reg_num;
/* x86-64 ignores REX prefix bit here to avoid decoder
complications. */
@@ -8839,8 +8964,7 @@ build_modrm_byte (void)
else
i.sib.index = i.index_reg->reg_num;
i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
- if ((i.index_reg->reg_flags & RegRex) != 0)
- i.rex |= REX_X;
+ set_rex_rex2 (i.index_reg, REX_X);
}
if (i.disp_operands
@@ -9987,6 +10111,12 @@ output_insn (void)
for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
if (*q)
frag_opcode_byte (*q);
+
+ if (is_any_apx_rex2_encoding ())
+ {
+ frag_opcode_byte (i.vex.bytes[0]);
+ frag_opcode_byte (i.vex.bytes[1]);
+ }
}
else
{
@@ -13985,6 +14115,14 @@ static bool check_register (const reg_entry *r)
i.vec_encoding = vex_encoding_error;
}
+ if (r->reg_flags & RegRex2)
+ {
+ if (!cpu_arch_flags.bitfield.cpuapx_f
+ || flag_code != CODE_64BIT
+ || i.rex_encoding)
+ return false;
+ }
+
if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
&& (!cpu_arch_flags.bitfield.cpu64
|| r->reg_type.bitfield.class != RegCR
@@ -216,6 +216,7 @@ accept various extension mnemonics. For example,
@code{avx10.1/512},
@code{avx10.1/256},
@code{avx10.1/128},
+@code{apx},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -1662,7 +1663,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.lwp} @tab @samp{.fma4} @tab @samp{.xop} @tab @samp{.cx16}
@item @samp{.padlock} @tab @samp{.clzero} @tab @samp{.mwaitx} @tab @samp{.rdpru}
@item @samp{.mcommit} @tab @samp{.sev_es} @tab @samp{.snp} @tab @samp{.invlpgb}
-@item @samp{.tlbsync}
+@item @samp{.tlbsync} @tab @samp{.apx}
@end multitable
Apart from the warning, there are only two other effects on
@@ -11,11 +11,11 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 37 \(bad\)
0+1 <aad0>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 0a .byte 0xa
0+3 <aad1>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 02 .byte 0x2
0+5 <aam0>:
@@ -11,11 +11,11 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 37 \(bad\)
0+1 <aad0>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 0a .byte 0xa
0+3 <aad1>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 02 .byte 0x2
0+5 <aam0>:
new file mode 100644
@@ -0,0 +1,24 @@
+.*: Assembler messages:
+.*:4: Error: bad register name `%r17d'
+.*:7: Error: register type mismatch for `xsave'
+.*:8: Error: register type mismatch for `xsave64'
+.*:9: Error: register type mismatch for `xrstor'
+.*:10: Error: register type mismatch for `xrstor64'
+.*:11: Error: register type mismatch for `xsaves'
+.*:12: Error: register type mismatch for `xsaves64'
+.*:13: Error: register type mismatch for `xrstors'
+.*:14: Error: register type mismatch for `xrstors64'
+.*:15: Error: register type mismatch for `xsaveopt'
+.*:16: Error: register type mismatch for `xsaveopt64'
+.*:17: Error: register type mismatch for `xsavec'
+.*:18: Error: register type mismatch for `xsavec64'
+GAS LISTING .*
+#...
+[ ]*1[ ]+\# Check Illegal 64bit APX instructions
+[ ]*2[ ]+\.text
+[ ]*3[ ]+\.arch \.noapx_f
+[ ]*4[ ]+test \$0x7, %r17d
+[ ]*5[ ]+\.arch \.apx_f
+[ ]*6[ ]+\?\?\?\? D510F7C1 test \$0x7, %r17d
+[ ]*6[ ]+07000000
+#pass
new file mode 100644
@@ -0,0 +1,18 @@
+# Check Illegal 64bit APX instructions
+ .text
+ .arch .noapx_f
+ test $0x7, %r17d
+ .arch .apx_f
+ test $0x7, %r17d
+ xsave (%r16, %rbx)
+ xsave64 (%r16, %rbx)
+ xrstor (%r16, %rbx)
+ xrstor64 (%r16, %rbx)
+ xsaves (%r16, %rbx)
+ xsaves64 (%r16, %rbx)
+ xrstors (%r16, %rbx)
+ xrstors64 (%r16, %rbx)
+ xsaveopt (%r16, %rbx)
+ xsaveopt64 (%r16, %rbx)
+ xsavec (%r16, %rbx)
+ xsavec64 (%r16, %rbx)
new file mode 100644
@@ -0,0 +1,29 @@
+#as:
+#objdump: -dw
+#name: x86-64 APX use gpr32 with rex2 prefix illegal check
+#source: x86-64-apx-rex2-inval.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*d5 f0 d5 f0\s+{rex2} pmullw %mm0,%mm6
+\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
+\s*[a-f0-9]+:\s*d5 88 d5 f9\s+{rex2} pmullw %mm1,%mm7
+\s*[a-f0-9]+:\s*d5 f7 d5 f9\s+{rex2} pmullw %mm1,%mm7
+\s*[a-f0-9]+:\s*d5 80 d5 f9\s+{rex2} pmullw %mm1,%mm7
+\s*[a-f0-9]+:\s*66 d5 f9 d5 f9\s+{rex2} pmullw %xmm9,%xmm7
+\s*[a-f0-9]+:\s*66 41\s+data16 rex.B
+\s*[a-f0-9]+:\s*d5 f9 d5 f9\s+{rex2} pmullw %mm1,%mm7
+\s*[a-f0-9]+:\s*d5 ff 21 f8\s+{rex2} mov %db15,%r24
+\s*[a-f0-9]+:\s*d5 01 21 00\s+{rex2} and %eax,\(%r8\)
+\s*[a-f0-9]+:\s*d5 00 00 f7\s+{rex2} add %sil,%dil
+\s*[a-f0-9]+:\s*d5 ff 20 f8\s+{rex2} mov %cr15,%r24
+\s*[a-f0-9]+:\s*d5 81 ae\s+\(bad\)
+\s*[a-f0-9]+:\s*27\s+\(bad\)
+\s*[a-f0-9]+:\s*d5 c1 38\s+\(bad\)
+\s*[a-f0-9]+:\s*f6\s+.byte 0xf6
+\s*[a-f0-9]+:\s*07\s+\(bad\)
+#pass
new file mode 100644
@@ -0,0 +1,25 @@
+# Check 64bit instructions with rex2 prefix bad encoding
+
+ .allow_index_reg
+ .text
+_start:
+# check {rex2} pseudo prefix to force REX2 encoding.
+.byte 0xd5, 0xf0, 0xd5, 0xf0
+.byte 0xd5, 0xf9, 0xd5, 0xf9
+.byte 0xd5, 0x88, 0xd5, 0xf9
+.byte 0xd5, 0xf7, 0xd5, 0xf9
+.byte 0xd5, 0x80, 0xd5, 0xf9
+
+.byte 0x66
+.byte 0xd5, 0xf9, 0xd5, 0xf9
+.byte 0x66, 0x41
+.byte 0xd5, 0xf9, 0xd5, 0xf9
+.byte 0xd5, 0xff, 0x21, 0xf8
+.byte 0xd5, 0x01, 0x21, 0x00
+.byte 0xd5, 0x00, 0x00, 0xf7
+.byte 0xd5, 0xff, 0x20, 0xf8
+# check xsave/xstore are not allowed to use rex2.
+.byte 0xd5, 0x81, 0xae, 0x27
+# check rex2 only use for map0/1
+.byte 0xd5, 0xc1, 0x38, 0xf6, 0x07
+
new file mode 100644
@@ -0,0 +1,148 @@
+#as:
+#objdump: -dw
+#name: x86-64 APX use gpr32 with rex2 prefix
+#source: x86-64-apx-rex2.s
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*d5 11 f6 c0 07\s+test \$0x7,%r24b
+\s*[a-f0-9]+:\s*d5 11 f7 c0 07 00 00 00 test \$0x7,%r24d
+\s*[a-f0-9]+:\s*d5 19 f7 c0 07 00 00 00 test \$0x7,%r24
+\s*[a-f0-9]+:\s*66 d5 11 f7 c0 07 00 test \$0x7,%r24w
+\s*[a-f0-9]+:\s*d5 40 8d 00\s+lea \(%rax\),%r16d
+\s*[a-f0-9]+:\s*d5 40 8d 08\s+lea \(%rax\),%r17d
+\s*[a-f0-9]+:\s*d5 40 8d 10\s+lea \(%rax\),%r18d
+\s*[a-f0-9]+:\s*d5 40 8d 18\s+lea \(%rax\),%r19d
+\s*[a-f0-9]+:\s*d5 40 8d 20\s+lea \(%rax\),%r20d
+\s*[a-f0-9]+:\s*d5 40 8d 28\s+lea \(%rax\),%r21d
+\s*[a-f0-9]+:\s*d5 40 8d 30\s+lea \(%rax\),%r22d
+\s*[a-f0-9]+:\s*d5 40 8d 38\s+lea \(%rax\),%r23d
+\s*[a-f0-9]+:\s*d5 44 8d 00\s+lea \(%rax\),%r24d
+\s*[a-f0-9]+:\s*d5 44 8d 08\s+lea \(%rax\),%r25d
+\s*[a-f0-9]+:\s*d5 44 8d 10\s+lea \(%rax\),%r26d
+\s*[a-f0-9]+:\s*d5 44 8d 18\s+lea \(%rax\),%r27d
+\s*[a-f0-9]+:\s*d5 44 8d 20\s+lea \(%rax\),%r28d
+\s*[a-f0-9]+:\s*d5 44 8d 28\s+lea \(%rax\),%r29d
+\s*[a-f0-9]+:\s*d5 44 8d 30\s+lea \(%rax\),%r30d
+\s*[a-f0-9]+:\s*d5 44 8d 38\s+lea \(%rax\),%r31d
+\s*[a-f0-9]+:\s*d5 20 8d 04 05 00 00 00 00\s+lea 0x0\(,%r16,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 0d 00 00 00 00\s+lea 0x0\(,%r17,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 15 00 00 00 00\s+lea 0x0\(,%r18,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 1d 00 00 00 00\s+lea 0x0\(,%r19,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 25 00 00 00 00\s+lea 0x0\(,%r20,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 2d 00 00 00 00\s+lea 0x0\(,%r21,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 35 00 00 00 00\s+lea 0x0\(,%r22,1\),%eax
+\s*[a-f0-9]+:\s*d5 20 8d 04 3d 00 00 00 00\s+lea 0x0\(,%r23,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 05 00 00 00 00\s+lea 0x0\(,%r24,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 0d 00 00 00 00\s+lea 0x0\(,%r25,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 15 00 00 00 00\s+lea 0x0\(,%r26,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 1d 00 00 00 00\s+lea 0x0\(,%r27,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 25 00 00 00 00\s+lea 0x0\(,%r28,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 2d 00 00 00 00\s+lea 0x0\(,%r29,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 35 00 00 00 00\s+lea 0x0\(,%r30,1\),%eax
+\s*[a-f0-9]+:\s*d5 22 8d 04 3d 00 00 00 00\s+lea 0x0\(,%r31,1\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 00\s+lea \(%r16\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 01\s+lea \(%r17\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 02\s+lea \(%r18\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 03\s+lea \(%r19\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 04 24 lea \(%r20\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 45 00 lea 0x0\(%r21\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 06\s+lea \(%r22\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 07\s+lea \(%r23\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 00\s+lea \(%r24\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 01\s+lea \(%r25\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 02\s+lea \(%r26\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 03\s+lea \(%r27\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 04 24 lea \(%r28\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 45 00 lea 0x0\(%r29\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 06 lea \(%r30\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 07 lea \(%r31\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 44 24 01 lea 0x1\(%r20\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 44 24 01 lea 0x1\(%r28\),%eax
+\s*[a-f0-9]+:\s*d5 10 8d 84 24 81 00 00 00 lea 0x81\(%r20\),%eax
+\s*[a-f0-9]+:\s*d5 11 8d 84 24 81 00 00 00 lea 0x81\(%r28\),%eax
+\s*[a-f0-9]+:\s*4c 8d 38 lea \(%rax\),%r15
+\s*[a-f0-9]+:\s*d5 48 8d 00 lea \(%rax\),%r16
+\s*[a-f0-9]+:\s*49 8d 07 lea \(%r15\),%rax
+\s*[a-f0-9]+:\s*d5 18 8d 00 lea \(%r16\),%rax
+\s*[a-f0-9]+:\s*4a 8d 04 3d 00 00 00 00 lea 0x0\(,%r15,1\),%rax
+\s*[a-f0-9]+:\s*d5 28 8d 04 05 00 00 00 00 lea 0x0\(,%r16,1\),%rax
+\s*[a-f0-9]+:\s*44 0f af f8 imul %eax,%r15d
+\s*[a-f0-9]+:\s*d5 c0 af c0 imul %eax,%r16d
+\s*[a-f0-9]+:\s*d5 90 62 12 punpckldq %mm2,\(%r18\)
+\s*[a-f0-9]+:\s*d5 10 b8 01 00 00 00 mov \$0x1,%r16d
+\s*[a-f0-9]+:\s*d5 18 63 c0\s+movslq %r16d,%rax
+\s*[a-f0-9]+:\s*d5 48 63 c0\s+movslq %eax,%r16
+\s*[a-f0-9]+:\s*d5 58 63 c8\s+movslq %r16d,%r17
+\s*[a-f0-9]+:\s*d5 90 4c c0\s+cmovl %r16d,%eax
+\s*[a-f0-9]+:\s*d5 c0 4c c0\s+cmovl %eax,%r16d
+\s*[a-f0-9]+:\s*d5 d0 4c c8\s+cmovl %r16d,%r17d
+\s*[a-f0-9]+:\s*d5 90 af 1c 00\s+imul \(%r16,%rax,1\),%ebx
+\s*[a-f0-9]+:\s*d5 a0 af 1c 00\s+imul \(%rax,%r16,1\),%ebx
+\s*[a-f0-9]+:\s*d5 c0 af 04 18\s+imul \(%rax,%rbx,1\),%r16d
+\s*[a-f0-9]+:\s*d5 b0 af 04 08\s+imul \(%r16,%r17,1\),%eax
+\s*[a-f0-9]+:\s*d5 e0 af 0c 00\s+imul \(%rax,%r16,1\),%r17d
+\s*[a-f0-9]+:\s*d5 d0 af 0c 00\s+imul \(%r16,%rax,1\),%r17d
+\s*[a-f0-9]+:\s*d5 f0 af 14 08\s+imul \(%r16,%r17,1\),%r18d
+\s*[a-f0-9]+:\s*d5 90 4c 1c 00\s+cmovl \(%r16,%rax,1\),%ebx
+\s*[a-f0-9]+:\s*d5 a0 4c 1c 00\s+cmovl \(%rax,%r16,1\),%ebx
+\s*[a-f0-9]+:\s*d5 c0 4c 04 18\s+cmovl \(%rax,%rbx,1\),%r16d
+\s*[a-f0-9]+:\s*d5 b0 4c 04 08\s+cmovl \(%r16,%r17,1\),%eax
+\s*[a-f0-9]+:\s*d5 e0 4c 0c 00\s+cmovl \(%rax,%r16,1\),%r17d
+\s*[a-f0-9]+:\s*d5 d0 4c 0c 00\s+cmovl \(%r16,%rax,1\),%r17d
+\s*[a-f0-9]+:\s*d5 f0 4c 14 08\s+cmovl \(%r16,%r17,1\),%r18d
+\s*[a-f0-9]+:\s*d5 10 89 c0 \s+mov %eax,%r16d
+\s*[a-f0-9]+:\s*d5 40 89 c0 \s+mov %r16d,%eax
+\s*[a-f0-9]+:\s*d5 50 89 c1 \s+mov %r16d,%r17d
+\s*[a-f0-9]+:\s*d5 10 8b 1c 00\s+mov \(%r16,%rax,1\),%ebx
+\s*[a-f0-9]+:\s*d5 20 8b 1c 00\s+mov \(%rax,%r16,1\),%ebx
+\s*[a-f0-9]+:\s*d5 40 8b 04 18\s+mov \(%rax,%rbx,1\),%r16d
+\s*[a-f0-9]+:\s*d5 30 8b 04 08\s+mov \(%r16,%r17,1\),%eax
+\s*[a-f0-9]+:\s*d5 60 8b 0c 00\s+mov \(%rax,%r16,1\),%r17d
+\s*[a-f0-9]+:\s*d5 50 8b 0c 00\s+mov \(%r16,%rax,1\),%r17d
+\s*[a-f0-9]+:\s*d5 70 8b 14 08\s+mov \(%r16,%r17,1\),%r18d
+\s*[a-f0-9]+:\s*d5 a0 94 04 00\s+sete \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 90 94 04 00\s+sete \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 b0 94 04 08\s+sete \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 a0 1f 04 00\s+nopl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 90 1f 04 00\s+nopl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 b0 1f 04 08\s+nopl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 ff 04 00\s+incl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 ff 04 00\s+incl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 ff 04 08\s+incl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 ff 0c 00\s+decl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 ff 0c 00\s+decl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 ff 0c 08\s+decl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 14 00\s+notl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 14 00\s+notl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 14 08\s+notl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 1c 00\s+negl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 1c 00\s+negl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 1c 08\s+negl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 24 00\s+mull \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 24 00\s+mull \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 24 08\s+mull \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 2c 00\s+imull \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 2c 00\s+imull \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 2c 08\s+imull \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 34 00\s+divl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 34 00\s+divl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 34 08\s+divl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 20 f7 3c 00\s+idivl \(%rax,%r16,1\)
+\s*[a-f0-9]+:\s*d5 10 f7 3c 00\s+idivl \(%r16,%rax,1\)
+\s*[a-f0-9]+:\s*d5 30 f7 3c 08\s+idivl \(%r16,%r17,1\)
+\s*[a-f0-9]+:\s*d5 90 94 c0 \s+sete %r16b
+\s*[a-f0-9]+:\s*d5 90 1f c0 \s+nop %r16d
+\s*[a-f0-9]+:\s*d5 10 ff c0 \s+inc %r16d
+\s*[a-f0-9]+:\s*d5 10 ff c8 \s+dec %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 d0 \s+not %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 d8 \s+neg %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 e0 \s+mul %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 e8 \s+imul %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 f0 \s+div %r16d
+\s*[a-f0-9]+:\s*d5 10 f7 f8 \s+idiv %r16d
+#pass
new file mode 100644
@@ -0,0 +1,175 @@
+# Check 64bit instructions with rex2 prefix encoding
+
+ .allow_index_reg
+ .text
+_start:
+ test $0x7, %r24b
+ test $0x7, %r24d
+ test $0x7, %r24
+ test $0x7, %r24w
+## R bit
+ leal (%rax), %r16d
+ leal (%rax), %r17d
+ leal (%rax), %r18d
+ leal (%rax), %r19d
+ leal (%rax), %r20d
+ leal (%rax), %r21d
+ leal (%rax), %r22d
+ leal (%rax), %r23d
+ leal (%rax), %r24d
+ leal (%rax), %r25d
+ leal (%rax), %r26d
+ leal (%rax), %r27d
+ leal (%rax), %r28d
+ leal (%rax), %r29d
+ leal (%rax), %r30d
+ leal (%rax), %r31d
+## X bit
+ leal (,%r16), %eax
+ leal (,%r17), %eax
+ leal (,%r18), %eax
+ leal (,%r19), %eax
+ leal (,%r20), %eax
+ leal (,%r21), %eax
+ leal (,%r22), %eax
+ leal (,%r23), %eax
+ leal (,%r24), %eax
+ leal (,%r25), %eax
+ leal (,%r26), %eax
+ leal (,%r27), %eax
+ leal (,%r28), %eax
+ leal (,%r29), %eax
+ leal (,%r30), %eax
+ leal (,%r31), %eax
+## B bit
+ leal (%r16), %eax
+ leal (%r17), %eax
+ leal (%r18), %eax
+ leal (%r19), %eax
+ leal (%r20), %eax
+ leal (%r21), %eax
+ leal (%r22), %eax
+ leal (%r23), %eax
+ leal (%r24), %eax
+ leal (%r25), %eax
+ leal (%r26), %eax
+ leal (%r27), %eax
+ leal (%r28), %eax
+ leal (%r29), %eax
+ leal (%r30), %eax
+ leal (%r31), %eax
+## SIB
+ leal 1(%r20), %eax
+ leal 1(%r28), %eax
+ leal 129(%r20), %eax
+ leal 129(%r28), %eax
+## W bit
+ leaq (%rax), %r15
+ leaq (%rax), %r16
+ leaq (%r15), %rax
+ leaq (%r16), %rax
+ leaq (,%r15), %rax
+ leaq (,%r16), %rax
+## M bit
+ imull %eax, %r15d
+ imull %eax, %r16d
+ punpckldq (%r18), %mm2 #D5906212
+
+## AddRegFrm
+ movl $1, %r16d
+## MRMSrcReg
+ movslq %r16d, %rax
+ movslq %eax, %r16
+ movslq %r16d, %r17
+## MRMSrcRegCC
+ cmovll %r16d, %eax
+ cmovll %eax, %r16d
+ cmovll %r16d, %r17d
+## MRMSrcMem
+ imull (%r16,%rax), %ebx
+ imull (%rax,%r16), %ebx
+ imull (%rax,%rbx), %r16d
+ imull (%r16,%r17), %eax
+ imull (%rax,%r16), %r17d
+ imull (%r16,%rax), %r17d
+ imull (%r16,%r17), %r18d
+## MRMSrcMemCC
+ cmovll (%r16,%rax), %ebx
+ cmovll (%rax,%r16), %ebx
+ cmovll (%rax,%rbx), %r16d
+ cmovll (%r16,%r17), %eax
+ cmovll (%rax,%r16), %r17d
+ cmovll (%r16,%rax), %r17d
+ cmovll (%r16,%r17), %r18d
+## MRMDestReg
+ movl %eax, %r16d
+ movl %r16d, %eax
+ movl %r16d, %r17d
+## MRMDestMem
+ movl (%r16,%rax), %ebx
+ movl (%rax,%r16), %ebx
+ movl (%rax,%rbx), %r16d
+ movl (%r16,%r17), %eax
+ movl (%rax,%r16), %r17d
+ movl (%r16,%rax), %r17d
+ movl (%r16,%r17), %r18d
+## MRMXmCC
+ sete (%rax,%r16)
+ sete (%r16,%rax)
+ sete (%r16,%r17)
+## MRMXm
+ nopl (%rax,%r16)
+ nopl (%r16,%rax)
+ nopl (%r16,%r17)
+## MRM0m
+ incl (%rax,%r16)
+ incl (%r16,%rax)
+ incl (%r16,%r17)
+## MRM1m
+ decl (%rax,%r16)
+ decl (%r16,%rax)
+ decl (%r16,%r17)
+## MRM2m
+ notl (%rax,%r16)
+ notl (%r16,%rax)
+ notl (%r16,%r17)
+## MRM3m
+ negl (%rax,%r16)
+ negl (%r16,%rax)
+ negl (%r16,%r17)
+## MRM4m
+ mull (%rax,%r16)
+ mull (%r16,%rax)
+ mull (%r16,%r17)
+## MRM5m
+ imull (%rax,%r16)
+ imull (%r16,%rax)
+ imull (%r16,%r17)
+## MRM6m
+ divl (%rax,%r16)
+ divl (%r16,%rax)
+ divl (%r16,%r17)
+## MRM7m
+ idivl (%rax,%r16)
+ idivl (%r16,%rax)
+ idivl (%r16,%r17)
+## MRMXrCC
+ sete %r16b
+## MRMXr
+ nopl %r16d
+## MRM0r
+ incl %r16d
+## MRM1r
+ decl %r16d
+## MRM2r
+ notl %r16d
+## MRM3r
+ negl %r16d
+## MRM4r
+ mull %r16d
+## MRM5r
+ imull %r16d
+## MRM6r
+ divl %r16d
+## MRM7r
+ idivl %r16d
@@ -1,10 +1,22 @@
.*: Assembler messages:
.*:2: Error: .*
.*:3: Error: .*
+.*:6: Error: .*
+.*:7: Error: .*
+.*:10: Error: .*
+.*:11: Error: .*
GAS LISTING .*
[ ]*1[ ]+\.text
[ ]*2[ ]+\{disp16\} movb \(%ebp\),%al
[ ]*3[ ]+\{disp16\} movb \(%rbp\),%al
+[ ]*4[ ]+
+[ ]*5[ ]+.*
+[ ]*6[ ]+\{rex2\} xsave \(%r15, %rbx\)
+[ ]*7[ ]+\{rex2\} xsave64 \(%r15, %rbx\)
+[ ]*8[ ]+
+[ ]*9[ ]+.*
+[ ]*10[ ]+\{rex\} movl %eax,\(%r16\)
+[ ]*11[ ]+\{rex\} movl %r16d,\(%r8\)
#...
@@ -1,4 +1,12 @@
.text
{disp16} movb (%ebp),%al
{disp16} movb (%rbp),%al
+
+ /* Instructions that do not support APX. */
+ {rex2} xsave (%r15, %rbx)
+ {rex2} xsave64 (%r15, %rbx)
+
+ /* Pseudo prefix {rex} cannot be used with a GPR32 register. */
+ {rex} movl %eax,(%r16)
+ {rex} movl %r16d,(%r8)
.p2align 4,0
@@ -11,11 +11,11 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 37 \(bad\)
0+1 <aad0>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 0a .byte 0xa
0+3 <aad1>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 02 .byte 0x2
0+5 <aam0>:
@@ -10,11 +10,11 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 37 \(bad\)
0+1 <aad0>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 0a .byte 0xa
0+3 <aad1>:
-[ ]*[a-f0-9]+: d5 \(bad\)
+[ ]*[a-f0-9]+: d5 rex2
[ ]*[a-f0-9]+: 02 .byte 0x2
0+5 <aam0>:
@@ -404,6 +404,18 @@ Disassembly of section .text:
+[a-f0-9]+: 41 0f 28 10 movaps \(%r8\),%xmm2
+[a-f0-9]+: 40 0f 38 01 01 rex phaddw \(%rcx\),%mm0
+[a-f0-9]+: 41 0f 38 01 00 phaddw \(%r8\),%mm0
+ +[a-f0-9]+: 88 c4 mov %al,%ah
+ +[a-f0-9]+: d5 00 d3 e0 {rex2} shl %cl,%eax
+ +[a-f0-9]+: d5 00 a0 01 00 00 00 00 00 00 00 {rex2} movabs 0x1,%al
+ +[a-f0-9]+: d5 00 38 ca {rex2} cmp %cl,%dl
+ +[a-f0-9]+: d5 00 b3 01 {rex2} mov \$(0x)?1,%bl
+ +[a-f0-9]+: d5 00 89 c3 {rex2} mov %eax,%ebx
+ +[a-f0-9]+: d5 01 89 c6 {rex2} mov %eax,%r14d
+ +[a-f0-9]+: d5 01 89 00 {rex2} mov %eax,\(%r8\)
+ +[a-f0-9]+: d5 80 28 d7 {rex2} movaps %xmm7,%xmm2
+ +[a-f0-9]+: d5 84 28 e7 {rex2} movaps %xmm7,%xmm12
+ +[a-f0-9]+: d5 80 28 11 {rex2} movaps \(%rcx\),%xmm2
+ +[a-f0-9]+: d5 81 28 10 {rex2} movaps \(%r8\),%xmm2
+[a-f0-9]+: 8a 45 00 mov 0x0\(%rbp\),%al
+[a-f0-9]+: 8a 45 00 mov 0x0\(%rbp\),%al
+[a-f0-9]+: 8a 85 00 00 00 00 mov 0x0\(%rbp\),%al
@@ -458,6 +470,14 @@ Disassembly of section .text:
+[a-f0-9]+: 41 0f 28 10 movaps \(%r8\),%xmm2
+[a-f0-9]+: 40 0f 38 01 01 rex phaddw \(%rcx\),%mm0
+[a-f0-9]+: 41 0f 38 01 00 phaddw \(%r8\),%mm0
+ +[a-f0-9]+: 88 c4 mov %al,%ah
+ +[a-f0-9]+: d5 00 89 c3 {rex2} mov %eax,%ebx
+ +[a-f0-9]+: d5 01 89 c6 {rex2} mov %eax,%r14d
+ +[a-f0-9]+: d5 01 89 00 {rex2} mov %eax,\(%r8\)
+ +[a-f0-9]+: d5 80 28 d7 {rex2} movaps %xmm7,%xmm2
+ +[a-f0-9]+: d5 84 28 e7 {rex2} movaps %xmm7,%xmm12
+ +[a-f0-9]+: d5 80 28 11 {rex2} movaps \(%rcx\),%xmm2
+ +[a-f0-9]+: d5 81 28 10 {rex2} movaps \(%r8\),%xmm2
+[a-f0-9]+: 8a 45 00 mov 0x0\(%rbp\),%al
+[a-f0-9]+: 8a 45 00 mov 0x0\(%rbp\),%al
+[a-f0-9]+: 8a 85 00 00 00 00 mov 0x0\(%rbp\),%al
@@ -360,6 +360,19 @@ _start:
{rex} movaps (%r8),%xmm2
{rex} phaddw (%rcx),%mm0
{rex} phaddw (%r8),%mm0
+ {rex2} mov %al,%ah
+ {rex2} shl %cl, %eax
+ {rex2} movabs 1, %al
+ {rex2} cmp %cl, %dl
+ {rex2} mov $1, %bl
+ {rex2} movl %eax,%ebx
+ {rex2} movl %eax,%r14d
+ {rex2} movl %eax,(%r8)
+ {rex2} movaps %xmm7,%xmm2
+ {rex2} movaps %xmm7,%xmm12
+ {rex2} movaps (%rcx),%xmm2
+ {rex2} movaps (%r8),%xmm2
+
movb (%rbp),%al
{disp8} movb (%rbp),%al
@@ -422,6 +435,14 @@ _start:
{rex} movaps xmm2,XMMWORD PTR [r8]
{rex} phaddw mm0,QWORD PTR [rcx]
{rex} phaddw mm0,QWORD PTR [r8]
+ {rex2} mov ah,al
+ {rex2} mov ebx,eax
+ {rex2} mov r14d,eax
+ {rex2} mov DWORD PTR [r8],eax
+ {rex2} movaps xmm2,xmm7
+ {rex2} movaps xmm12,xmm7
+ {rex2} movaps xmm2,XMMWORD PTR [rcx]
+ {rex2} movaps xmm2,XMMWORD PTR [r8]
mov al, BYTE PTR [rbp]
{disp8} mov al, BYTE PTR [rbp]
@@ -360,6 +360,9 @@ run_dump_test "x86-64-avx512f-rcigrne-intel"
run_dump_test "x86-64-avx512f-rcigrne"
run_dump_test "x86-64-avx512f-rcigru-intel"
run_dump_test "x86-64-avx512f-rcigru"
+run_list_test "x86-64-apx-egpr-inval" "-al"
+run_dump_test "x86-64-apx-rex2"
+run_dump_test "x86-64-apx-rex2-inval"
run_dump_test "x86-64-avx512f-rcigrz-intel"
run_dump_test "x86-64-avx512f-rcigrz"
run_dump_test "x86-64-clwb"
@@ -112,6 +112,8 @@
/* x86-64 extension prefix. */
#define REX_OPCODE 0x40
+#define REX2_OPCODE 0xd5
+
/* Non-zero if OPCODE is the rex prefix. */
#define REX_PREFIX_P(opcode) (((opcode) & 0xf0) == REX_OPCODE)
@@ -144,6 +144,12 @@ struct instr_info
/* Bits of REX we've already used. */
uint8_t rex_used;
+ /* REX2 prefix bits of the current instruction, used to encode gpr32 (r16-r31). */
+ unsigned char rex2;
+ /* Bits of REX2 we've already used. */
+ unsigned char rex2_used;
+ unsigned char rex2_payload;
+
bool need_modrm;
unsigned char need_vex;
bool has_sib;
@@ -169,6 +175,7 @@ struct instr_info
signed char last_data_prefix;
signed char last_addr_prefix;
signed char last_rex_prefix;
+ signed char last_rex2_prefix;
signed char last_seg_prefix;
signed char fwait_prefix;
/* The active segment register prefix. */
@@ -269,6 +276,12 @@ struct dis_private {
ins->rex_used |= REX_OPCODE; \
}
+/* Record in ins->rex2_used the bits of VALUE that are set in ins->rex2. */
+#define USED_REX2(value) \
+ { \
+ if (ins->rex2 & (value)) \
+ ins->rex2_used |= (value); \
+ }
+
#define EVEX_b_used 1
#define EVEX_len_used 2
@@ -286,6 +299,7 @@ struct dis_private {
#define PREFIX_DATA 0x200
#define PREFIX_ADDR 0x400
#define PREFIX_FWAIT 0x800
+#define PREFIX_REX2 0x1000
/* Make sure that bytes from INFO->PRIVATE_DATA->BUFFER (inclusive)
to ADDR (exclusive) are valid. Returns true for success, false
@@ -367,6 +381,7 @@ fetch_error (const instr_info *ins)
#define PREFIX_IGNORED_DATA (PREFIX_DATA << PREFIX_IGNORED_SHIFT)
#define PREFIX_IGNORED_ADDR (PREFIX_ADDR << PREFIX_IGNORED_SHIFT)
#define PREFIX_IGNORED_LOCK (PREFIX_LOCK << PREFIX_IGNORED_SHIFT)
+#define PREFIX_IGNORED_REX2 (PREFIX_REX2 << PREFIX_IGNORED_SHIFT)
/* Opcode prefixes. */
#define PREFIX_OPCODE (PREFIX_REPZ \
@@ -2390,22 +2405,30 @@ static const char intel_index16[][6] = {
static const char att_names64[][8] = {
"%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
- "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
+ "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31"
};
static const char att_names32[][8] = {
"%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
- "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d"
+ "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d",
+ "%r16d", "%r17d", "%r18d", "%r19d", "%r20d", "%r21d", "%r22d", "%r23d",
+ "%r24d", "%r25d", "%r26d", "%r27d", "%r28d", "%r29d", "%r30d", "%r31d"
};
static const char att_names16[][8] = {
"%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
- "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w"
+ "%r8w", "%r9w", "%r10w", "%r11w", "%r12w", "%r13w", "%r14w", "%r15w",
+ "%r16w", "%r17w", "%r18w", "%r19w", "%r20w", "%r21w", "%r22w", "%r23w",
+ "%r24w", "%r25w", "%r26w", "%r27w", "%r28w", "%r29w", "%r30w", "%r31w"
};
static const char att_names8[][8] = {
"%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh",
};
static const char att_names8rex[][8] = {
"%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
- "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b"
+ "%r8b", "%r9b", "%r10b", "%r11b", "%r12b", "%r13b", "%r14b", "%r15b",
+ "%r16b", "%r17b", "%r18b", "%r19b", "%r20b", "%r21b", "%r22b", "%r23b",
+ "%r24b", "%r25b", "%r26b", "%r27b", "%r28b", "%r29b", "%r30b", "%r31b"
};
static const char att_names_seg[][4] = {
"%es", "%cs", "%ss", "%ds", "%fs", "%gs", "%?", "%?",
@@ -2794,9 +2817,9 @@ static const struct dis386 reg_table[][8] = {
{ Bad_Opcode },
{ "cmpxchg8b", { { CMPXCHG8B_Fixup, q_mode } }, 0 },
{ Bad_Opcode },
- { "xrstors", { FXSAVE }, 0 },
- { "xsavec", { FXSAVE }, 0 },
- { "xsaves", { FXSAVE }, 0 },
+ { "xrstors", { FXSAVE }, PREFIX_IGNORED_REX2 },
+ { "xsavec", { FXSAVE }, PREFIX_IGNORED_REX2 },
+ { "xsaves", { FXSAVE }, PREFIX_IGNORED_REX2 },
{ MOD_TABLE (MOD_0FC7_REG_6) },
{ MOD_TABLE (MOD_0FC7_REG_7) },
},
@@ -3364,7 +3387,7 @@ static const struct dis386 prefix_table[][4] = {
/* PREFIX_0FAE_REG_4_MOD_0 */
{
- { "xsave", { FXSAVE }, 0 },
+ { "xsave", { FXSAVE }, PREFIX_IGNORED_REX2 },
{ "ptwrite{%LQ|}", { Edq }, 0 },
},
@@ -3382,7 +3405,7 @@ static const struct dis386 prefix_table[][4] = {
/* PREFIX_0FAE_REG_6_MOD_0 */
{
- { "xsaveopt", { FXSAVE }, PREFIX_OPCODE },
+ { "xsaveopt", { FXSAVE }, PREFIX_OPCODE | PREFIX_IGNORED_REX2 },
{ "clrssbsy", { Mq }, PREFIX_OPCODE },
{ "clwb", { Mb }, PREFIX_OPCODE },
},
@@ -8125,7 +8148,7 @@ static const struct dis386 mod_table[][2] = {
},
{
/* MOD_0FAE_REG_5 */
- { "xrstor", { FXSAVE }, PREFIX_OPCODE },
+ { "xrstor", { FXSAVE }, PREFIX_OPCODE | PREFIX_IGNORED_REX2 },
{ PREFIX_TABLE (PREFIX_0FAE_REG_5_MOD_3) },
},
{
@@ -8323,6 +8346,24 @@ ckprefix (instr_info *ins)
return ckp_okay;
ins->last_rex_prefix = i;
break;
+ /* REX2 must be the last prefix. */
+ case 0xd5:
+ if (ins->address_mode == mode_64bit)
+ {
+ if (ins->last_rex_prefix >= 0)
+ return ckp_bogus;
+
+ ins->codep++;
+ if (!fetch_code (ins->info, ins->codep + 1))
+ return ckp_fetch_error;
+ ins->rex2_payload = *ins->codep;
+ ins->rex2 = ins->rex2_payload >> 4;
+ ins->rex = (ins->rex2_payload & 0xf) | REX_OPCODE;
+ ins->codep++;
+ ins->last_rex2_prefix = i;
+ ins->all_prefixes[i] = REX2_OPCODE;
+ }
+ return ckp_okay;
case 0xf3:
ins->prefixes |= PREFIX_REPZ;
ins->last_repz_prefix = i;
@@ -8490,6 +8531,8 @@ prefix_name (enum address_mode mode, uint8_t pref, int sizeflag)
return "bnd";
case NOTRACK_PREFIX:
return "notrack";
+ case REX2_OPCODE:
+ return "rex2";
default:
return NULL;
}
@@ -8628,6 +8671,8 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
break;
case USE_3BYTE_TABLE:
+ if (ins->last_rex2_prefix >= 0)
+ return &bad_opcode;
if (!fetch_code (ins->info, ins->codep + 2))
return &err_opcode;
vindex = *ins->codep++;
@@ -8751,6 +8796,8 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
break;
case USE_VEX_C4_TABLE:
+ if (ins->last_rex2_prefix >= 0)
+ return &bad_opcode;
/* VEX prefix. */
if (!fetch_code (ins->info, ins->codep + 3))
return &err_opcode;
@@ -8812,6 +8859,8 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
break;
case USE_VEX_C5_TABLE:
+ if (ins->last_rex2_prefix >= 0)
+ return &bad_opcode;
/* VEX prefix. */
if (!fetch_code (ins->info, ins->codep + 2))
return &err_opcode;
@@ -8853,6 +8902,8 @@ get_valid_dis386 (const struct dis386 *dp, instr_info *ins)
break;
case USE_EVEX_TABLE:
+ if (ins->last_rex2_prefix >= 0)
+ return &bad_opcode;
ins->two_source_ops = false;
/* EVEX prefix. */
ins->vex.evex = true;
@@ -9128,6 +9179,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
.last_data_prefix = -1,
.last_addr_prefix = -1,
.last_rex_prefix = -1,
+ .last_rex2_prefix = -1,
.last_seg_prefix = -1,
.fwait_prefix = -1,
};
@@ -9292,13 +9344,17 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
goto out;
}
- if (*ins.codep == 0x0f)
+ /* M0 in rex2 prefix represents map0 or map1. */
+ if (*ins.codep == 0x0f || (ins.rex2 & 0x8))
{
unsigned char threebyte;
- ins.codep++;
- if (!fetch_code (info, ins.codep + 1))
- goto fetch_error_out;
+ if (!ins.rex2)
+ {
+ ins.codep++;
+ if (!fetch_code (info, ins.codep + 1))
+ goto fetch_error_out;
+ }
threebyte = *ins.codep;
dp = &dis386_twobyte[threebyte];
ins.need_modrm = twobyte_has_modrm[threebyte];
@@ -9454,6 +9510,14 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
goto out;
}
+ if ((dp->prefix_requirement & PREFIX_IGNORED_REX2)
+ && ins.last_rex2_prefix >= 0)
+ {
+ i386_dis_printf (info, dis_style_text, "(bad)");
+ ret = ins.end_codep - priv.the_buffer;
+ goto out;
+ }
+
switch (dp->prefix_requirement)
{
case PREFIX_DATA:
@@ -9468,6 +9532,7 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
ins.used_prefixes |= PREFIX_DATA;
/* Fall through. */
case PREFIX_OPCODE:
+ case PREFIX_OPCODE | PREFIX_IGNORED_REX2:
/* If the mandatory PREFIX_REPZ/PREFIX_REPNZ/PREFIX_DATA prefix is
unused, opcode is invalid. Since the PREFIX_DATA prefix may be
used by putop and MMX/SSE operand and may be overridden by the
@@ -9510,9 +9575,17 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
/* Check if the REX prefix is used. */
if ((ins.rex ^ ins.rex_used) == 0
- && !ins.need_vex && ins.last_rex_prefix >= 0)
+ && !ins.need_vex && ins.last_rex_prefix >= 0
+ && ins.last_rex2_prefix < 0)
ins.all_prefixes[ins.last_rex_prefix] = 0;
+ /* Check if the REX2 prefix is used. */
+ if (ins.last_rex2_prefix >= 0
+ && ((((ins.rex2 & 0x7) ^ (ins.rex2_used & 0x7)) == 0
+ && (ins.rex2 & 0x7))
+ || dp == &bad_opcode))
+ ins.all_prefixes[ins.last_rex2_prefix] = 0;
+
/* Check if the SEG prefix is used. */
if ((ins.prefixes & (PREFIX_CS | PREFIX_SS | PREFIX_DS | PREFIX_ES
| PREFIX_FS | PREFIX_GS)) != 0
@@ -9541,7 +9614,10 @@ print_insn (bfd_vma pc, disassemble_info *info, int intel_syntax)
if (name == NULL)
abort ();
prefix_length += strlen (name) + 1;
- i386_dis_printf (info, dis_style_mnemonic, "%s ", name);
+ if (ins.all_prefixes[i] == REX2_OPCODE)
+ i386_dis_printf (info, dis_style_mnemonic, "{%s} ", name);
+ else
+ i386_dis_printf (info, dis_style_mnemonic, "%s ", name);
}
/* Check maximum code length. */
@@ -11086,8 +11162,11 @@ print_register (instr_info *ins, unsigned int reg, unsigned int rexmask,
ins->illegal_masking = true;
USED_REX (rexmask);
+ USED_REX2 (rexmask);
if (ins->rex & rexmask)
reg += 8;
+ if (ins->rex2 & rexmask)
+ reg += 16;
switch (bytemode)
{
@@ -11307,6 +11386,7 @@ static bool
OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
{
int add = (ins->rex & REX_B) ? 8 : 0;
+ add += (ins->rex2 & REX_B) ? 16 : 0;
int riprel = 0;
int shift;
@@ -11414,6 +11494,7 @@ OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
shift = 0;
USED_REX (REX_B);
+ USED_REX2 (REX_B);
if (ins->intel_syntax)
intel_operand_size (ins, bytemode, sizeflag);
append_seg (ins);
@@ -11444,8 +11525,11 @@ OP_E_memory (instr_info *ins, int bytemode, int sizeflag)
{
vindex = ins->sib.index;
USED_REX (REX_X);
+ USED_REX2 (REX_X);
if (ins->rex & REX_X)
vindex += 8;
+ if (ins->rex2 & REX_X)
+ vindex += 16;
switch (bytemode)
{
case vex_vsib_d_w_dq_mode:
@@ -11866,7 +11950,7 @@ static bool
OP_REG (instr_info *ins, int code, int sizeflag)
{
const char *s;
- int add;
+ int add = 0;
switch (code)
{
@@ -11877,10 +11961,11 @@ OP_REG (instr_info *ins, int code, int sizeflag)
}
USED_REX (REX_B);
+ USED_REX2 (REX_B);
if (ins->rex & REX_B)
add = 8;
- else
- add = 0;
+ if (ins->rex2 & REX_B)
+ add += 16;
switch (code)
{
@@ -12590,8 +12675,11 @@ OP_EX (instr_info *ins, int bytemode, int sizeflag)
reg = ins->modrm.rm;
USED_REX (REX_B);
+ USED_REX2 (REX_B);
if (ins->rex & REX_B)
reg += 8;
+ if (ins->rex2 & REX_B)
+ reg += 16;
if (ins->vex.evex)
{
USED_REX (REX_X);
@@ -380,6 +380,7 @@ static bitfield cpu_flags[] =
BITFIELD (RAO_INT),
BITFIELD (FRED),
BITFIELD (LKGS),
+ BITFIELD (APX_F),
BITFIELD (MWAITX),
BITFIELD (CLZERO),
BITFIELD (OSPKE),
@@ -469,6 +470,7 @@ static bitfield opcode_modifiers[] =
BITFIELD (ATTSyntax),
BITFIELD (IntelSyntax),
BITFIELD (ISA64),
+ BITFIELD (No_egpr),
};
#define CLASS(n) #n, n
@@ -317,6 +317,8 @@ enum i386_cpu
CpuAVX512F,
/* Intel AVX-512 VL Instructions support required. */
CpuAVX512VL,
+ /* Intel APX Instructions support required. */
+ CpuAPX_F,
/* Not supported in the 64bit mode */
CpuNo64,
@@ -352,6 +354,7 @@ enum i386_cpu
cpuhle:1, \
cpuavx512f:1, \
cpuavx512vl:1, \
+ cpuapx_f:1, \
/* NOTE: This field needs to remain last. */ \
cpuno64:1
@@ -742,6 +745,10 @@ enum
#define INTEL64 2
#define INTEL64ONLY 3
ISA64,
+
+ /* eGPRs (r16-r31) are not permitted on this instruction. */
+ No_egpr,
+
/* The last bitfield in i386_opcode_modifier. */
Opcode_Modifier_Num
};
@@ -789,6 +796,7 @@ typedef struct i386_opcode_modifier
unsigned int attsyntax:1;
unsigned int intelsyntax:1;
unsigned int isa64:2;
+ unsigned int no_egpr:1;
} i386_opcode_modifier;
/* Operand classes. */
@@ -988,7 +996,7 @@ typedef struct insn_template
AMD 3DNow! instructions.
If this template has no extension opcode (the usual case) use None
Instructions */
- signed int extension_opcode:9;
+ signed int extension_opcode:0xA;
#define None (-1) /* If no extension_opcode is possible. */
/* Pseudo prefixes. */
@@ -1001,7 +1009,8 @@ typedef struct insn_template
#define Prefix_VEX3 6 /* {vex3} */
#define Prefix_EVEX 7 /* {evex} */
#define Prefix_REX 8 /* {rex} */
-#define Prefix_NoOptimize 9 /* {nooptimize} */
+#define Prefix_REX2 9 /* {rex2} */
+#define Prefix_NoOptimize 0xA /* {nooptimize} */
/* the bits in opcode_modifier are used to generate the final opcode from
the base_opcode. These bits also are used to detect alternate forms of
@@ -1028,6 +1037,7 @@ typedef struct
#define RegRex 0x1 /* Extended register. */
#define RegRex64 0x2 /* Extended 8 bit register. */
#define RegVRex 0x4 /* Extended vector register. */
+#define RegRex2 0x8 /* Extended REX2 integer register (r16-r31). */
unsigned char reg_num;
#define RegIP ((unsigned char ) ~0)
/* EIZ and RIZ are fake index registers. */
@@ -889,7 +889,7 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:0, disp32:Disp32:0, +
load:Load:0, store:Store:0, +
vex:VEX:0, vex2:VEX:0, vex3:VEX3:0, evex:EVEX:0, +
- rex:REX:x64, nooptimize:NoOptimize:0>
+ rex:REX:x64, rex2:REX2:x64, nooptimize:NoOptimize:0>
{<pseudopfx>}, PSEUDO_PREFIX/Prefix_<pseudopfx:ident>, <pseudopfx:cpu>, NoSuf|IsPrefix, {}
@@ -1422,16 +1422,16 @@ crc32, 0xf20f38f0, SSE4_2|x64, W|Modrm|No_wSuf|No_lSuf|No_sSuf, { Reg8|Reg64|Uns
// xsave/xrstor New Instructions.
-xsave, 0xfae/4, Xsave, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Unspecified|BaseIndex }
-xsave64, 0xfae/4, Xsave|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
-xrstor, 0xfae/5, Xsave, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Unspecified|BaseIndex }
-xrstor64, 0xfae/5, Xsave|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
+xsave, 0xfae/4, Xsave, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_egpr, { Unspecified|BaseIndex }
+xsave64, 0xfae/4, Xsave|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
+xrstor, 0xfae/5, Xsave, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_egpr, { Unspecified|BaseIndex }
+xrstor64, 0xfae/5, Xsave|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
xgetbv, 0xf01d0, Xsave, NoSuf, {}
xsetbv, 0xf01d1, Xsave, NoSuf, {}
// xsaveopt
-xsaveopt, 0xfae/6, Xsaveopt, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf, { Unspecified|BaseIndex }
-xsaveopt64, 0xfae/6, Xsaveopt|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
+xsaveopt, 0xfae/6, Xsaveopt, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_egpr, { Unspecified|BaseIndex }
+xsaveopt64, 0xfae/6, Xsaveopt|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
// AES instructions.
@@ -2492,17 +2492,17 @@ clflushopt, 0x660fae/7, ClflushOpt, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex
// XSAVES/XRSTORS instructions.
-xrstors, 0xfc7/3, XSAVES, Modrm|NoSuf, { Unspecified|BaseIndex }
-xrstors64, 0xfc7/3, XSAVES|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
-xsaves, 0xfc7/5, XSAVES, Modrm|NoSuf, { Unspecified|BaseIndex }
-xsaves64, 0xfc7/5, XSAVES|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
+xrstors, 0xfc7/3, XSAVES, Modrm|NoSuf|No_egpr, { Unspecified|BaseIndex }
+xrstors64, 0xfc7/3, XSAVES|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
+xsaves, 0xfc7/5, XSAVES, Modrm|NoSuf|No_egpr, { Unspecified|BaseIndex }
+xsaves64, 0xfc7/5, XSAVES|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
// XSAVES instructions end.
// XSAVEC instructions.
-xsavec, 0xfc7/4, XSAVEC, Modrm|NoSuf, { Unspecified|BaseIndex }
-xsavec64, 0xfc7/4, XSAVEC|x64, Modrm|NoSuf|Size64, { Unspecified|BaseIndex }
+xsavec, 0xfc7/4, XSAVEC, Modrm|NoSuf|No_egpr, { Unspecified|BaseIndex }
+xsavec64, 0xfc7/4, XSAVEC|x64, Modrm|NoSuf|Size64|No_egpr, { Unspecified|BaseIndex }
// XSAVEC instructions end.
@@ -43,6 +43,22 @@ r12b, Class=Reg|Byte, RegRex|RegRex64, 4, Dw2Inval, Dw2Inval
r13b, Class=Reg|Byte, RegRex|RegRex64, 5, Dw2Inval, Dw2Inval
r14b, Class=Reg|Byte, RegRex|RegRex64, 6, Dw2Inval, Dw2Inval
r15b, Class=Reg|Byte, RegRex|RegRex64, 7, Dw2Inval, Dw2Inval
+r16b, Class=Reg|Byte, RegRex2|RegRex64, 0, Dw2Inval, Dw2Inval
+r17b, Class=Reg|Byte, RegRex2|RegRex64, 1, Dw2Inval, Dw2Inval
+r18b, Class=Reg|Byte, RegRex2|RegRex64, 2, Dw2Inval, Dw2Inval
+r19b, Class=Reg|Byte, RegRex2|RegRex64, 3, Dw2Inval, Dw2Inval
+r20b, Class=Reg|Byte, RegRex2|RegRex64, 4, Dw2Inval, Dw2Inval
+r21b, Class=Reg|Byte, RegRex2|RegRex64, 5, Dw2Inval, Dw2Inval
+r22b, Class=Reg|Byte, RegRex2|RegRex64, 6, Dw2Inval, Dw2Inval
+r23b, Class=Reg|Byte, RegRex2|RegRex64, 7, Dw2Inval, Dw2Inval
+r24b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 0, Dw2Inval, Dw2Inval
+r25b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 1, Dw2Inval, Dw2Inval
+r26b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 2, Dw2Inval, Dw2Inval
+r27b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 3, Dw2Inval, Dw2Inval
+r28b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 4, Dw2Inval, Dw2Inval
+r29b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 5, Dw2Inval, Dw2Inval
+r30b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 6, Dw2Inval, Dw2Inval
+r31b, Class=Reg|Byte, RegRex2|RegRex64|RegRex, 7, Dw2Inval, Dw2Inval
// 16 bit regs
ax, Class=Reg|Instance=Accum|Word, 0, 0, Dw2Inval, Dw2Inval
cx, Class=Reg|Word, 0, 1, Dw2Inval, Dw2Inval
@@ -60,6 +76,22 @@ r12w, Class=Reg|Word, RegRex, 4, Dw2Inval, Dw2Inval
r13w, Class=Reg|Word, RegRex, 5, Dw2Inval, Dw2Inval
r14w, Class=Reg|Word, RegRex, 6, Dw2Inval, Dw2Inval
r15w, Class=Reg|Word, RegRex, 7, Dw2Inval, Dw2Inval
+r16w, Class=Reg|Word, RegRex2, 0, Dw2Inval, Dw2Inval
+r17w, Class=Reg|Word, RegRex2, 1, Dw2Inval, Dw2Inval
+r18w, Class=Reg|Word, RegRex2, 2, Dw2Inval, Dw2Inval
+r19w, Class=Reg|Word, RegRex2, 3, Dw2Inval, Dw2Inval
+r20w, Class=Reg|Word, RegRex2, 4, Dw2Inval, Dw2Inval
+r21w, Class=Reg|Word, RegRex2, 5, Dw2Inval, Dw2Inval
+r22w, Class=Reg|Word, RegRex2, 6, Dw2Inval, Dw2Inval
+r23w, Class=Reg|Word, RegRex2, 7, Dw2Inval, Dw2Inval
+r24w, Class=Reg|Word, RegRex2|RegRex, 0, Dw2Inval, Dw2Inval
+r25w, Class=Reg|Word, RegRex2|RegRex, 1, Dw2Inval, Dw2Inval
+r26w, Class=Reg|Word, RegRex2|RegRex, 2, Dw2Inval, Dw2Inval
+r27w, Class=Reg|Word, RegRex2|RegRex, 3, Dw2Inval, Dw2Inval
+r28w, Class=Reg|Word, RegRex2|RegRex, 4, Dw2Inval, Dw2Inval
+r29w, Class=Reg|Word, RegRex2|RegRex, 5, Dw2Inval, Dw2Inval
+r30w, Class=Reg|Word, RegRex2|RegRex, 6, Dw2Inval, Dw2Inval
+r31w, Class=Reg|Word, RegRex2|RegRex, 7, Dw2Inval, Dw2Inval
// 32 bit regs
eax, Class=Reg|Instance=Accum|Dword|BaseIndex, 0, 0, 0, Dw2Inval
ecx, Class=Reg|Instance=RegC|Dword|BaseIndex, 0, 1, 1, Dw2Inval
@@ -77,6 +109,22 @@ r12d, Class=Reg|Dword|BaseIndex, RegRex, 4, Dw2Inval, Dw2Inval
r13d, Class=Reg|Dword|BaseIndex, RegRex, 5, Dw2Inval, Dw2Inval
r14d, Class=Reg|Dword|BaseIndex, RegRex, 6, Dw2Inval, Dw2Inval
r15d, Class=Reg|Dword|BaseIndex, RegRex, 7, Dw2Inval, Dw2Inval
+r16d, Class=Reg|Dword|BaseIndex, RegRex2, 0, Dw2Inval, Dw2Inval
+r17d, Class=Reg|Dword|BaseIndex, RegRex2, 1, Dw2Inval, Dw2Inval
+r18d, Class=Reg|Dword|BaseIndex, RegRex2, 2, Dw2Inval, Dw2Inval
+r19d, Class=Reg|Dword|BaseIndex, RegRex2, 3, Dw2Inval, Dw2Inval
+r20d, Class=Reg|Dword|BaseIndex, RegRex2, 4, Dw2Inval, Dw2Inval
+r21d, Class=Reg|Dword|BaseIndex, RegRex2, 5, Dw2Inval, Dw2Inval
+r22d, Class=Reg|Dword|BaseIndex, RegRex2, 6, Dw2Inval, Dw2Inval
+r23d, Class=Reg|Dword|BaseIndex, RegRex2, 7, Dw2Inval, Dw2Inval
+r24d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 0, Dw2Inval, Dw2Inval
+r25d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 1, Dw2Inval, Dw2Inval
+r26d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 2, Dw2Inval, Dw2Inval
+r27d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 3, Dw2Inval, Dw2Inval
+r28d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 4, Dw2Inval, Dw2Inval
+r29d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 5, Dw2Inval, Dw2Inval
+r30d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 6, Dw2Inval, Dw2Inval
+r31d, Class=Reg|Dword|BaseIndex, RegRex2|RegRex, 7, Dw2Inval, Dw2Inval
rax, Class=Reg|Instance=Accum|Qword|BaseIndex, 0, 0, Dw2Inval, 0
rcx, Class=Reg|Instance=RegC|Qword|BaseIndex, 0, 1, Dw2Inval, 2
rdx, Class=Reg|Instance=RegD|Qword|BaseIndex, 0, 2, Dw2Inval, 1
@@ -93,6 +141,22 @@ r12, Class=Reg|Qword|BaseIndex, RegRex, 4, Dw2Inval, 12
r13, Class=Reg|Qword|BaseIndex, RegRex, 5, Dw2Inval, 13
r14, Class=Reg|Qword|BaseIndex, RegRex, 6, Dw2Inval, 14
r15, Class=Reg|Qword|BaseIndex, RegRex, 7, Dw2Inval, 15
+r16, Class=Reg|Qword|BaseIndex, RegRex2, 0, Dw2Inval, 130
+r17, Class=Reg|Qword|BaseIndex, RegRex2, 1, Dw2Inval, 131
+r18, Class=Reg|Qword|BaseIndex, RegRex2, 2, Dw2Inval, 132
+r19, Class=Reg|Qword|BaseIndex, RegRex2, 3, Dw2Inval, 133
+r20, Class=Reg|Qword|BaseIndex, RegRex2, 4, Dw2Inval, 134
+r21, Class=Reg|Qword|BaseIndex, RegRex2, 5, Dw2Inval, 135
+r22, Class=Reg|Qword|BaseIndex, RegRex2, 6, Dw2Inval, 136
+r23, Class=Reg|Qword|BaseIndex, RegRex2, 7, Dw2Inval, 137
+r24, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 0, Dw2Inval, 138
+r25, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 1, Dw2Inval, 139
+r26, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 2, Dw2Inval, 140
+r27, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 3, Dw2Inval, 141
+r28, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 4, Dw2Inval, 142
+r29, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 5, Dw2Inval, 143
+r30, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 6, Dw2Inval, 144
+r31, Class=Reg|Qword|BaseIndex, RegRex2|RegRex, 7, Dw2Inval, 145
// Vector mask registers.
k0, Class=RegMask, 0, 0, 93, 118
k1, Class=RegMask, 0, 1, 94, 119