@@ -5545,6 +5545,87 @@ optimize_encoding (void)
i.operands = 2;
i.imm_operands = 0;
}
+ else if ((i.tm.base_opcode | 0x22) == 0x3b
+ && i.tm.opcode_space == SPACE_0F3A
+ && i.op[0].imms->X_op == O_constant
+ && i.op[0].imms->X_add_number == 0)
+ {
+ /* Optimize: -O:
+ vextractf128 $0, %ymmN, %xmmM -> vmovaps %xmmN, %xmmM
+ vextractf128 $0, %ymmN, mem -> vmovups %xmmN, mem
+ vextractf32x4 $0, %[yz]mmN, %xmmM -> vmovaps %xmmN, %xmmM
+ vextractf32x4 $0, %[yz]mmN, mem -> vmovups %xmmN, mem
+ vextractf64x2 $0, %[yz]mmN, %xmmM -> vmovapd %xmmN, %xmmM
+ vextractf64x2 $0, %[yz]mmN, mem -> vmovupd %xmmN, mem
+ vextractf32x8 $0, %zmmN, %ymmM -> vmovaps %ymmN, %ymmM
+ vextractf32x8 $0, %zmmN, mem -> vmovups %ymmN, mem
+ vextractf64x4 $0, %zmmN, %ymmM -> vmovapd %ymmN, %ymmM
+ vextractf64x4 $0, %zmmN, mem -> vmovupd %ymmN, mem
+ vextracti128 $0, %ymmN, %xmmM -> vmovdqa %xmmN, %xmmM
+ vextracti128 $0, %ymmN, mem -> vmovdqu %xmmN, mem
+ vextracti32x4 $0, %[yz]mmN, %xmmM -> vmovdqa{,32} %xmmN, %xmmM
+ vextracti32x4 $0, %[yz]mmN, mem -> vmovdqu{,32} %xmmN, mem
+ vextracti64x2 $0, %[yz]mmN, %xmmM -> vmovdqa{,64} %xmmN, %xmmM
+ vextracti64x2 $0, %[yz]mmN, mem -> vmovdqu{,64} %xmmN, mem
+ vextracti32x8 $0, %zmmN, %ymmM -> vmovdqa{,32} %ymmN, %ymmM
+ vextracti32x8 $0, %zmmN, mem -> vmovdqu{,32} %ymmN, mem
+ vextracti64x4 $0, %zmmN, %ymmM -> vmovdqa{,64} %ymmN, %ymmM
+ vextracti64x4 $0, %zmmN, mem -> vmovdqu{,64} %ymmN, mem
+ */
+ i.tm.opcode_space = SPACE_0F;
+
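+ /* Use the shorter VEX form when no mask register is in use and the
+ encoding determined so far does not require EVEX. */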
+ if (!i.mask.reg
+ && (pp.encoding <= encoding_vex3
+ || (pp.encoding == encoding_evex512
+ && (!i.base_reg || !(i.base_reg->reg_flags & RegRex2))
+ && (!i.index_reg || !(i.index_reg->reg_flags & RegRex2)))))
+ {
+ i.tm.opcode_modifier.vex = i.tm.base_opcode & 2 ? VEX256 : VEX128;
+ i.tm.opcode_modifier.evex = 0;
+ }
+ else
+ i.tm.opcode_modifier.evex = i.tm.base_opcode & 2 ? EVEX256 : EVEX128;
+
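+ /* Bit 5 of the original opcode selects between the integer (VMOVDQ*)
+ and floating point (VMOVAP*/VMOVUP*) replacements. Register
+ destinations use aligned moves, memory destinations unaligned ones. */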
+ if (i.tm.base_opcode & 0x20)
+ {
+ i.tm.base_opcode = 0x7f;
+ if (i.reg_operands != 2)
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
+ }
+ else
+ {
+ if (i.reg_operands == 2)
+ i.tm.base_opcode = 0x29;
+ else
+ i.tm.base_opcode = 0x11;
+ if (i.tm.opcode_modifier.vexw != VEXW1)
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
+ }
+
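+ /* The W bit is ignored (WIG) by the replacement VEX encodings. */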
+ if (i.tm.opcode_modifier.vex)
+ i.tm.opcode_modifier.vexw = VEXWIG;
+
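+ /* Drop the immediate and move the remaining operands down by one. */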
+ i.op[0].regs = i.op[1].regs;
+ i.types[0] = i.types[1];
+ i.flags[0] = i.flags[1];
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+
+ i.op[1].regs = i.op[2].regs;
+ i.types[1] = i.types[2];
+ i.flags[1] = i.flags[2];
+ i.reloc[1] = i.reloc[2];
+ i.tm.operand_types[1] = i.tm.operand_types[2];
+
+ i.operands = 2;
+ i.imm_operands = 0;
+ }
}
/* Check whether the promoted (to address size) register is usable as index
@@ -170,6 +170,26 @@ Disassembly of section .text:
+[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -199,6 +199,31 @@ _start:
vextractps $0, %xmm1, %edx
vextractps $0, %xmm1, (%edx)
+ vextractf128 $0, %ymm1, %xmm2
+ vextractf128 $0, %ymm1, (%edx)
+ vextracti128 $0, %ymm1, %xmm2
+ vextracti128 $0, %ymm1, (%edx)
+
+ vextractf32x4 $0, %ymm1, %xmm2
+ vextractf32x4 $0, %ymm1, (%edx)
+ vextracti32x4 $0, %ymm1, %xmm2
+ vextracti32x4 $0, %ymm1, (%edx)
+
+ vextractf64x2 $0, %ymm1, %xmm2
+ vextractf64x2 $0, %ymm1, (%edx)
+ vextracti64x2 $0, %ymm1, %xmm2
+ vextracti64x2 $0, %ymm1, (%edx)
+
+ vextractf32x8 $0, %zmm1, %ymm2
+ vextractf32x8 $0, %zmm1, (%edx)
+ vextracti32x8 $0, %zmm1, %ymm2
+ vextracti32x8 $0, %zmm1, (%edx)
+
+ vextractf64x4 $0, %zmm1, %ymm2
+ vextractf64x4 $0, %zmm1, (%edx)
+ vextracti64x4 $0, %zmm1, %ymm2
+ vextracti64x4 $0, %zmm1, (%edx)
+
bt $15, %ax
bt $16, %ax
btc $15, %ax
@@ -171,6 +171,26 @@ Disassembly of section .text:
+[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -170,6 +170,26 @@ Disassembly of section .text:
+[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -170,6 +170,26 @@ Disassembly of section .text:
+[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovups %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovupd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 .* vmovdqu %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovaps %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovups %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovapd %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovupd %ymm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 .* vmovdqu %ymm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -596,6 +596,7 @@ run_list_test "x86-64-optimize-pextr" "-
run_dump_test "x86-64-optimize-pextr"
run_list_test "x86-64-optimize-extractps" "-O -aln"
run_dump_test "x86-64-optimize-extractps"
+run_dump_test "x86-64-optimize-vextractNN"
run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
@@ -0,0 +1,61 @@
+#as: -O
+#objdump: -drw
+#name: x86-64 VEXTRACT{F,I}<nn> optimized encoding
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <vextract_128>:
+ +[a-f0-9]+: c5 f8 29 ca vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 f8 11 0a vmovups %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7c 08 11 0a vmovups %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7e 08 7f 0a vmovdqu32 %xmm1,\(%r18\)
+
+0+[a-f0-9]+ <vextract_NNxM_XMM>:
+ +[a-f0-9]+: c5 f8 29 ca vmovaps %xmm1,%xmm2
+ +[a-f0-9]+: c5 f8 11 0a vmovups %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7c 08 29 ca vmovaps %xmm17,%xmm2
+ +[a-f0-9]+: 62 e1 7c 08 11 0a vmovups %xmm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7c 08 11 0a vmovups %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 29 ca vmovapd %xmm1,%xmm2
+ +[a-f0-9]+: c5 f9 11 0a vmovupd %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 fd 08 29 ca vmovapd %xmm17,%xmm2
+ +[a-f0-9]+: 62 e1 fd 08 11 0a vmovupd %xmm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 fd 08 11 0a vmovupd %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7d 08 7f ca vmovdqa32 %xmm17,%xmm2
+ +[a-f0-9]+: 62 e1 7e 08 7f 0a vmovdqu32 %xmm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7e 08 7f 0a vmovdqu32 %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 7f ca vmovdqa %xmm1,%xmm2
+ +[a-f0-9]+: c5 fa 7f 0a vmovdqu %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 fd 08 7f ca vmovdqa64 %xmm17,%xmm2
+ +[a-f0-9]+: 62 e1 fe 08 7f 0a vmovdqu64 %xmm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 fe 08 7f 0a vmovdqu64 %xmm1,\(%r18\)
+
+0+[a-f0-9]+ <vextract_NNxM_YMM>:
+ +[a-f0-9]+: c5 fc 29 ca vmovaps %ymm1,%ymm2
+ +[a-f0-9]+: c5 fc 11 0a vmovups %ymm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7c 28 29 ca vmovaps %ymm17,%ymm2
+ +[a-f0-9]+: 62 e1 7c 28 11 0a vmovups %ymm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7c 28 11 0a vmovups %ymm1,\(%r18\)
+ +[a-f0-9]+: c5 fd 29 ca vmovapd %ymm1,%ymm2
+ +[a-f0-9]+: c5 fd 11 0a vmovupd %ymm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 fd 28 29 ca vmovapd %ymm17,%ymm2
+ +[a-f0-9]+: 62 e1 fd 28 11 0a vmovupd %ymm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 fd 28 11 0a vmovupd %ymm1,\(%r18\)
+ +[a-f0-9]+: c5 fd 7f ca vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 fe 7f 0a vmovdqu %ymm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7d 28 7f ca vmovdqa32 %ymm17,%ymm2
+ +[a-f0-9]+: 62 e1 7e 28 7f 0a vmovdqu32 %ymm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7e 28 7f 0a vmovdqu32 %ymm1,\(%r18\)
+ +[a-f0-9]+: c5 fd 7f ca vmovdqa %ymm1,%ymm2
+ +[a-f0-9]+: c5 fe 7f 0a vmovdqu %ymm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 fd 28 7f ca vmovdqa64 %ymm17,%ymm2
+ +[a-f0-9]+: 62 e1 fe 28 7f 0a vmovdqu64 %ymm17,\(%rdx\)
+ +[a-f0-9]+: 62 f9 fe 28 7f 0a vmovdqu64 %ymm1,\(%r18\)
+#pass
@@ -0,0 +1,59 @@
+ .text
+vextract_128:
+ vextractf128 $0, %ymm1, %xmm2
+ vextractf128 $0, %ymm1, (%rdx)
+ vextractf128 $0, %ymm1, (%r18)
+
+ vextracti128 $0, %ymm1, %xmm2
+ vextracti128 $0, %ymm1, (%rdx)
+ vextracti128 $0, %ymm1, (%r18)
+
+vextract_NNxM_XMM:
+ vextractf32x4 $0, %ymm1, %xmm2
+ vextractf32x4 $0, %ymm1, (%rdx)
+ vextractf32x4 $0, %ymm17, %xmm2
+ vextractf32x4 $0, %ymm17, (%rdx)
+ vextractf32x4 $0, %ymm1, (%r18)
+
+ vextractf64x2 $0, %ymm1, %xmm2
+ vextractf64x2 $0, %ymm1, (%rdx)
+ vextractf64x2 $0, %ymm17, %xmm2
+ vextractf64x2 $0, %ymm17, (%rdx)
+ vextractf64x2 $0, %ymm1, (%r18)
+
+ vextracti32x4 $0, %ymm1, %xmm2
+ vextracti32x4 $0, %ymm1, (%rdx)
+ vextracti32x4 $0, %ymm17, %xmm2
+ vextracti32x4 $0, %ymm17, (%rdx)
+ vextracti32x4 $0, %ymm1, (%r18)
+
+ vextracti64x2 $0, %ymm1, %xmm2
+ vextracti64x2 $0, %ymm1, (%rdx)
+ vextracti64x2 $0, %ymm17, %xmm2
+ vextracti64x2 $0, %ymm17, (%rdx)
+ vextracti64x2 $0, %ymm1, (%r18)
+
+vextract_NNxM_YMM:
+ vextractf32x8 $0, %zmm1, %ymm2
+ vextractf32x8 $0, %zmm1, (%rdx)
+ vextractf32x8 $0, %zmm17, %ymm2
+ vextractf32x8 $0, %zmm17, (%rdx)
+ vextractf32x8 $0, %zmm1, (%r18)
+
+ vextractf64x4 $0, %zmm1, %ymm2
+ vextractf64x4 $0, %zmm1, (%rdx)
+ vextractf64x4 $0, %zmm17, %ymm2
+ vextractf64x4 $0, %zmm17, (%rdx)
+ vextractf64x4 $0, %zmm1, (%r18)
+
+ vextracti32x8 $0, %zmm1, %ymm2
+ vextracti32x8 $0, %zmm1, (%rdx)
+ vextracti32x8 $0, %zmm17, %ymm2
+ vextracti32x8 $0, %zmm17, (%rdx)
+ vextracti32x8 $0, %zmm1, (%r18)
+
+ vextracti64x4 $0, %zmm1, %ymm2
+ vextracti64x4 $0, %zmm1, (%rdx)
+ vextracti64x4 $0, %zmm17, %ymm2
+ vextracti64x4 $0, %zmm17, (%rdx)
+ vextracti64x4 $0, %zmm1, (%r18)
@@ -1666,9 +1666,9 @@ vcvttps2dq, 0xf35b, AVX, Modrm|Vex|Space
vcvtts<sd>2si, 0x<sd:spfx>2c, AVX, Modrm|VexLIG|Space0F|No_bSuf|No_wSuf|No_sSuf, { <sd:elem>|Unspecified|BaseIndex|RegXMM, Reg32|Reg64 }
vdppd, 0x6641, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM, RegXMM, RegXMM }
vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
+vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf|Optimize, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
// vextractf32x4 in disguise (see vround{p,s}{s,d} comment)
-vextractf128, 0x6619, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
+vextractf128, 0x6619, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf|Optimize, { Imm8, RegXMM, Reg64 }
vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
@@ -1879,9 +1879,9 @@ vpermd, 0x6636, AVX2|AVX512F, Modrm|Vex2
vpermpd, 0x6601, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
vpermps, 0x6616, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F38|Src1VVVV|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
vpermq, 0x6600, AVX2|AVX512F, Modrm|Vex256|EVexDYN|Masking|Space0F3A|VexW1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegYMM|RegZMM|Qword|Unspecified|BaseIndex, RegYMM|RegZMM }
-vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
+vextracti128, 0x6639, AVX2, Modrm|Vex256|Space0F3A|VexW0|NoSuf|Optimize, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
// vextracti32x4 in disguise (see vround{p,s}{s,d} comment)
-vextracti128, 0x6639, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
+vextracti128, 0x6639, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
vinserti128, 0x6638, AVX2, Modrm|Vex256|Space0F3A|Src1VVVV|VexW0|NoSuf, { Imm8, Unspecified|BaseIndex|RegXMM, RegYMM, RegYMM }
// vinserti32x4 in disguise (see vround{p,s}{s,d} comment)
vinserti128, 0x6638, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=4|NoSuf, { Imm8, Xmmword|Unspecified|BaseIndex, RegYMM, RegYMM }
@@ -2375,11 +2375,11 @@ vpexpandq, 0x6689, AVX512F, Modrm|Maskin
vexpandps, 0x6688, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=2|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpexpandd, 0x6689, AVX512F, Modrm|Masking|Space0F38|VexW=1|Disp8MemShift=2|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
-vextractf32x4, 0x6619, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
-vextracti32x4, 0x6639, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
+vextractf32x4, 0x6619, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
+vextracti32x4, 0x6639, AVX512F, Modrm|Masking|Space0F3A|VexW=1|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
-vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
-vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
+vextractf64x4, 0x661B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
+vextracti64x4, 0x663B, AVX512F, Modrm|EVex=1|Masking|Space0F3A|VexW=2|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
vfixupimmp<sd>, 0x6654, AVX512F, Modrm|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|SAE, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|<sd:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vfixupimms<sd>, 0x6655, AVX512F, Modrm|EVexLIG|Masking|Space0F3A|Src1VVVV|<sd:vexw>|Disp8MemShift|NoSuf|SAE, { Imm8|Imm8S, RegXMM|<sd:elem>|Unspecified|BaseIndex, RegXMM, RegXMM }
@@ -2833,16 +2833,16 @@ vcvttps2uqq, 0x6678, AVX512DQ&AVX512VL,
vcvtuqq2ps<Exy>, 0xf27a, AVX512DQ&<Exy:vl>, Modrm|<Exy:attr>|Masking|Space0F|VexW1|Broadcast|NoSuf|<Exy:sr>, { <Exy:src>|Qword, <Exy:dst> }
-vextractf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
-vextracti32x8, 0x663B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
+vextractf32x8, 0x661B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
+vextracti32x8, 0x663B, AVX512DQ, Modrm|EVex=1|Masking|Space0F3A|VexW=1|Disp8MemShift=5|NoSuf|Optimize, { Imm8, RegZMM, RegYMM|Unspecified|BaseIndex }
vinsertf32x8, 0x661A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
vinserti32x8, 0x663A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
vpextr<dq>, 0x6616, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|<dq:vexw64>|Disp8MemShift|NoSuf|Optimize, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
vpinsr<dq>, 0x6622, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|Src1VVVV|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
-vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
-vextracti64x2, 0x6639, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
+vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
+vextracti64x2, 0x6639, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf|Optimize, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }
vinsertf64x2, 0x6618, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }
vinserti64x2, 0x6638, AVX512DQ, Modrm|Masking|Space0F3A|Src1VVVV|VexW1|Disp8MemShift=4|CheckOperandSize|NoSuf, { Imm8, RegXMM|Unspecified|BaseIndex, RegYMM|RegZMM, RegYMM|RegZMM }