@@ -5533,6 +5533,44 @@ optimize_encoding (void)
swap_2_operands (1, 2);
}
+ else if (i.tm.base_opcode == 0x16
+ && i.tm.opcode_space == SPACE_0F3A
+ && i.op[0].imms->X_op == O_constant
+ && i.op[0].imms->X_add_number == 0)
+ {
+ /* Optimize: -O:
+ pextrd $0, %xmmN, ... -> movd %xmmN, ...
+ pextrq $0, %xmmN, ... -> movq %xmmN, ...
+ vpextrd $0, %xmmN, ... -> vmovd %xmmN, ...
+ vpextrq $0, %xmmN, ... -> vmovq %xmmN, ...
+
+ This branch matches opcode 0x16 in the 0F3A space whose immediate
+ (operand 0) is the constant zero.  The matched template is rewritten
+ in place: the opcode space becomes 0F and the immediate is dropped by
+ shifting the two remaining operands down one slot.
+
+ Opcode 0x7e is used when there is no memory operand, when the
+ template is EVEX, or when the template is marked neither VexW1 nor
+ Size64.  Otherwise (memory operand, non-EVEX, VexW1 or Size64)
+ opcode 0xd6 is used, with Size64 cleared and VEX.W set to W0 for
+ SSE2AVX templates and WIG for the rest.
+ */
+ i.tm.opcode_space = SPACE_0F;
+ if (!i.mem_operands
+ || i.tm.opcode_modifier.evex
+ || (i.tm.opcode_modifier.vexw != VEXW1
+ && i.tm.opcode_modifier.size != SIZE64))
+ i.tm.base_opcode = 0x7e;
+ else
+ {
+ i.tm.base_opcode = 0xd6;
+ i.tm.opcode_modifier.size = 0;
+ i.tm.opcode_modifier.vexw
+ = i.tm.opcode_modifier.sse2avx ? VEXW0 : VEXWIG;
+ }
+
+ i.op[0].regs = i.op[1].regs; /* Operand 1 takes over slot 0 (was the immediate).  */
+ i.types[0] = i.types[1];
+ i.flags[0] = i.flags[1];
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+
+ i.op[1].regs = i.op[2].regs; /* Operand 2 moves down to slot 1.  */
+ i.types[1] = i.types[2];
+ i.flags[1] = i.flags[2];
+ i.tm.operand_types[1] = i.tm.operand_types[2];
+
+ i.operands = 2; /* Three operands became two ...  */
+ i.imm_operands = 0; /* ... and none of them is an immediate.  */
+ }
}
static void
@@ -162,6 +162,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pxor %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpxor %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpxor %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -189,6 +189,11 @@ _start:
vpcmpgtq %xmm2, %xmm2, %xmm0
vpcmpgtq %ymm2, %ymm2, %ymm0
+ pextrd $0, %xmm1, %edx
+ pextrd $0, %xmm1, (%edx)
+ vpextrd $0, %xmm1, %edx
+ vpextrd $0, %xmm1, (%edx)
+
bt $15, %ax
bt $16, %ax
btc $15, %ax
@@ -163,6 +163,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pxor %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpxor %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpxor %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -162,6 +162,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pxor %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpxor %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpxor %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -162,6 +162,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pxor %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpxor %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpxor %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -0,0 +1,29 @@
+#as: -O -msse2avx
+#objdump: -drw
+#name: x86-64 PEXTR optimized encoding with -msse2avx
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <pextr>:
+ +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 f9 7e 0a vmovd %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d
+ +[a-f0-9]+: 62 f9 7d 08 7e 0a vmovd %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 f9 7e 0a vmovd %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7d 08 7e ca vmovd %xmm17,%edx
+ +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d
+ +[a-f0-9]+: 62 f9 7d 08 7e 0a vmovd %xmm1,\(%r18\)
+ +[a-f0-9]+: c4 e1 f9 7e ca vmovq %xmm1,%rdx
+ +[a-f0-9]+: c5 f9 d6 0a vmovq %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 f9 fd 08 7e ca vmovq %xmm1,%r18
+ +[a-f0-9]+: 62 f9 fd 08 7e 0a vmovq %xmm1,\(%r18\)
+ +[a-f0-9]+: c4 e1 f9 7e ca vmovq %xmm1,%rdx
+ +[a-f0-9]+: c5 f9 d6 0a vmovq %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 fd 08 7e ca vmovq %xmm17,%rdx
+ +[a-f0-9]+: 62 f9 fd 08 7e ca vmovq %xmm1,%r18
+ +[a-f0-9]+: 62 f9 fd 08 7e 0a vmovq %xmm1,\(%r18\)
+#pass
@@ -0,0 +1,41 @@
+.*: Assembler messages:
+.*:6: Error: .*
+.*:7: Error: .*
+.*:19: Error: .*
+.*:20: Error: .*
+[ ]*[0-9a-f]+[ ]+\.text
+[ ]*[0-9a-f]+[ ]+pextr:
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 660F7ECA[ ]+pextrd \$0, %xmm1, %edx
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 660F7E0A[ ]+pextrd \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+pextrd \$0, %xmm1, %r18d
+[ ]*[0-9a-f]+[ ]+pextrd \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5F97ECA[ ]+vpextrd \$0, %xmm1, %edx
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5F97E0A[ ]+vpextrd \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E17D08[ ]+vpextrd \$0, %xmm17, %edx
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97D08[ ]+vpextrd \$0, %xmm1, %r18d
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97D08[ ]+vpextrd \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+7E0A
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 66480F7E[ ]+pextrq \$0, %xmm1, %rdx
+[ ]*[0-9a-f]+[ ]+CA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 660FD60A[ ]+pextrq \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+pextrq \$0, %xmm1, %r18
+[ ]*[0-9a-f]+[ ]+pextrq \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C4E1F97E[ ]+vpextrq \$0, %xmm1, %rdx
+[ ]*[0-9a-f]+[ ]+CA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5F9D60A[ ]+vpextrq \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E1FD08[ ]+vpextrq \$0, %xmm17, %rdx
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F9FD08[ ]+vpextrq \$0, %xmm1, %r18
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F9FD08[ ]+vpextrq \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+7E0A
+#pass
@@ -0,0 +1,27 @@
+ .text
+pextr:
+ pextrd $0, %xmm1, %edx
+ pextrd $0, %xmm1, (%rdx)
+
+ pextrd $0, %xmm1, %r18d
+ pextrd $0, %xmm1, (%r18)
+
+ vpextrd $0, %xmm1, %edx
+ vpextrd $0, %xmm1, (%rdx)
+
+ vpextrd $0, %xmm17, %edx
+ vpextrd $0, %xmm1, %r18d
+ vpextrd $0, %xmm1, (%r18)
+
+ pextrq $0, %xmm1, %rdx
+ pextrq $0, %xmm1, (%rdx)
+
+ pextrq $0, %xmm1, %r18
+ pextrq $0, %xmm1, (%r18)
+
+ vpextrq $0, %xmm1, %rdx
+ vpextrq $0, %xmm1, (%rdx)
+
+ vpextrq $0, %xmm17, %rdx
+ vpextrq $0, %xmm1, %r18
+ vpextrq $0, %xmm1, (%r18)
@@ -583,6 +583,8 @@ run_dump_test "x86-64-optimize-6"
run_list_test "x86-64-optimize-7a" "-I${srcdir}/$subdir -march=+noavx -al"
run_dump_test "x86-64-optimize-7b"
run_list_test "x86-64-optimize-8" "-I${srcdir}/$subdir -march=+noavx2 -al"
+run_list_test "x86-64-optimize-pextr" "-O -aln"
+run_dump_test "x86-64-optimize-pextr"
run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
@@ -1500,10 +1500,10 @@ pblendw<sse41>, 0x660f3a0e, <sse41:cpu>,
pcmpeqq<sse41>, 0x660f3829, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf|Optimize, { RegXMM|Unspecified|BaseIndex, RegXMM }
pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, RegMem|<SSE41BW:attr>|NoSuf|IgnoreSize|NoRex64, { Imm8, RegXMM, Reg32|Reg64 }
pextr<bw><SSE41BW>, 0x660f3a14 | <bw:opc>, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|Disp8MemShift|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
-pextrd<SSE41DQ>, 0x660f3a16, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|Disp8MemShift|NoSuf|IgnoreSize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-pextrq, 0x6616, AVX&x64, Modrm|Vex|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
-pextrq, 0x6616, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
-pextrq, 0x660f3a16, SSE4_1&x64, Modrm|Size64|NoSuf, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
+pextrd<SSE41DQ>, 0x660f3a16, <SSE41DQ:cpu>, Modrm|<SSE41DQ:attr>|Disp8MemShift|NoSuf|IgnoreSize|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+pextrq, 0x6616, AVX&x64, Modrm|Vex|Space0F3A|VexW1|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
+pextrq, 0x6616, AVX512DQ&AVX512VL&x64, Modrm|EVex128|Space0F3A|VexW1|Disp8MemShift=3|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
+pextrq, 0x660f3a16, SSE4_1&x64, Modrm|Size64|NoSuf|Optimize, { Imm8, RegXMM, Reg64|Unspecified|BaseIndex }
phminposuw<sse41>, 0x660f3841, <sse41:cpu>, Modrm|<sse41:attr>|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|NoSuf|IgnoreSize|NoRex64, { Imm8, Reg32|Reg64, RegXMM }
pinsrb<SSE41BW>, 0x660f3a20, <SSE41BW:cpu>, Modrm|<SSE41BW:attr>|<SSE41BW:vvvv>|Disp8MemShift|NoSuf, { Imm8, Byte|Unspecified|BaseIndex, RegXMM }
@@ -1757,7 +1757,7 @@ vpermilps, 0x660c, AVX|AVX512F, Modrm|Ve
vpermilps, 0x6604, AVX|AVX512F, Modrm|Vex|EVexDYN|Masking|Space0F3A|VexW0|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpermilpd, 0x660d, AVX, Modrm|Vex|Space0F38|Src1VVVV|VexW0|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpermilpd, 0x6605, AVX, Modrm|Vex|Space0F3A|VexW0|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
-vpextr<dq>, 0x6616, AVX&<dq:cpu64>, Modrm|Vex|Space0F3A|<dq:vexw64>|NoSuf, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
+vpextr<dq>, 0x6616, AVX&<dq:cpu64>, Modrm|Vex|Space0F3A|<dq:vexw64>|NoSuf|Optimize, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
vpextrw, 0x66c5, AVX, Load|Modrm|Vex|Space0F|VexWIG|No_bSuf|No_wSuf|No_sSuf, { Imm8, RegXMM, Reg32|Reg64 }
vpextr<bw>, 0x6614 | <bw:opc>, AVX, RegMem|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg32|Reg64 }
vpextr<bw>, 0x6614 | <bw:opc>, AVX, Modrm|Vex|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, <bw:elem>|Unspecified|BaseIndex }
@@ -2937,7 +2937,7 @@ vextracti32x8, 0x663B, AVX512DQ, Modrm|E
vinsertf32x8, 0x661A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
vinserti32x8, 0x663A, AVX512DQ, Modrm|EVex512|Masking|Space0F3A|Src1VVVV|VexW0|Disp8MemShift=5|NoSuf, { Imm8, RegYMM|Unspecified|BaseIndex, RegZMM, RegZMM }
-vpextr<dq>, 0x6616, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
+vpextr<dq>, 0x6616, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|<dq:vexw64>|Disp8MemShift|NoSuf|Optimize, { Imm8, RegXMM, <dq:gpr>|Unspecified|BaseIndex }
vpinsr<dq>, 0x6622, AVX512DQ&<dq:cpu64>, Modrm|EVex128|Space0F3A|Src1VVVV|<dq:vexw64>|Disp8MemShift|NoSuf, { Imm8, <dq:gpr>|Unspecified|BaseIndex, RegXMM, RegXMM }
vextractf64x2, 0x6619, AVX512DQ, Modrm|Masking|Space0F3A|VexW=2|Disp8MemShift=4|NoSuf, { Imm8, RegYMM|RegZMM, RegXMM|Unspecified|BaseIndex }