@@ -5509,6 +5509,42 @@ optimize_encoding (void)
i.operands = 2;
i.imm_operands = 0;
}
+ else if (i.tm.base_opcode == 0x17
+ && i.tm.opcode_space == SPACE_0F3A
+ && i.op[0].imms->X_op == O_constant
+ && i.op[0].imms->X_add_number == 0)
+ {
+ /* Optimize: -O:
+ extractps $0, %xmmN, %rM -> movd %xmmN, %rM
+ extractps $0, %xmmN, mem -> movss %xmmN, mem
+ vextractps $0, %xmmN, %rM -> vmovd %xmmN, %rM
+ vextractps $0, %xmmN, mem -> vmovss %xmmN, mem
+ */
+ i.tm.opcode_space = SPACE_0F;
+ i.tm.opcode_modifier.vexw = VEXW0;
+
+ if (!i.mem_operands)
+ i.tm.base_opcode = 0x7e;
+ else
+ {
+ i.tm.base_opcode = 0x11;
+ i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
+ }
+
+ i.op[0].regs = i.op[1].regs;
+ i.types[0] = i.types[1];
+ i.flags[0] = i.flags[1];
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+
+ i.op[1].regs = i.op[2].regs;
+ i.types[1] = i.types[2];
+ i.flags[1] = i.flags[2];
+ i.reloc[1] = i.reloc[2];
+ i.tm.operand_types[1] = i.tm.operand_types[2];
+
+ i.operands = 2;
+ i.imm_operands = 0;
+ }
}
/* Check whether the promoted (to address size) register is usable as index
@@ -166,6 +166,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -194,6 +194,11 @@ _start:
vpextrd $0, %xmm1, %edx
vpextrd $0, %xmm1, (%edx)
+ extractps $0, %xmm1, %edx
+ extractps $0, %xmm1, (%edx)
+ vextractps $0, %xmm1, %edx
+ vextractps $0, %xmm1, (%edx)
+
bt $15, %ax
bt $16, %ax
btc $15, %ax
@@ -167,6 +167,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -166,6 +166,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -166,6 +166,10 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* movd %xmm1,\(%edx\)
+[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+[a-f0-9]+: c5 .* vmovd %xmm1,\(%edx\)
+ +[a-f0-9]+: 66 .* movd %xmm1,%edx
+ +[a-f0-9]+: f3 .* movss %xmm1,\(%edx\)
+ +[a-f0-9]+: c5 .* vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 .* vmovss %xmm1,\(%edx\)
+[a-f0-9]+: 0f ba e0 0f bt \$0xf,%eax
+[a-f0-9]+: 66 0f ba e0 10 bt \$0x10,%ax
+[a-f0-9]+: 0f ba f8 0f btc \$0xf,%eax
@@ -594,6 +594,8 @@ run_dump_test "x86-64-optimize-7b"
run_list_test "x86-64-optimize-8" "-I${srcdir}/$subdir -march=+noavx2 -al"
run_list_test "x86-64-optimize-pextr" "-O -aln"
run_dump_test "x86-64-optimize-pextr"
+run_list_test "x86-64-optimize-extractps" "-O -aln"
+run_dump_test "x86-64-optimize-extractps"
run_dump_test "x86-64-apx-ndd-optimize"
run_dump_test "x86-64-align-branch-1a"
run_dump_test "x86-64-align-branch-1b"
@@ -0,0 +1,20 @@
+#as: -O -msse2avx
+#objdump: -drw
+#name: x86-64 EXTRACTPS optimized encoding with -msse2avx
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <extractps>:
+ +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 fa 11 0a vmovss %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d
+ +[a-f0-9]+: 62 f9 7e 08 11 0a vmovss %xmm1,\(%r18\)
+ +[a-f0-9]+: c5 f9 7e ca vmovd %xmm1,%edx
+ +[a-f0-9]+: c5 fa 11 0a vmovss %xmm1,\(%rdx\)
+ +[a-f0-9]+: 62 e1 7d 08 7e ca vmovd %xmm17,%edx
+ +[a-f0-9]+: 62 f9 7d 08 7e ca vmovd %xmm1,%r18d
+ +[a-f0-9]+: 62 f9 7e 08 11 0a vmovss %xmm1,\(%r18\)
+#pass
@@ -0,0 +1,21 @@
+.*: Assembler messages:
+.*:6: Error: .*
+.*:7: Error: .*
+[ ]*[0-9a-f]+[ ]+\.text
+[ ]*[0-9a-f]+[ ]+extractps:
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 660F7ECA[ ]+extractps \$0, %xmm1, %edx
+[ ]*[0-9a-f]+[ ]+\?\?\?\? F30F110A[ ]+extractps \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+extractps \$0, %xmm1, %r18d
+[ ]*[0-9a-f]+[ ]+extractps \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5F97ECA[ ]+vextractps \$0, %xmm1, %edx
+[ ]*[0-9a-f]+[ ]+\?\?\?\? C5FA110A[ ]+vextractps \$0, %xmm1, \(%rdx\)
+[ ]*[0-9a-f]+[ ]+
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62E17D08[ ]+vextractps \$0, %xmm17, %edx
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97D08[ ]+vextractps \$0, %xmm1, %r18d
+[ ]*[0-9a-f]+[ ]+7ECA
+[ ]*[0-9a-f]+[ ]+\?\?\?\? 62F97E08[ ]+vextractps \$0, %xmm1, \(%r18\)
+[ ]*[0-9a-f]+[ ]+110A
+#pass
@@ -0,0 +1,14 @@
+ .text
+extractps:
+ extractps $0, %xmm1, %edx
+ extractps $0, %xmm1, (%rdx)
+
+ extractps $0, %xmm1, %r18d
+ extractps $0, %xmm1, (%r18)
+
+ vextractps $0, %xmm1, %edx
+ vextractps $0, %xmm1, (%rdx)
+
+ vextractps $0, %xmm17, %edx
+ vextractps $0, %xmm1, %r18d
+ vextractps $0, %xmm1, (%r18)
@@ -1491,10 +1491,10 @@ blendvp<sd>, 0x664a | <sd:opc>, AVX, Mod
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { Acc|Xmmword, RegXMM|Unspecified|BaseIndex, RegXMM }
blendvp<sd>, 0x660f3814 | <sd:opc>, SSE4_1, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
dpp<sd><sse41>, 0x660f3a40 | <sd:opc>, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
-extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX, { Imm8, RegXMM, Reg64 }
-extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|NoRex64, { Imm8, RegXMM, Reg64 }
+extractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexW0|Disp8MemShift=2|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+extractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexW1|NoSuf|SSE2AVX|Optimize, { Imm8, RegXMM, Reg64 }
+extractps, 0x660f3a17, SSE4_1, Modrm|IgnoreSize|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+extractps, 0x660f3a17, SSE4_1&x64, RegMem|NoSuf|Optimize|NoRex64, { Imm8, RegXMM, Reg64 }
insertps<SSE41D>, 0x660f3a21, <SSE41D:cpu>, Modrm|<SSE41D:attr>|<SSE41D:vvvv>|Disp8MemShift|NoSuf, { Imm8, Dword|Unspecified|BaseIndex|RegXMM, RegXMM }
movntdqa<SSE41D>, 0x660f382a, <SSE41D:cpu>, Modrm|<SSE41D:attr>|NoSuf, { Xmmword|Unspecified|BaseIndex, RegXMM }
mpsadbw<sse41>, 0x660f3a42, <sse41:cpu>, Modrm|<sse41:attr>|<sse41:vvvv>|NoSuf, { Imm8|Imm8S, RegXMM|Unspecified|BaseIndex, RegXMM }
@@ -1669,8 +1669,8 @@ vdpps, 0x6640, AVX, Modrm|Vex|Space0F3A|
vextractf128, 0x6619, AVX, Modrm|Vex256|Space0F3A|VexW0|NoSuf, { Imm8, RegYMM, Unspecified|BaseIndex|RegXMM }
// vextractf32x4 in disguise (see vround{p,s}{s,d} comment)
vextractf128, 0x6619, APX_F&AVX512VL, Modrm|EVex256|Space0F3A|VexW0|Disp8MemShift=4|NoSuf, { Imm8, RegYMM, Xmmword|Unspecified|BaseIndex }
-vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
-vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf, { Imm8, RegXMM, Reg64 }
+vextractps, 0x6617, AVX|AVX512F, Modrm|Vex128|EVex128|Space0F3A|VexWIG|Disp8MemShift=2|NoSuf|Optimize, { Imm8, RegXMM, Reg32|Unspecified|BaseIndex }
+vextractps, 0x6617, x64&(AVX|AVX512F), RegMem|Vex128|EVex128|Space0F3A|VexWIG|NoSuf|Optimize, { Imm8, RegXMM, Reg64 }
vhaddpd, 0x667c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vhaddps, 0xf27c, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vhsubpd, 0x667d, AVX, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }