@@ -4775,6 +4775,43 @@ optimize_encoding (void)
*/
i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
}
+ else if (!optimize_for_space
+ && i.tm.base_opcode == 0xd0
+ && (i.tm.opcode_space == SPACE_BASE
+ || i.tm.opcode_space == SPACE_EVEXMAP4)
+ && !i.mem_operands)
+ {
+ /* Optimize: -O: (register shift-left-by-1 becomes ADD of the register to itself)
+ shlb $1, %rN -> addb %rN, %rN
+ shlw $1, %rN -> addw %rN, %rN
+ shll $1, %rN -> addl %rN, %rN
+ shlq $1, %rN -> addq %rN, %rN
+
+ shlb $1, %rN, %rM -> addb %rN, %rN, %rM
+ shlw $1, %rN, %rM -> addw %rN, %rN, %rM
+ shll $1, %rN, %rM -> addl %rN, %rN, %rM
+ shlq $1, %rN, %rM -> addq %rN, %rN, %rM
+ */
+ gas_assert (i.tm.extension_opcode == 4); /* /4 is shl/sal. */
+ i.tm.base_opcode = 0x00; /* add */
+ i.tm.extension_opcode = None;
+ if (i.operands >= 2)
+ {
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+ i.op[0].regs = i.op[1].regs;
+ i.types[0] = i.types[1];
+ }
+ else
+ {
+ /* Legacy form with omitted shift count operand: duplicate the sole register into a new second operand slot. */
+ i.tm.operand_types[1] = i.tm.operand_types[0];
+ i.op[1].regs = i.op[0].regs;
+ i.types[1] = i.types[0];
+ i.operands = 2;
+ }
+ i.reg_operands++; /* Account for the duplicated register operand. */
+ i.imm_operands = 0; /* No immediate remains. */
+ }
else if (i.tm.base_opcode == 0xba
&& i.tm.opcode_space == SPACE_0F
&& i.reg_operands == 1
@@ -5031,6 +5068,48 @@ optimize_encoding (void)
i.op[1].regs = i.op[0].regs;
}
}
+ else if (i.tm.extension_opcode == 6
+ && i.tm.base_opcode >= 0x71
+ && i.tm.base_opcode <= 0x73
+ && i.tm.opcode_space == SPACE_0F
+ && i.op[0].imms->X_op == O_constant
+ && i.op[0].imms->X_add_number == 1
+ && !i.mem_operands)
+ {
+ /* Optimize: -O: (packed shift-left-by-1 of a register becomes packed ADD of the register to itself)
+ psllw $1, %mmxN -> paddw %mmxN, %mmxN
+ psllw $1, %xmmN -> paddw %xmmN, %xmmN
+ vpsllw $1, %xmmN, %xmmM -> vpaddw %xmmN, %xmmN, %xmmM
+ vpsllw $1, %ymmN, %ymmM -> vpaddw %ymmN, %ymmN, %ymmM
+ vpsllw $1, %zmmN, %zmmM -> vpaddw %zmmN, %zmmN, %zmmM
+
+ pslld $1, %mmxN -> paddd %mmxN, %mmxN
+ pslld $1, %xmmN -> paddd %xmmN, %xmmN
+ vpslld $1, %xmmN, %xmmM -> vpaddd %xmmN, %xmmN, %xmmM
+ vpslld $1, %ymmN, %ymmM -> vpaddd %ymmN, %ymmN, %ymmM
+ vpslld $1, %zmmN, %zmmM -> vpaddd %zmmN, %zmmN, %zmmM
+
+ psllq $1, %xmmN -> paddq %xmmN, %xmmN
+ vpsllq $1, %xmmN, %xmmM -> vpaddq %xmmN, %xmmN, %xmmM
+ vpsllq $1, %ymmN, %ymmM -> vpaddq %ymmN, %ymmN, %ymmM
+ vpsllq $1, %zmmN, %zmmM -> vpaddq %zmmN, %zmmN, %zmmM
+ */
+ if (i.tm.base_opcode != 0x73)
+ i.tm.base_opcode |= 0xfc; /* 0x71 -> 0xfd, 0x72 -> 0xfe: {,v}padd{w,d} */
+ else
+ {
+ gas_assert (i.tm.operand_types[1].bitfield.class != RegMMX); /* The MMX psllq form is not expected here. */
+ i.tm.base_opcode = 0xd4; /* 0x73 -> 0xd4: {,v}paddq */
+ }
+ i.tm.extension_opcode = None;
+ if (i.tm.opcode_modifier.vexvvvv)
+ i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
+ i.tm.operand_types[0] = i.tm.operand_types[1];
+ i.op[0].regs = i.op[1].regs;
+ i.types[0] = i.types[1];
+ i.reg_operands++; /* Operand 0 is now a register rather than the immediate. */
+ i.imm_operands = 0; /* No immediate remains. */
+ }
else if (optimize_for_space
&& i.tm.base_opcode == 0x59
&& i.tm.opcode_space == SPACE_0F38
@@ -24,6 +24,18 @@ Disassembly of section .text:
+[a-f0-9]+: 09 f6 or %esi,%esi
+[a-f0-9]+: 87 0a xchg %ecx,\(%edx\)
+[a-f0-9]+: 87 11 xchg %edx,\(%ecx\)
+ +[a-f0-9]+: d0 e2 shl \$1,%dl
+ +[a-f0-9]+: d0 e2 shl \$1,%dl
+ +[a-f0-9]+: 66 d1 e2 shl \$1,%dx
+ +[a-f0-9]+: 66 d1 e2 shl \$1,%dx
+ +[a-f0-9]+: d1 e2 shl \$1,%edx
+ +[a-f0-9]+: d1 e2 shl \$1,%edx
+ +[a-f0-9]+: d0 e2 shl \$1,%dl
+ +[a-f0-9]+: d0 e2 shl \$1,%dl
+ +[a-f0-9]+: 66 d1 e2 shl \$1,%dx
+ +[a-f0-9]+: 66 d1 e2 shl \$1,%dx
+ +[a-f0-9]+: d1 e2 shl \$1,%edx
+ +[a-f0-9]+: d1 e2 shl \$1,%edx
+[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
@@ -164,5 +176,17 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqd %xmm2,%xmm2
+[a-f0-9]+: c5 .* vpcmpeqd %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c5 .* vpcmpeqd %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 0f .* paddw %mm2,%mm2
+ +[a-f0-9]+: 66 .* paddw %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddw %xmm2,%xmm2,%xmm3
+ +[a-f0-9]+: 62 .* vpaddw %xmm2,%xmm2,%xmm3\{%k4\}
+ +[a-f0-9]+: 0f .* paddd %mm2,%mm2
+ +[a-f0-9]+: 66 .* paddd %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddd %ymm2,%ymm2,%ymm3
+ +[a-f0-9]+: 62 .* vpaddd %ymm2,%ymm2,%ymm3\{%k4\}
+ +[a-f0-9]+: 0f .* psllq \$(0x)?1,%mm2
+ +[a-f0-9]+: 66 .* paddq %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddq %xmm2,%xmm2,%xmm3
+ +[a-f0-9]+: 62 .* vpaddq %zmm2,%zmm2,%zmm3
+[a-f0-9]+: c5 .* vpunpcklqdq %xmm2,%xmm2,%xmm0
#pass
@@ -22,6 +22,24 @@ _start:
lock xchg %ecx, (%edx)
lock xchg (%ecx), %edx
+ shl $1, %dl
+ shl %dl
+
+ shl $1, %dx
+ shl %dx
+
+ shl $1, %edx
+ shl %edx
+
+ sal $1, %dl
+ sal %dl
+
+ sal $1, %dx
+ sal %dx
+
+ sal $1, %edx
+ sal %edx
+
vandnpd %zmm1, %zmm1, %zmm5
vmovdqa32 %xmm1, %xmm2
@@ -184,4 +202,19 @@ _start:
vpcmpeqq %xmm2, %xmm2, %xmm0
vpcmpeqq %ymm2, %ymm2, %ymm0
+ psllw $1, %mm2
+ psllw $1, %xmm2
+ vpsllw $1, %xmm2, %xmm3
+ vpsllw $1, %xmm2, %xmm3{%k4}
+
+ pslld $1, %mm2
+ pslld $1, %xmm2
+ vpslld $1, %ymm2, %ymm3
+ vpslld $1, %ymm2, %ymm3{%k4}
+
+ psllq $1, %mm2 # Must be left alone: the MMX form is not converted to paddq.
+ psllq $1, %xmm2
+ vpsllq $1, %xmm2, %xmm3
+ vpsllq $1, %zmm2, %zmm3
+
vpbroadcastq %xmm2, %xmm0
@@ -25,6 +25,18 @@ Disassembly of section .text:
+[a-f0-9]+: 85 f6 test %esi,%esi
+[a-f0-9]+: 87 0a xchg %ecx,\(%edx\)
+[a-f0-9]+: 87 11 xchg %edx,\(%ecx\)
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+[a-f0-9]+: c5 f1 55 e9 vandnpd %xmm1,%xmm1,%xmm5
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
+[a-f0-9]+: c5 f9 6f d1 vmovdqa %xmm1,%xmm2
@@ -165,5 +177,17 @@ Disassembly of section .text:
+[a-f0-9]+: 66 .* pcmpeqq %xmm2,%xmm2
+[a-f0-9]+: c4 .* vpcmpeqq %xmm2,%xmm2,%xmm0
+[a-f0-9]+: c4 .* vpcmpeqq %ymm2,%ymm2,%ymm0
+ +[a-f0-9]+: 0f .* paddw %mm2,%mm2
+ +[a-f0-9]+: 66 .* paddw %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddw %xmm2,%xmm2,%xmm3
+ +[a-f0-9]+: 62 .* vpaddw %xmm2,%xmm2,%xmm3\{%k4\}
+ +[a-f0-9]+: 0f .* paddd %mm2,%mm2
+ +[a-f0-9]+: 66 .* paddd %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddd %ymm2,%ymm2,%ymm3
+ +[a-f0-9]+: 62 .* vpaddd %ymm2,%ymm2,%ymm3\{%k4\}
+ +[a-f0-9]+: 0f .* psllq \$(0x)?1,%mm2
+ +[a-f0-9]+: 66 .* paddq %xmm2,%xmm2
+ +[a-f0-9]+: c5 .* vpaddq %xmm2,%xmm2,%xmm3
+ +[a-f0-9]+: 62 .* vpaddq %zmm2,%zmm2,%zmm3
+[a-f0-9]+: c4 .* vpbroadcastq %xmm2,%xmm0
#pass
@@ -95,4 +95,28 @@ Disassembly of section .text:
+[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+[a-f0-9]+: 62 ec 74 10 28 d1 sub %r18b,%r17b,%r17b
+[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 48 01 d2 add %rdx,%rdx
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 48 01 d2 add %rdx,%rdx
+ +[a-f0-9]+: 62 f4 7c 18 00 d2 add %dl,%dl,%al
+ +[a-f0-9]+: 62 f4 7d 18 01 d2 add %dx,%dx,%ax
+ +[a-f0-9]+: 62 f4 7c 18 01 d2 add %edx,%edx,%eax
+ +[a-f0-9]+: 62 f4 fc 18 01 d2 add %rdx,%rdx,%rax
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 48 01 d2 add %rdx,%rdx
+ +[a-f0-9]+: 00 d2 add %dl,%dl
+ +[a-f0-9]+: 66 01 d2 add %dx,%dx
+ +[a-f0-9]+: 01 d2 add %edx,%edx
+ +[a-f0-9]+: 48 01 d2 add %rdx,%rdx
+ +[a-f0-9]+: 62 f4 7c 18 00 d2 add %dl,%dl,%al
+ +[a-f0-9]+: 62 f4 7d 18 01 d2 add %dx,%dx,%ax
+ +[a-f0-9]+: 62 f4 7c 18 01 d2 add %edx,%edx,%eax
+ +[a-f0-9]+: 62 f4 fc 18 01 d2 add %rdx,%rdx,%rax
#pass
@@ -89,3 +89,33 @@ _start:
sub %r17b, %r17b, %r17b
sub %r18b, %r17b, %r17b
sub %r18b, %r18b, %r17b
+
+ shl $1, %dl
+ shl $1, %dx
+ shl $1, %edx
+ shl $1, %rdx
+
+ shl %dl
+ shl %dx
+ shl %edx
+ shl %rdx
+
+ shl $1, %dl, %al
+ shl $1, %dx, %ax
+ shl $1, %edx, %eax
+ shl $1, %rdx, %rax
+
+ sal $1, %dl
+ sal $1, %dx
+ sal $1, %edx
+ sal $1, %rdx
+
+ sal %dl
+ sal %dx
+ sal %edx
+ sal %rdx
+
+ sal $1, %dl, %al
+ sal $1, %dx, %ax
+ sal $1, %edx, %eax
+ sal $1, %rdx, %rax
@@ -428,26 +428,26 @@ imulzu, 0x69, APX_F, Modrm|No_bSuf|No_sS
<div>
-<sr:opc:imm8:nf, +
- rol:0:Imm8|Imm8S:NF, +
- ror:1:Imm8|Imm8S:NF, +
- rcl:2:Imm8:, +
- rcr:3:Imm8:, +
- sal:4:Imm8:NF, +
- shl:4:Imm8:NF, +
- shr:5:Imm8:NF, +
- sar:7:Imm8:NF>
-
-<sr>, 0xd0/<sr:opc>, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|<sr:nf>, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
-<sr>, 0xd0/<sr:opc>, 0, W|Modrm|No_sSuf, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-<sr>, 0xd0/<sr:opc>, APX_F, W|Modrm|No_sSuf|EVexMap4|<sr:nf>, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
+<sr:opc:imm8:opt1:nf, +
+ rol:0:Imm8|Imm8S::NF, +
+ ror:1:Imm8|Imm8S::NF, +
+ rcl:2:Imm8::, +
+ rcr:3:Imm8::, +
+ sal:4:Imm8:Optimize:NF, +
+ shl:4:Imm8:Optimize:NF, +
+ shr:5:Imm8::NF, +
+ sar:7:Imm8::NF>
+
+<sr>, 0xd0/<sr:opc>, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|<sr:opt1>|<sr:nf>, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+<sr>, 0xd0/<sr:opc>, 0, W|Modrm|No_sSuf|<sr:opt1>, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
+<sr>, 0xd0/<sr:opc>, APX_F, W|Modrm|No_sSuf|EVexMap4|<sr:opt1>|<sr:nf>, { Imm1, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
<sr>, 0xc0/<sr:opc>, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|<sr:nf>, { <sr:imm8>, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
<sr>, 0xc0/<sr:opc>, i186, W|Modrm|No_sSuf, { <sr:imm8>, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
<sr>, 0xc0/<sr:opc>, APX_F, W|Modrm|No_sSuf|EVexMap4|<sr:nf>, { <sr:imm8>, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
<sr>, 0xd2/<sr:opc>, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|<sr:nf>, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
<sr>, 0xd2/<sr:opc>, 0, W|Modrm|No_sSuf, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
<sr>, 0xd2/<sr:opc>, APX_F, W|Modrm|No_sSuf|EVexMap4|<sr:nf>, { ShiftCount, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-<sr>, 0xd0/<sr:opc>, 0, W|Modrm|No_sSuf, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
+<sr>, 0xd0/<sr:opc>, 0, W|Modrm|No_sSuf|<sr:opt1>, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
<sr>
@@ -1023,14 +1023,14 @@ pause, 0xf390, i186, NoSuf, {}
$avx:AVX|AVX512VL:66:Vex128|EVex128|Src1VVVV|VexW0|Disp8MemShift=4|SSE2AVX:Vex128|EVex128|DstVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
$sse:SSE2:66:::RegXMM:Xmmword, +
$mmx:MMX::::RegMMX:Qword>
-<MMXdq:opc:cpu:pfx:attr:vvvv:reg:mem, +
- d:0:AVX|AVX512VL:66:Vex128|EVex128|Src1VVVV|VexW0|Disp8MemShift=4|SSE2AVX:Vex128|EVex128|DstVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
- d:0:SSE2:66:::RegXMM:Xmmword, +
- d:0:MMX::::RegMMX:Qword, +
- q:1:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:Vex128|DstVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
- q:1:AVX512VL:66:EVex128|Src1VVVV|VexW1|Disp8MemShift=4|SSE2AVX:EVex128|DstVVVV|VexW1|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
- q:1:SSE2:66:::RegXMM:Xmmword, +
- q:1:MMX::::RegMMX:Qword>
+<MMXdq:opc:cpu:pfx:attr:vvvv:optim:reg:mem, +
+ d:0:AVX|AVX512VL:66:Vex128|EVex128|Src1VVVV|VexW0|Disp8MemShift=4|SSE2AVX:Vex128|EVex128|DstVVVV|VexW0|Disp8MemShift=4|SSE2AVX:Optimize:RegXMM:Xmmword, +
+ d:0:SSE2:66:::Optimize:RegXMM:Xmmword, +
+ d:0:MMX::::Optimize:RegMMX:Qword, +
+ q:1:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:Vex128|DstVVVV|VexW0|SSE2AVX:Optimize:RegXMM:Xmmword, +
+ q:1:AVX512VL:66:EVex128|Src1VVVV|VexW1|Disp8MemShift=4|SSE2AVX:EVex128|DstVVVV|VexW1|Disp8MemShift=4|SSE2AVX:Optimize:RegXMM:Xmmword, +
+ q:1:SSE2:66:::Optimize:RegXMM:Xmmword, +
+ q:1:MMX:::::RegMMX:Qword>
<MMXBW:cpu:pfx:attr:vvvv:reg:mem, +
$avx:AVX:66:Vex128|Src1VVVV|VexW0|SSE2AVX:Vex128|DstVVVV|VexW0|SSE2AVX:RegXMM:Xmmword, +
$apx:AVX512BW&AVX512VL:66:EVex128|Src1VVVV|VexW0|Disp8MemShift=4|SSE2AVX:EVex128|DstVVVV|VexW0|Disp8MemShift=4|SSE2AVX:RegXMM:Xmmword, +
@@ -1103,9 +1103,9 @@ pmulhw<MMXBW>, 0x<MMXBW:pfx>0fe5, <MMXBW
pmullw<MMXBW>, 0x<MMXBW:pfx>0fd5, <MMXBW:cpu>, Modrm|<MMXBW:attr>|C|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
por<MMX>, 0x<MMX:pfx>0feb, <MMX:cpu>, Modrm|<MMX:attr>|C|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
psllw<MMXBW>, 0x<MMXBW:pfx>0ff1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
-psllw<MMXBW>, 0x<MMXBW:pfx>0f71/6, <MMXBW:cpu>, Modrm|<MMXBW:vvvv>|NoSuf, { Imm8, <MMXBW:reg> }
+psllw<MMXBW>, 0x<MMXBW:pfx>0f71/6, <MMXBW:cpu>, Modrm|<MMXBW:vvvv>|NoSuf|Optimize, { Imm8, <MMXBW:reg> }
psll<MMXdq>, 0x<MMXdq:pfx>0ff2 | <MMXdq:opc>, <MMXdq:cpu>, Modrm|<MMXdq:attr>|NoSuf, { <MMXdq:reg>|<MMXdq:mem>|Unspecified|BaseIndex, <MMXdq:reg> }
-psll<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/6, <MMXdq:cpu>, Modrm|<MMXdq:vvvv>|NoSuf, { Imm8, <MMXdq:reg> }
+psll<MMXdq>, 0x<MMXdq:pfx>0f72 | <MMXdq:opc>/6, <MMXdq:cpu>, Modrm|<MMXdq:vvvv>|NoSuf|<MMXdq:optim>, { Imm8, <MMXdq:reg> }
psraw<MMXBW>, 0x<MMXBW:pfx>0fe1, <MMXBW:cpu>, Modrm|<MMXBW:attr>|NoSuf, { <MMXBW:reg>|<MMXBW:mem>|Unspecified|BaseIndex, <MMXBW:reg> }
psraw<MMXBW>, 0x<MMXBW:pfx>0f71/4, <MMXBW:cpu>, Modrm|<MMXBW:vvvv>|NoSuf, { Imm8, <MMXBW:reg> }
psrad<MMX>, 0x<MMX:pfx>0fe2, <MMX:cpu>, Modrm|<MMX:attr>|NoSuf, { <MMX:reg>|<MMX:mem>|Unspecified|BaseIndex, <MMX:reg> }
@@ -1815,10 +1815,10 @@ vpshufhw, 0xf370, AVX|AVX2, Modrm|Vex|Sp
vpshuflw, 0xf270, AVX|AVX2, Modrm|Vex|Space0F|VexWIG|CheckOperandSize|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM }
vpsign<bw>, 0x6608 | <bw:opc>, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpsignd, 0x660a, AVX|AVX2, Modrm|Vex|Space0F38|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM|RegYMM, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
vpsll<dq>, 0x66f2 | <dq:opc>, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpslldq, 0x6673/7, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
-vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
+vpsllw, 0x6671/6, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf|Optimize, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
vpsllw, 0x66f1, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
vpsrad, 0x6672/4, AVX|AVX2, Modrm|Vex|Space0F|DstVVVV|VexWIG|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM, RegXMM|RegYMM }
vpsrad, 0x66e2, AVX|AVX2, Modrm|Vex|Space0F|Src1VVVV|VexWIG|CheckOperandSize|NoSuf, { Unspecified|BaseIndex|RegXMM, RegXMM|RegYMM, RegXMM|RegYMM }
@@ -2561,7 +2561,7 @@ vpror<dq>, 0x6672/0, AVX512F, Modrm|Mask
vpshufd, 0x6670, AVX512F, Modrm|Masking|Space0F|VexW=1|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8|Imm8S, RegXMM|RegYMM|RegZMM|Dword|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpsll<dq>, 0x66f2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsll<dq>, 0x6672 | <dq:opc>/6, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpsra<dq>, 0x66e2, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vpsra<dq>, 0x6672/4, AVX512F, Modrm|Masking|Space0F|DstVVVV|<dq:vexw>|Broadcast|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|<dq:elem>|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpsrl<dq>, 0x66d2 | <dq:opc>, AVX512F, Modrm|Masking|Space0F|Src1VVVV|<dq:vexw>|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
@@ -2805,7 +2805,7 @@ vpminsw, 0x66EA, AVX512BW, Modrm|Masking
vpmulhuw, 0x66E4, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vpmulhw, 0x66E5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vpmullw, 0x66D5, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
-vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
+vpsllw, 0x6671/6, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf|Optimize, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpsllw, 0x66F1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }
vpsraw, 0x6671/4, AVX512BW, Modrm|Masking|Space0F|VexWIG|DstVVVV|Disp8ShiftVL|CheckOperandSize|NoSuf, { Imm8, RegXMM|RegYMM|RegZMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM }
vpsraw, 0x66E1, AVX512BW, Modrm|Masking|Space0F|VexWIG|Src1VVVV|Disp8MemShift=4|CheckOperandSize|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM|RegYMM|RegZMM, RegXMM|RegYMM|RegZMM }