@@ -4693,6 +4693,34 @@ optimize_encoding (void)
}
}
}
+ else if (i.reg_operands == 3
+ && i.op[0].regs == i.op[1].regs
+ && i.encoding != encoding_evex
+ && (i.tm.mnem_off == MN_xor
+ || i.tm.mnem_off == MN_sub))
+ {
+ /* Optimize: -O:
+ xorb %rNb, %rNb, %rMb -> xorl %rMd, %rMd
+ xorw %rNw, %rNw, %rMw -> xorl %rMd, %rMd
+ xorl %rNd, %rNd, %rMd -> xorl %rMd, %rMd
+ xorq %rN, %rN, %rM -> xorl %rMd, %rMd
+ subb %rNb, %rNb, %rMb -> subl %rMd, %rMd
+ subw %rNw, %rNw, %rMw -> subl %rMd, %rMd
+ subl %rNd, %rNd, %rMd -> subl %rMd, %rMd
+ subq %rN, %rN, %rM -> subl %rMd, %rMd
+ Operands 0 and 1 are the same register here (checked above), so
+ operand 2's register replaces both and the insn becomes a 32-bit,
+ two-register form.  Not done when i.encoding is encoding_evex.  */
+ i.tm.opcode_space = SPACE_BASE; /* NOTE(review): presumably the legacy opcode map - confirm.  */
+ i.tm.opcode_modifier.evex = 0;
+ i.tm.opcode_modifier.size = SIZE32;
+ i.types[0].bitfield.byte = 0; /* Leave dword as the only size bit set on operand 0.  */
+ i.types[0].bitfield.word = 0;
+ i.types[0].bitfield.dword = 1;
+ i.types[0].bitfield.qword = 0;
+ i.op[0].regs = i.op[2].regs;
+ i.types[1] = i.types[0]; /* Operand 1 mirrors operand 0's (now dword) type.  */
+ i.op[1].regs = i.op[2].regs;
+ i.reg_operands = 2; /* The third register operand is dropped from the count.  */
+ }
else if (optimize > 1
&& !optimize_for_space
&& i.reg_operands == 2
@@ -71,4 +71,28 @@ Disassembly of section .text:
+[a-f0-9]+: 48 0f ba f0 1f btr \$0x1f,%rax
+[a-f0-9]+: 66 0f ba e8 0f bts \$0xf,%ax
+[a-f0-9]+: 48 0f ba e8 1f bts \$0x1f,%rax
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 48 31 d1 xor %rdx,%rcx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 48 29 d1 sub %rdx,%rcx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 58 31 d1 xor %r18,%r17
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: d5 58 29 d1 sub %r18,%r17
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 31 d1 xor %dx,%cx,%cx
+ +[a-f0-9]+: 31 c9 xor %ecx,%ecx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: 62 f4 75 18 29 d1 sub %dx,%cx,%cx
+ +[a-f0-9]+: 29 c9 sub %ecx,%ecx
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 30 d1 xor %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 31 c9 xor %r17d,%r17d
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
+ +[a-f0-9]+: 62 ec 74 10 28 d1 sub %r18b,%r17b,%r17b
+ +[a-f0-9]+: d5 50 29 c9 sub %r17d,%r17d
#pass
@@ -65,3 +65,27 @@ _start:
btr $31, %rax
bts $15, %ax
bts $31, %rax
+ xor %rcx, %rcx, %rcx
+ xor %rdx, %rcx, %rcx
+ xor %rdx, %rdx, %rcx
+ sub %rcx, %rcx, %rcx
+ sub %rdx, %rcx, %rcx
+ sub %rdx, %rdx, %rcx
+ xor %r17, %r17, %r17
+ xor %r18, %r17, %r17
+ xor %r18, %r18, %r17
+ sub %r17, %r17, %r17
+ sub %r18, %r17, %r17
+ sub %r18, %r18, %r17
+ xor %cx, %cx, %cx
+ xor %dx, %cx, %cx
+ xor %dx, %dx, %cx
+ sub %cx, %cx, %cx
+ sub %dx, %cx, %cx
+ sub %dx, %dx, %cx
+ xor %r17b, %r17b, %r17b
+ xor %r18b, %r17b, %r17b
+ xor %r18b, %r18b, %r17b
+ sub %r17b, %r17b, %r17b
+ sub %r18b, %r17b, %r17b
+ sub %r18b, %r18b, %r17b
@@ -320,7 +320,7 @@ inc, 0x40, No64, No_bSuf|No_sSuf|No_qSuf
inc, 0xfe/0, APX_F, W|Modrm|No_sSuf|CheckOperandSize|DstVVVV|EVexMap4|NF, {Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64}
inc, 0xfe/0, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
+sub, 0x28, APX_F, D|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64, }
sub, 0x28, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
sub, 0x83/5, APX_F, Modrm|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
sub, 0x83/5, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }
@@ -366,7 +366,7 @@ or, 0xc, 0, W|No_sSuf, { Imm8|Imm16|Imm3
or, 0x80/1, APX_F, W|Modrm|CheckOperandSize|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
or, 0x80/1, 0, W|Modrm|No_sSuf|HLEPrefixLock, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
+xor, 0x30, APX_F, D|C|W|CheckOperandSize|Modrm|No_sSuf|DstVVVV|EVexMap4|NF|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg8|Reg16|Reg32|Reg64 }
xor, 0x30, 0, D|W|CheckOperandSize|Modrm|No_sSuf|HLEPrefixLock|Optimize, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
xor, 0x83/6, APX_F, Modrm|CheckOperandSize|No_bSuf|No_sSuf|DstVVVV|EVexMap4|NF, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
xor, 0x83/6, 0, Modrm|No_bSuf|No_sSuf|HLEPrefixLock, { Imm8S, Reg16|Reg32|Reg64|Unspecified|BaseIndex }