[v2,4/8] x86-64: restrict by-imm31 optimization

Message ID a5887def-9a22-425b-87e1-0c5d71f44374@suse.com
State New
Headers
Series x86: a few more optimizations |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jan Beulich June 21, 2024, 12:51 p.m. UTC
  Avoid changing the encoding when there's no size gain: If a REX or REX2
prefix is needed anyway and the base opcode wouldn't change, dropping
just REX.W / REX2.W has no (size) effect. (The same applies to the
AND-by-imm7 case in the same big conditional.)

While there, also pull out the .qword check: For the 2-register-operands
case it doesn't matter whether that's done on the 1st or 2nd operand. Due
to the reduction in necessary parentheses, this improves readability a
tiny bit.
---
v2: New.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4809,8 +4809,8 @@  optimize_encoding (void)
     }
   else if (flag_code == CODE_64BIT
 	   && i.tm.opcode_space == SPACE_BASE
-	   && ((i.types[1].bitfield.qword
-		&& i.reg_operands == 1
+	   && i.types[i.operands - 1].bitfield.qword
+	   && ((i.reg_operands == 1
 		&& i.imm_operands == 1
 		&& i.op[0].imms->X_op == O_constant
 		&& ((i.tm.base_opcode == 0xb8
@@ -4818,26 +4818,29 @@  optimize_encoding (void)
 		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
 		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
 			&& (i.tm.base_opcode == 0x24
-			    || (i.tm.base_opcode == 0x80
-				&& i.tm.extension_opcode == 0x4)
-			    || i.tm.mnem_off == MN_test
+			    || (((i.tm.base_opcode == 0x80
+				  && i.tm.extension_opcode == 0x4)
+				 || i.tm.mnem_off == MN_test)
+				&& !(i.op[1].regs->reg_flags
+				     & (RegRex | RegRex2)))
 			    || ((i.tm.base_opcode | 1) == 0xc7
 				&& i.tm.extension_opcode == 0x0)))
 		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
 			&& i.tm.base_opcode == 0x83
-			&& i.tm.extension_opcode == 0x4)))
-	       || (i.types[0].bitfield.qword
-		   && ((i.reg_operands == 2
-			&& i.op[0].regs == i.op[1].regs
-			&& (i.tm.mnem_off == MN_xor
-			    || i.tm.mnem_off == MN_sub))
-		       || i.tm.mnem_off == MN_clr))))
+			&& i.tm.extension_opcode == 0x4
+			&& !(i.op[1].regs->reg_flags & (RegRex | RegRex2)))))
+	       || ((i.reg_operands == 2
+		    && i.op[0].regs == i.op[1].regs
+		    && (i.tm.mnem_off == MN_xor
+			|| i.tm.mnem_off == MN_sub))
+		   || i.tm.mnem_off == MN_clr)))
     {
       /* Optimize: -O:
 	   andq $imm31, %r64   -> andl $imm31, %r32
 	   andq $imm7, %r64    -> andl $imm7, %r32
 	   testq $imm31, %r64  -> testl $imm31, %r32
 	   xorq %r64, %r64     -> xorl %r32, %r32
+	   clrq %r64           -> clrl %r32
 	   subq %r64, %r64     -> subl %r32, %r32
 	   movq $imm31, %r64   -> movl $imm31, %r32
 	   movq $imm32, %r64   -> movl $imm32, %r32
--- a/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
+++ b/gas/testsuite/gas/i386/x86-64-apx-ndd-optimize.d
@@ -35,7 +35,7 @@  Disassembly of section .text:
 \s*[a-f0-9]+:\s*62 7c 74 10 20 f9    	and    %r15b,%r17b,%r17b
 \s*[a-f0-9]+:\s*4d 23 38             	and    \(%r8\),%r15
 \s*[a-f0-9]+:\s*d5 49 23 04 07       	and    \(%r15,%rax,1\),%r16
-\s*[a-f0-9]+:\s*d5 11 81 e6 34 12 00 00 	and    \$0x1234,%r30d
+\s*[a-f0-9]+:\s*d5 19 81 e6 34 12 00 00 	and    \$0x1234,%r30
 \s*[a-f0-9]+:\s*d5 1c 09 f9          	or     %r15,%r17
 \s*[a-f0-9]+:\s*62 7c 74 10 08 f9    	or     %r15b,%r17b,%r17b
 \s*[a-f0-9]+:\s*4d 0b 38             	or     \(%r8\),%r15
--- a/gas/testsuite/gas/i386/x86-64-optimize-1.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-1.d
@@ -11,19 +11,19 @@  Disassembly of section .text:
  +[a-f0-9]+:	48 25 00 00 00 00    	and    \$0x0,%rax	2: R_X86_64_32S	foo
  +[a-f0-9]+:	25 ff ff ff 7f       	and    \$0x7fffffff,%eax
  +[a-f0-9]+:	81 e3 ff ff ff 7f    	and    \$0x7fffffff,%ebx
- +[a-f0-9]+:	41 81 e6 ff ff ff 7f 	and    \$0x7fffffff,%r14d
+ +[a-f0-9]+:	49 81 e6 ff ff ff 7f 	and    \$0x7fffffff,%r14
  +[a-f0-9]+:	48 25 00 00 00 80    	and    \$0xffffffff80000000,%rax
  +[a-f0-9]+:	48 81 e3 00 00 00 80 	and    \$0xffffffff80000000,%rbx
  +[a-f0-9]+:	49 81 e6 00 00 00 80 	and    \$0xffffffff80000000,%r14
  +[a-f0-9]+:	83 e0 7f             	and    \$0x7f,%eax
  +[a-f0-9]+:	83 e3 7f             	and    \$0x7f,%ebx
- +[a-f0-9]+:	41 83 e6 7f          	and    \$0x7f,%r14d
+ +[a-f0-9]+:	49 83 e6 7f          	and    \$0x7f,%r14
  +[a-f0-9]+:	48 83 e0 80          	and    \$0xffffffffffffff80,%rax
  +[a-f0-9]+:	48 83 e3 80          	and    \$0xffffffffffffff80,%rbx
  +[a-f0-9]+:	49 83 e6 80          	and    \$0xffffffffffffff80,%r14
  +[a-f0-9]+:	a9 ff ff ff 7f       	test   \$0x7fffffff,%eax
  +[a-f0-9]+:	f7 c3 ff ff ff 7f    	test   \$0x7fffffff,%ebx
- +[a-f0-9]+:	41 f7 c6 ff ff ff 7f 	test   \$0x7fffffff,%r14d
+ +[a-f0-9]+:	49 f7 c6 ff ff ff 7f 	test   \$0x7fffffff,%r14
  +[a-f0-9]+:	48 a9 00 00 00 80    	test   \$0xffffffff80000000,%rax
  +[a-f0-9]+:	48 f7 c3 00 00 00 80 	test   \$0xffffffff80000000,%rbx
  +[a-f0-9]+:	49 f7 c6 00 00 00 80 	test   \$0xffffffff80000000,%r14
--- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
@@ -21,11 +21,11 @@  Disassembly of section .text:
  +[a-f0-9]+:	f7 c7 7f 00 00 00    	test   \$0x7f,%edi
  +[a-f0-9]+:	66 f7 c7 7f 00       	test   \$0x7f,%di
  +[a-f0-9]+:	40 f6 c7 7f          	test   \$0x7f,%dil
- +[a-f0-9]+:	41 f7 c1 7f 00 00 00 	test   \$0x7f,%r9d
+ +[a-f0-9]+:	49 f7 c1 7f 00 00 00 	test   \$0x7f,%r9
  +[a-f0-9]+:	41 f7 c1 7f 00 00 00 	test   \$0x7f,%r9d
  +[a-f0-9]+:	66 41 f7 c1 7f 00    	test   \$0x7f,%r9w
  +[a-f0-9]+:	41 f6 c1 7f          	test   \$0x7f,%r9b
- +[a-f0-9]+:	41 f7 c4 7f 00 00 00 	test   \$0x7f,%r12d
+ +[a-f0-9]+:	49 f7 c4 7f 00 00 00 	test   \$0x7f,%r12
  +[a-f0-9]+:	41 f7 c4 7f 00 00 00 	test   \$0x7f,%r12d
  +[a-f0-9]+:	66 41 f7 c4 7f 00    	test   \$0x7f,%r12w
  +[a-f0-9]+:	41 f6 c4 7f          	test   \$0x7f,%r12b