[v2,5/8] x86/APX: extend TEST-by-imm7 optimization to CTESTcc

Message ID 70a41e52-4496-4ff8-956b-8f4fe170df46@suse.com
State New
Headers
Series x86: a few more optimizations |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Jan Beulich June 21, 2024, 12:51 p.m. UTC
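  The same properties apply to CTESTcc as to TEST: when the immediate fits
  in 7 bits, only the low byte of the register can affect the resulting
  flags, so the byte-register form can be used instead.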
---
I actually wonder why this optimization is limited to -Os: Reduced
encoding size is also a performance benefit, by reducing decode bandwidth
needs and cache footprint. IMO restriction to -Os should cover only
cases where there may be a negative impact on performance, i.e. when
there's "a price to pay" for the size reduction.
---
v2: New.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -4770,7 +4770,9 @@  optimize_encoding (void)
     }
 
   if (optimize_for_space
-      && i.tm.mnem_off == MN_test
+      && (i.tm.mnem_off == MN_test
+          || (i.tm.base_opcode == 0xf6
+              && i.tm.opcode_space == SPACE_EVEXMAP4))
       && i.reg_operands == 1
       && i.imm_operands == 1
       && !i.types[1].bitfield.byte
@@ -4779,9 +4781,13 @@  optimize_encoding (void)
       && fits_in_imm7 (i.op[0].imms->X_add_number))
     {
       /* Optimize: -Os:
-	   test $imm7, %r64/%r32/%r16  -> test $imm7, %r8
+	   test      $imm7, %r64/%r32/%r16  -> test      $imm7, %r8
+	   ctest<cc> $imm7, %r64/%r32/%r16  -> ctest<cc> $imm7, %r8
        */
       unsigned int base_regnum = i.op[1].regs->reg_num;
+
+      gas_assert (!i.tm.opcode_modifier.modrm || i.tm.extension_opcode == 0);
+
       if (flag_code == CODE_64BIT || base_regnum < 4)
 	{
 	  i.types[1].bitfield.byte = 1;
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.d
@@ -28,6 +28,50 @@  Disassembly of section .text:
  +[a-f0-9]+:	41 f6 c4 7f          	test   \$0x7f,%r12b
  +[a-f0-9]+:	41 f6 c4 7f          	test   \$0x7f,%r12b
  +[a-f0-9]+:	41 f6 c4 7f          	test   \$0x7f,%r12b
+ +[a-f0-9]+:	d5 10 f6 c6 7f       	test   \$0x7f,%r22b
+ +[a-f0-9]+:	d5 10 f6 c6 7f       	test   \$0x7f,%r22b
+ +[a-f0-9]+:	d5 10 f6 c6 7f       	test   \$0x7f,%r22b
+ +[a-f0-9]+:	d5 10 f6 c6 7f       	test   \$0x7f,%r22b
+ +[a-f0-9]+:	62 f4 04 0a f6 c3 7f 	ctestt( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 0a f6 c3 7f 	ctestt( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 0a f6 c3 7f 	ctestt( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 0a f6 c3 7f 	ctestt( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 0a f6 c7 7f 	ctestt( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 0a f6 c7 7f 	ctestt( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 0a f6 c7 7f 	ctestt( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 0a f6 c7 7f 	ctestt( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 d4 04 0a f6 c1 7f 	ctestt( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 0a f6 c1 7f 	ctestt( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 0a f6 c1 7f 	ctestt( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 0a f6 c1 7f 	ctestt( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 0a f6 c4 7f 	ctestt( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 0a f6 c4 7f 	ctestt( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 0a f6 c4 7f 	ctestt( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 0a f6 c4 7f 	ctestt( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 fc 04 0a f6 c6 7f 	ctestt( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 0a f6 c6 7f 	ctestt( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 0a f6 c6 7f 	ctestt( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 0a f6 c6 7f 	ctestt( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 f4 04 02 f6 c3 7f 	ctestb( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 02 f6 c3 7f 	ctestb( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 02 f6 c3 7f 	ctestb( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 02 f6 c3 7f 	ctestb( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 04 02 f6 c7 7f 	ctestb( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 02 f6 c7 7f 	ctestb( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 02 f6 c7 7f 	ctestb( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 f4 04 02 f6 c7 7f 	ctestb( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 d4 04 02 f6 c1 7f 	ctestb( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 02 f6 c1 7f 	ctestb( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 02 f6 c1 7f 	ctestb( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 02 f6 c1 7f 	ctestb( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 04 02 f6 c4 7f 	ctestb( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 02 f6 c4 7f 	ctestb( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 02 f6 c4 7f 	ctestb( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 d4 04 02 f6 c4 7f 	ctestb( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 fc 04 02 f6 c6 7f 	ctestb( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 02 f6 c6 7f 	ctestb( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 02 f6 c6 7f 	ctestb( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 fc 04 02 f6 c6 7f 	ctestb( \{dfv=\})? \$0x7f,%r22b
  +[a-f0-9]+:	20 c9                	and    %cl,%cl
  +[a-f0-9]+:	66 21 d2             	and    %dx,%dx
  +[a-f0-9]+:	21 db                	and    %ebx,%ebx
--- a/gas/testsuite/gas/i386/x86-64-optimize-3.s
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3.s
@@ -6,22 +6,28 @@  _start:
 	testl	$0x7f, %eax
 	testw	$0x7f, %ax
 	testb	$0x7f, %al
-	test	$0x7f, %rbx
-	test	$0x7f, %ebx
-	test	$0x7f, %bx
-	test	$0x7f, %bl
-	test	$0x7f, %rdi
-	test	$0x7f, %edi
-	test	$0x7f, %di
-	test	$0x7f, %dil
-	test	$0x7f, %r9
-	test	$0x7f, %r9d
-	test	$0x7f, %r9w
-	test	$0x7f, %r9b
-	test	$0x7f, %r12
-	test	$0x7f, %r12d
-	test	$0x7f, %r12w
-	test	$0x7f, %r12b
+	.irp tst, test, "{evex} test", ctestc
+	\tst	$0x7f, %rbx
+	\tst	$0x7f, %ebx
+	\tst	$0x7f, %bx
+	\tst	$0x7f, %bl
+	\tst	$0x7f, %rdi
+	\tst	$0x7f, %edi
+	\tst	$0x7f, %di
+	\tst	$0x7f, %dil
+	\tst	$0x7f, %r9
+	\tst	$0x7f, %r9d
+	\tst	$0x7f, %r9w
+	\tst	$0x7f, %r9b
+	\tst	$0x7f, %r12
+	\tst	$0x7f, %r12d
+	\tst	$0x7f, %r12w
+	\tst	$0x7f, %r12b
+	\tst	$0x7f, %r22
+	\tst	$0x7f, %r22d
+	\tst	$0x7f, %r22w
+	\tst	$0x7f, %r22b
+	.endr
 
 	and	%cl, %cl
 	and	%dx, %dx
--- a/gas/testsuite/gas/i386/x86-64-optimize-3b.d
+++ b/gas/testsuite/gas/i386/x86-64-optimize-3b.d
@@ -29,6 +29,50 @@  Disassembly of section .text:
  +[a-f0-9]+:	41 f7 c4 7f 00 00 00 	test   \$0x7f,%r12d
  +[a-f0-9]+:	66 41 f7 c4 7f 00    	test   \$0x7f,%r12w
  +[a-f0-9]+:	41 f6 c4 7f          	test   \$0x7f,%r12b
+ +[a-f0-9]+:	d5 18 f7 c6 7f 00 00 00 	test   \$0x7f,%r22
+ +[a-f0-9]+:	d5 10 f7 c6 7f 00 00 00 	test   \$0x7f,%r22d
+ +[a-f0-9]+:	66 d5 10 f7 c6 7f 00 	test   \$0x7f,%r22w
+ +[a-f0-9]+:	d5 10 f6 c6 7f       	test   \$0x7f,%r22b
+ +[a-f0-9]+:	62 f4 84 0a f7 c3 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%rbx
+ +[a-f0-9]+:	62 f4 04 0a f7 c3 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%ebx
+ +[a-f0-9]+:	62 f4 05 0a f7 c3 7f 00 	ctestt( \{dfv=\})? \$0x7f,%bx
+ +[a-f0-9]+:	62 f4 04 0a f6 c3 7f 	ctestt( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 84 0a f7 c7 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%rdi
+ +[a-f0-9]+:	62 f4 04 0a f7 c7 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%edi
+ +[a-f0-9]+:	62 f4 05 0a f7 c7 7f 00 	ctestt( \{dfv=\})? \$0x7f,%di
+ +[a-f0-9]+:	62 f4 04 0a f6 c7 7f 	ctestt( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 d4 84 0a f7 c1 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r9
+ +[a-f0-9]+:	62 d4 04 0a f7 c1 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r9d
+ +[a-f0-9]+:	62 d4 05 0a f7 c1 7f 00 	ctestt( \{dfv=\})? \$0x7f,%r9w
+ +[a-f0-9]+:	62 d4 04 0a f6 c1 7f 	ctestt( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 84 0a f7 c4 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r12
+ +[a-f0-9]+:	62 d4 04 0a f7 c4 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r12d
+ +[a-f0-9]+:	62 d4 05 0a f7 c4 7f 00 	ctestt( \{dfv=\})? \$0x7f,%r12w
+ +[a-f0-9]+:	62 d4 04 0a f6 c4 7f 	ctestt( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 fc 84 0a f7 c6 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r22
+ +[a-f0-9]+:	62 fc 04 0a f7 c6 7f 00 00 00 	ctestt( \{dfv=\})? \$0x7f,%r22d
+ +[a-f0-9]+:	62 fc 05 0a f7 c6 7f 00 	ctestt( \{dfv=\})? \$0x7f,%r22w
+ +[a-f0-9]+:	62 fc 04 0a f6 c6 7f 	ctestt( \{dfv=\})? \$0x7f,%r22b
+ +[a-f0-9]+:	62 f4 84 02 f7 c3 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%rbx
+ +[a-f0-9]+:	62 f4 04 02 f7 c3 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%ebx
+ +[a-f0-9]+:	62 f4 05 02 f7 c3 7f 00 	ctestb( \{dfv=\})? \$0x7f,%bx
+ +[a-f0-9]+:	62 f4 04 02 f6 c3 7f 	ctestb( \{dfv=\})? \$0x7f,%bl
+ +[a-f0-9]+:	62 f4 84 02 f7 c7 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%rdi
+ +[a-f0-9]+:	62 f4 04 02 f7 c7 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%edi
+ +[a-f0-9]+:	62 f4 05 02 f7 c7 7f 00 	ctestb( \{dfv=\})? \$0x7f,%di
+ +[a-f0-9]+:	62 f4 04 02 f6 c7 7f 	ctestb( \{dfv=\})? \$0x7f,%dil
+ +[a-f0-9]+:	62 d4 84 02 f7 c1 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r9
+ +[a-f0-9]+:	62 d4 04 02 f7 c1 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r9d
+ +[a-f0-9]+:	62 d4 05 02 f7 c1 7f 00 	ctestb( \{dfv=\})? \$0x7f,%r9w
+ +[a-f0-9]+:	62 d4 04 02 f6 c1 7f 	ctestb( \{dfv=\})? \$0x7f,%r9b
+ +[a-f0-9]+:	62 d4 84 02 f7 c4 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r12
+ +[a-f0-9]+:	62 d4 04 02 f7 c4 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r12d
+ +[a-f0-9]+:	62 d4 05 02 f7 c4 7f 00 	ctestb( \{dfv=\})? \$0x7f,%r12w
+ +[a-f0-9]+:	62 d4 04 02 f6 c4 7f 	ctestb( \{dfv=\})? \$0x7f,%r12b
+ +[a-f0-9]+:	62 fc 84 02 f7 c6 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r22
+ +[a-f0-9]+:	62 fc 04 02 f7 c6 7f 00 00 00 	ctestb( \{dfv=\})? \$0x7f,%r22d
+ +[a-f0-9]+:	62 fc 05 02 f7 c6 7f 00 	ctestb( \{dfv=\})? \$0x7f,%r22w
+ +[a-f0-9]+:	62 fc 04 02 f6 c6 7f 	ctestb( \{dfv=\})? \$0x7f,%r22b
  +[a-f0-9]+:	84 c9                	test   %cl,%cl
  +[a-f0-9]+:	66 85 d2             	test   %dx,%dx
  +[a-f0-9]+:	21 db                	and    %ebx,%ebx
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -357,10 +357,10 @@  test, 0x84, 0, D|W|C|CheckOperandSize|Mo
 test, 0xa8, 0, W|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Acc|Byte|Word|Dword|Qword }
 test, 0xf6/0, 0, W|Modrm|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 test, 0x840a, 0, D|W|C|CheckOperandSize|Modrm|EVexMap4|Scc|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-test, 0xf60a/0, 0, W|Modrm|EVexMap4|Scc|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
+test, 0xf60a/0, 0, W|Modrm|EVexMap4|Scc|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
 ctest<scc>, 0x840<scc:opc>, APX_F, D|W|C|CheckOperandSize|Modrm|EVexMap4|Scc|No_sSuf, { Reg8|Reg16|Reg32|Reg64, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
-ctest<scc>, 0xf60<scc:opc>/0, APX_F, W|Modrm|EVexMap4|Scc|No_sSuf, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
+ctest<scc>, 0xf60<scc:opc>/0, APX_F, W|Modrm|EVexMap4|Scc|No_sSuf|Optimize, { Imm8|Imm16|Imm32|Imm32S, Reg8|Reg16|Reg32|Reg64|Unspecified|BaseIndex }
 
 <scc>