[x86] Pre-reload splitter to transform and;cmp into not;test.

Message ID 006301d87130$3747a240$a5d6e6c0$@nextmovesoftware.com
State Committed
Commit 29ae455901ac711470c4aa4f42d51f62e0b3753a
Headers
Series [x86] Pre-reload splitter to transform and;cmp into not;test.

Commit Message

Roger Sayle May 26, 2022, 6:41 p.m. UTC
  A common idiom for testing whether a specific set of bits is set in a value
is to use "(X & Y) == Y", which on x86 results in an AND followed by a
CMP.  A slightly improved implementation is to instead use "(~X & Y) == 0",
which uses a NOT and a TEST (or ANDN where available); this is still two "fast"
instructions, but the encoding is typically shorter, especially if Y is an
immediate constant.  Because this transformation would require more gimple
statements in SSA, and may only be a win on targets with flags registers,
it isn't performed by the middle-end; the choice is instead left to
the backend.

As an example, here's the change in code generation for pr91400-1.c
[which now requires a tweak to its dg-final clauses].

Before:
        movl    __cpu_model+12(%rip), %eax
        andl    $68, %eax       // 3 bytes
        cmpl    $68, %eax       // 3 bytes
        sete    %al
        ret

After:
        movl    __cpu_model+12(%rip), %eax
        notl    %eax            // 2 bytes
        testb   $68, %al        // 2 bytes
        sete    %al
        ret


This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-05-26  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/i386/i386.md (*test<mode>_not): New define_insn_and_split
	to split a combined "and;cmp" sequence into "not;test".

gcc/testsuite/ChangeLog
	* gcc.target/i386/pr91400-1.c: Update for improved code generation.
	* gcc.target/i386/pr91400-2.c: Likewise.
	* gcc.target/i386/testnot-1.c: New test case.
	* gcc.target/i386/testnot-2.c: Likewise.


Thanks in advance,
Roger
--
  

Comments

Uros Bizjak May 26, 2022, 6:59 p.m. UTC | #1
On Thu, May 26, 2022 at 8:41 PM Roger Sayle <roger@nextmovesoftware.com> wrote:
>
>
> A common idiom for testing if a specific set of bits is set in a value
> is to use "(X & Y) == Y", which on x86 results in an AND followed by a
> CMP.  A slightly improved implementation is to instead use (~X & Y)==0,
> that uses a NOT and a TEST (or ANDN where available); still two "fast"
> instructions, but typically shorter especially if Y is an immediate
> constant.  Because the above transformation would require more gimple
> statements in SSA, and may only be a win on targets with flags registers,
> it isn't performed by the middle-end, instead leaving this choice to
> the backend.
>
> As an example, here's the change in code generation for pr91400-1.c
> [which now requires a tweak to its dg-final clauses].
>
> Before:
>         movl    __cpu_model+12(%rip), %eax
>         andl    $68, %eax       // 3 bytes
>         cmpl    $68, %eax       // 3 bytes
>         sete    %al
>         ret
>
> After:
>         movl    __cpu_model+12(%rip), %eax
>         notl    %eax            // 2 bytes
>         testb   $68, %al        // 2 bytes
>         sete    %al
>         ret
>
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?
>
>
> 2022-05-26  Roger Sayle  <roger@nextmovesoftware.com>
>
> gcc/ChangeLog
>         * config/i386/i386.md (*test<mode>_not): New define_insn_and_split
>         to split a combined "and;cmp" sequence into "not;test".
>
> gcc/testsuite/ChangeLog
>         * gcc.target/i386/pr91400-1.c: Update for improved code generation.
>         * gcc.target/i386/pr91400-2.c: Likewise.
>         * gcc.target/i386/testnot-1.c: New test case.
>         * gcc.target/i386/testnot-2.c: Likewise.

OK.

Thanks,
Uros.
  

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b9b8f78..602dfa7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9716,6 +9716,27 @@ 
   operands[2] = gen_rtx_AND (mode, val, immed_wide_int_const (mask, mode));
 })
 
+;; Split and;cmp (as optimized by combine) into not;test
+;; Except when TARGET_BMI provides andn (*andn_<mode>_ccno).
+(define_insn_and_split "*test<mode>_not"
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (and:SWI
+	    (not:SWI (match_operand:SWI 0 "register_operand"))
+	    (match_operand:SWI 1 "<nonmemory_szext_operand>"))
+	  (const_int 0)))]
+  "ix86_pre_reload_split ()
+   && (!TARGET_BMI || !REG_P (operands[1]))"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (not:SWI (match_dup 0)))
+   (set (reg:CCZ FLAGS_REG)
+	(compare:CCZ (and:SWI (match_dup 2) (match_dup 1))
+		     (const_int 0)))]
+{
+  operands[2] = gen_reg_rtx (<MODE>mode);
+})
+
 ;; Convert HImode/SImode test instructions with immediate to QImode ones.
 ;; i386 does not allow to encode test with 8bit sign extended immediate, so
 ;; this is relatively important trick.
diff --git a/gcc/testsuite/gcc.target/i386/pr91400-1.c b/gcc/testsuite/gcc.target/i386/pr91400-1.c
index 6124058..751dc6c 100644
--- a/gcc/testsuite/gcc.target/i386/pr91400-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr91400-1.c
@@ -1,8 +1,8 @@ 
 /* PR target/91400 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-final { scan-assembler-times "andl" 1 } } */
-/* { dg-final { scan-assembler-times "cmpl" 1 } } */
+/* { dg-final { scan-assembler-times "notl" 1 } } */
+/* { dg-final { scan-assembler-times "testb" 1 } } */
 /* { dg-final { scan-assembler-times "sete" 1 } } */
 /* { dg-final { scan-assembler-not "cmove" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr91400-2.c b/gcc/testsuite/gcc.target/i386/pr91400-2.c
index 1af5a2f..914acd7 100644
--- a/gcc/testsuite/gcc.target/i386/pr91400-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr91400-2.c
@@ -1,8 +1,8 @@ 
 /* PR target/91400 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-final { scan-assembler-times "andl" 1 } } */
-/* { dg-final { scan-assembler-times "cmpl" 1 } } */
+/* { dg-final { scan-assembler-times "notl" 1 } } */
+/* { dg-final { scan-assembler-times "testb" 1 } } */
 /* { dg-final { scan-assembler-times "sete" 1 } } */
 /* { dg-final { scan-assembler-not "cmove" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/testnot-1.c b/gcc/testsuite/gcc.target/i386/testnot-1.c
new file mode 100644
index 0000000..9ebcb5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testnot-1.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int foo(int x)
+{
+    return (x & 1234) == 1234;
+}
+
+int foos(short x)
+{
+    return (x & 1234) == 1234;
+}
+
+int fooc(char x)
+{
+    return (x & 123) == 123;
+}
+
+int fool(long long x)
+{
+    return (x & 1234) == 1234;
+}
+
+/* { dg-final { scan-assembler-not "cmp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/testnot-2.c b/gcc/testsuite/gcc.target/i386/testnot-2.c
new file mode 100644
index 0000000..52fdaf3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/testnot-2.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+int foo(int x, int y)
+{
+    return (x & y) == y;
+}
+
+int foos(short x, short y)
+{
+    return (x & y) == y;
+}
+
+int fooc(char x, char y)
+{
+    return (x & y) == y;
+}
+
+int fool(long long x, long long y)
+{
+    return (x & y) == y;
+}
+
+/* { dg-final { scan-assembler-not "cmp" } } */