[2/2] AArch64 Support new tbranch optab.

Message ID Y1+3ThtA9vUT43aA@arm.com
State Committed
Headers
Series [1/2] middle-end: Add new tbranch optab to add support for bit-test-and-branch operations |

Commit Message

Tamar Christina Oct. 31, 2022, 11:53 a.m. UTC
  Hi All,

This implements the new tbranch optab for AArch64.

Instead of emitting the instruction directly I've chosen to expand the pattern
using a zero extract and generating the existing pattern for comparisons for two
reasons:

  1. Allows for CSE of the actual comparison.
  2. It looks like the code in expand makes the label as unused and removed it
     if it doesn't see a separate reference to it.

Because of this expansion though I disable the pattern at -O0 since we have no
combine in that case so we'd end up with worse code.  I did try emitting the
pattern directly, but as mentioned in no#2 expand would then kill the label.

While doing this I noticed that the version that checks the signbit doesn't work
The reason for this looks like an incorrect pattern.  The [us]fbx
instructions are defined for index + size == regiter size.  They architecturally
alias to different instructions and binutils handles this correctly.

In GCC however we tried to prematurely optimize this and added a separate split
pattern.  But this pattern is also missing alternatives only handling DImode.

This just removes this and relaxes the constraints on the normal bfx pattern.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch<mode>4): New.
	(*<optab><mode>): Rename to...
	(*<optab><GPI:mode><ALLI:mode>): ... this.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644




--
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..6a4494a9a370139313cc8e57447717aafa14da2d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +975,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -5752,39 +5768,19 @@ (define_expand "<optab>"
 )
 
 
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+	(ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
 			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_<mode>" "n")
+			   "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
 			 (match_operand 3
-			   "aarch64_simd_shift_imm_<mode>" "n")))]
+			   "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
-	     1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+	     1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+  "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
   [(set_attr "type" "bfx")]
 )
 
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(zero_extract:DI (match_operand:DI 1 "register_operand")
-			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_di")
-			 (match_operand 3
-			   "aarch64_simd_shift_imm_di")))]
-  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
-	     GET_MODE_BITSIZE (DImode) - 1)
-   && (INTVAL (operands[2]) + INTVAL (operands[3]))
-       == GET_MODE_BITSIZE (SImode)"
-  [(set (match_dup 0)
-	(zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
-  {
-    operands[4] = gen_lowpart (SImode, operands[1]);
-  }
-)
-
 ;; Bitfield Insert (insv)
 (define_expand "insv<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}
  

Comments

Tamar Christina Nov. 14, 2022, 3:58 p.m. UTC | #1
Hello,

Ping and updated patch.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch<mode>4): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd712bde55c7c72e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
+  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}
  
Richard Sandiford Nov. 15, 2022, 10:36 a.m. UTC | #2
Tamar Christina <Tamar.Christina@arm.com> writes:
> Hello,
>
> Ping and updated patch.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>         (tbranch<mode>4): New.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/tbz_1.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd712bde55c7c72e 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>                       (const_int 1)))]
>  )
>
> -(define_insn "*tb<optab><mode>1"
> +(define_expand "tbranch<mode>4"
>    [(set (pc) (if_then_else
> -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
> -                                   (const_int 1)
> -                                   (match_operand 1
> -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> +               (match_operator 0 "aarch64_comparison_operator"
> +                [(match_operand:ALLI 1 "register_operand")
> +                 (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
> +               (label_ref (match_operand 3 "" ""))
> +               (pc)))]
> +  "optimize > 0"

Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.

I think the split here shows the difficulty with having a single optab
and a comparison operator though.  operand 0 can be something like:

  (eq x 1)

but we're not comparing x for equality with 1.  We're testing whether
bit 1 is zero.  This means that operand 0 can't be taken literally
and can't be used directly in insn patterns.

In an earlier review, I'd said:

  For the TB instructions (and for other similar instructions that I've
  seen on other architectures) it would be more useful to have a single-bit
  test, with operand 4 specifying the bit position.  Arguably it might then
  be better to have separate eq and ne optabs, to avoid the awkward doubling
  of the operands (operand 1 contains operands 2 and 3).

I think we should do that eq/ne split (sorry for not pushing harder for
it before).

Thanks,
Richard



> +{
> +  rtx bitvalue = gen_reg_rtx (DImode);
> +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
> +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> +  operands[2] = const0_rtx;
> +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
> +                                        operands[2]);
> +})
> +
> +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> +  [(set (pc) (if_then_else
> +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
> +                                    (const_int 1)
> +                                    (match_operand 1
> +                                      "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>                    (const_int 0))
>              (label_ref (match_operand 2 "" ""))
>              (pc)))
> @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
>        {
>         if (get_attr_far_branch (insn) == 1)
>           return aarch64_gen_far_branch (operands, 2, "Ltb",
> -                                        "<inv_tb>\\t%<w>0, %1, ");
> +                                        "<inv_tb>\\t%<ALLI:w>0, %1, ");
>         else
>           {
>             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
> -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>           }
>        }
>      else
> -      return "<tbz>\t%<w>0, %1, %l2";
> +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>    }
>    [(set_attr "type" "branch")
>     (set (attr "length")
> diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> @@ -0,0 +1,95 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdbool.h>
> +
> +void h(void);
> +
> +/*
> +** g1:
> +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> +**     ret
> +**     ...
> +*/
> +void g1(bool x)
> +{
> +  if (__builtin_expect (x, 0))
> +    h ();
> +}
> +
> +/*
> +** g2:
> +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> +**     b       h
> +**     ...
> +*/
> +void g2(bool x)
> +{
> +  if (__builtin_expect (x, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_ge:
> +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_ge(int x)
> +{
> +  if (__builtin_expect (x >= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_gt:
> +**     cmp     w[0-9]+, 0
> +**     ble     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_gt(int x)
> +{
> +  if (__builtin_expect (x > 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_lt:
> +**     tbz     w[0-9]+, #?31, .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_lt(int x)
> +{
> +  if (__builtin_expect (x < 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_le:
> +**     cmp     w[0-9]+, 0
> +**     bgt     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g3_le(int x)
> +{
> +  if (__builtin_expect (x <= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g5:
> +**     mov     w[0-9]+, 65279
> +**     tst     w[0-9]+, w[0-9]+
> +**     beq     .L[0-9]+
> +**     b       h
> +**     ...
> +*/
> +void g5(int x)
> +{
> +  if (__builtin_expect (x & 0xfeff, 1))
> +    h ();
> +}
  
Tamar Christina Nov. 15, 2022, 10:42 a.m. UTC | #3
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 10:36 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> > Hello,
> >
> > Ping and updated patch.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >         (tbranch<mode>4): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> >         * gcc.target/aarch64/tbz_1.c: New test.
> >
> > --- inline copy of patch ---
> >
> > diff --git a/gcc/config/aarch64/aarch64.md
> > b/gcc/config/aarch64/aarch64.md index
> >
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> 71
> > 2bde55c7c72e 100644
> > --- a/gcc/config/aarch64/aarch64.md
> > +++ b/gcc/config/aarch64/aarch64.md
> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >                       (const_int 1)))]
> >  )
> >
> > -(define_insn "*tb<optab><mode>1"
> > +(define_expand "tbranch<mode>4"
> >    [(set (pc) (if_then_else
> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
> "r")
> > -                                   (const_int 1)
> > -                                   (match_operand 1
> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> > +               (match_operator 0 "aarch64_comparison_operator"
> > +                [(match_operand:ALLI 1 "register_operand")
> > +                 (match_operand:ALLI 2
> "aarch64_simd_shift_imm_<ALLI:mode>")])
> > +               (label_ref (match_operand 3 "" ""))
> > +               (pc)))]
> > +  "optimize > 0"
> 
> Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.
> 

Hi,

I had explained the reason why in the original patch, just didn't repeat it in the ping:

Instead of emitting the instruction directly I've chosen to expand the pattern using a zero extract and generating the existing pattern for comparisons for two
reasons:

  1. Allows for CSE of the actual comparison.
  2. It looks like the code in expand makes the label as unused and removed it
     if it doesn't see a separate reference to it.

Because of this expansion though I disable the pattern at -O0 since we have no combine in that case so we'd end up with worse code.  I did try emitting the pattern directly, but as mentioned in no#2 expand would then kill the label.

Basically I emit the pattern directly, immediately during expand the label is marked as dead for some weird reason.

Tamar.

> I think the split here shows the difficulty with having a single optab and a
> comparison operator though.  operand 0 can be something like:
> 
>   (eq x 1)
> 
> but we're not comparing x for equality with 1.  We're testing whether bit 1 is
> zero.  This means that operand 0 can't be taken literally and can't be used
> directly in insn patterns.
> 
> In an earlier review, I'd said:
> 
>   For the TB instructions (and for other similar instructions that I've
>   seen on other architectures) it would be more useful to have a single-bit
>   test, with operand 4 specifying the bit position.  Arguably it might then
>   be better to have separate eq and ne optabs, to avoid the awkward
> doubling
>   of the operands (operand 1 contains operands 2 and 3).
> 
> I think we should do that eq/ne split (sorry for not pushing harder for it
> before).
> 
> Thanks,
> Richard
> 
> 
> 
> > +{
> > +  rtx bitvalue = gen_reg_rtx (DImode);
> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
> > +(operands[1]), 0);
> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> > +  operands[2] = const0_rtx;
> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
> bitvalue,
> > +                                        operands[2]);
> > +})
> > +
> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> > +  [(set (pc) (if_then_else
> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand"
> "r")
> > +                                    (const_int 1)
> > +                                    (match_operand 1
> > +
> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
> >                    (const_int 0))
> >              (label_ref (match_operand 2 "" ""))
> >              (pc)))
> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
> >        {
> >         if (get_attr_far_branch (insn) == 1)
> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
> > -                                        "<inv_tb>\\t%<w>0, %1, ");
> > +                                        "<inv_tb>\\t%<ALLI:w>0, %1,
> > + ");
> >         else
> >           {
> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
> (operands[1]));
> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
> >           }
> >        }
> >      else
> > -      return "<tbz>\t%<w>0, %1, %l2";
> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
> >    }
> >    [(set_attr "type" "branch")
> >     (set (attr "length")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > new file mode 100644
> > index
> >
> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
> e1
> > a0cff6774463
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> > @@ -0,0 +1,95 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
> > +-fno-asynchronous-unwind-tables" } */
> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } }
> > +} */
> > +
> > +#include <stdbool.h>
> > +
> > +void h(void);
> > +
> > +/*
> > +** g1:
> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> > +**     ret
> > +**     ...
> > +*/
> > +void g1(bool x)
> > +{
> > +  if (__builtin_expect (x, 0))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g2:
> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> > +**     b       h
> > +**     ...
> > +*/
> > +void g2(bool x)
> > +{
> > +  if (__builtin_expect (x, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_ge:
> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_ge(int x)
> > +{
> > +  if (__builtin_expect (x >= 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_gt:
> > +**     cmp     w[0-9]+, 0
> > +**     ble     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_gt(int x)
> > +{
> > +  if (__builtin_expect (x > 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_lt:
> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_lt(int x)
> > +{
> > +  if (__builtin_expect (x < 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g3_le:
> > +**     cmp     w[0-9]+, 0
> > +**     bgt     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g3_le(int x)
> > +{
> > +  if (__builtin_expect (x <= 0, 1))
> > +    h ();
> > +}
> > +
> > +/*
> > +** g5:
> > +**     mov     w[0-9]+, 65279
> > +**     tst     w[0-9]+, w[0-9]+
> > +**     beq     .L[0-9]+
> > +**     b       h
> > +**     ...
> > +*/
> > +void g5(int x)
> > +{
> > +  if (__builtin_expect (x & 0xfeff, 1))
> > +    h ();
> > +}
  
Richard Sandiford Nov. 15, 2022, 10:50 a.m. UTC | #4
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 10:36 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> > Hello,
>> >
>> > Ping and updated patch.
>> >
>> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >
>> > Ok for master?
>> >
>> > Thanks,
>> > Tamar
>> >
>> > gcc/ChangeLog:
>> >
>> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >         (tbranch<mode>4): New.
>> >
>> > gcc/testsuite/ChangeLog:
>> >
>> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >
>> > --- inline copy of patch ---
>> >
>> > diff --git a/gcc/config/aarch64/aarch64.md
>> > b/gcc/config/aarch64/aarch64.md index
>> >
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> 71
>> > 2bde55c7c72e 100644
>> > --- a/gcc/config/aarch64/aarch64.md
>> > +++ b/gcc/config/aarch64/aarch64.md
>> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >                       (const_int 1)))]
>> >  )
>> >
>> > -(define_insn "*tb<optab><mode>1"
>> > +(define_expand "tbranch<mode>4"
>> >    [(set (pc) (if_then_else
>> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
>> "r")
>> > -                                   (const_int 1)
>> > -                                   (match_operand 1
>> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> > +               (match_operator 0 "aarch64_comparison_operator"
>> > +                [(match_operand:ALLI 1 "register_operand")
>> > +                 (match_operand:ALLI 2
>> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> > +               (label_ref (match_operand 3 "" ""))
>> > +               (pc)))]
>> > +  "optimize > 0"
>> 
>> Why's the pattern conditional on optimize?  Seems a valid choice at -O0 too.
>> 
>
> Hi,
>
> I had explained the reason why in the original patch, just didn't repeat it in the ping:
>
> Instead of emitting the instruction directly I've chosen to expand the pattern using a zero extract and generating the existing pattern for comparisons for two
> reasons:
>
>   1. Allows for CSE of the actual comparison.
>   2. It looks like the code in expand makes the label as unused and removed it
>      if it doesn't see a separate reference to it.
>
> Because of this expansion though I disable the pattern at -O0 since we have no combine in that case so we'd end up with worse code.  I did try emitting the pattern directly, but as mentioned in no#2 expand would then kill the label.
>
> Basically I emit the pattern directly, immediately during expand the label is marked as dead for some weird reason.

Isn't #2 a bug though?  It seems like something we should fix rather than
work around.

Thanks,
Richard


>
> Tamar.
>
>> I think the split here shows the difficulty with having a single optab and a
>> comparison operator though.  operand 0 can be something like:
>> 
>>   (eq x 1)
>> 
>> but we're not comparing x for equality with 1.  We're testing whether bit 1 is
>> zero.  This means that operand 0 can't be taken literally and can't be used
>> directly in insn patterns.
>> 
>> In an earlier review, I'd said:
>> 
>>   For the TB instructions (and for other similar instructions that I've
>>   seen on other architectures) it would be more useful to have a single-bit
>>   test, with operand 4 specifying the bit position.  Arguably it might then
>>   be better to have separate eq and ne optabs, to avoid the awkward
>> doubling
>>   of the operands (operand 1 contains operands 2 and 3).
>> 
>> I think we should do that eq/ne split (sorry for not pushing harder for it
>> before).
>> 
>> Thanks,
>> Richard
>> 
>> 
>> 
>> > +{
>> > +  rtx bitvalue = gen_reg_rtx (DImode);
>> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
>> > +(operands[1]), 0);
>> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
>> > +  operands[2] = const0_rtx;
>> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
>> bitvalue,
>> > +                                        operands[2]);
>> > +})
>> > +
>> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
>> > +  [(set (pc) (if_then_else
>> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand"
>> "r")
>> > +                                    (const_int 1)
>> > +                                    (match_operand 1
>> > +
>> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>> >                    (const_int 0))
>> >              (label_ref (match_operand 2 "" ""))
>> >              (pc)))
>> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
>> >        {
>> >         if (get_attr_far_branch (insn) == 1)
>> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
>> > -                                        "<inv_tb>\\t%<w>0, %1, ");
>> > +                                        "<inv_tb>\\t%<ALLI:w>0, %1,
>> > + ");
>> >         else
>> >           {
>> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
>> (operands[1]));
>> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
>> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>> >           }
>> >        }
>> >      else
>> > -      return "<tbz>\t%<w>0, %1, %l2";
>> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>> >    }
>> >    [(set_attr "type" "branch")
>> >     (set (attr "length")
>> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > new file mode 100644
>> > index
>> >
>> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
>> e1
>> > a0cff6774463
>> > --- /dev/null
>> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
>> > @@ -0,0 +1,95 @@
>> > +/* { dg-do compile } */
>> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
>> > +-fno-asynchronous-unwind-tables" } */
>> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } } }
>> > +} */
>> > +
>> > +#include <stdbool.h>
>> > +
>> > +void h(void);
>> > +
>> > +/*
>> > +** g1:
>> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
>> > +**     ret
>> > +**     ...
>> > +*/
>> > +void g1(bool x)
>> > +{
>> > +  if (__builtin_expect (x, 0))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g2:
>> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g2(bool x)
>> > +{
>> > +  if (__builtin_expect (x, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_ge:
>> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_ge(int x)
>> > +{
>> > +  if (__builtin_expect (x >= 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_gt:
>> > +**     cmp     w[0-9]+, 0
>> > +**     ble     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_gt(int x)
>> > +{
>> > +  if (__builtin_expect (x > 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_lt:
>> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_lt(int x)
>> > +{
>> > +  if (__builtin_expect (x < 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g3_le:
>> > +**     cmp     w[0-9]+, 0
>> > +**     bgt     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g3_le(int x)
>> > +{
>> > +  if (__builtin_expect (x <= 0, 1))
>> > +    h ();
>> > +}
>> > +
>> > +/*
>> > +** g5:
>> > +**     mov     w[0-9]+, 65279
>> > +**     tst     w[0-9]+, w[0-9]+
>> > +**     beq     .L[0-9]+
>> > +**     b       h
>> > +**     ...
>> > +*/
>> > +void g5(int x)
>> > +{
>> > +  if (__builtin_expect (x & 0xfeff, 1))
>> > +    h ();
>> > +}
  
Tamar Christina Nov. 15, 2022, 11 a.m. UTC | #5
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 10:51 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> > Hello,
> >> >
> >> > Ping and updated patch.
> >> >
> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >> >
> >> > Ok for master?
> >> >
> >> > Thanks,
> >> > Tamar
> >> >
> >> > gcc/ChangeLog:
> >> >
> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >         (tbranch<mode>4): New.
> >> >
> >> > gcc/testsuite/ChangeLog:
> >> >
> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >
> >> > --- inline copy of patch ---
> >> >
> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> > b/gcc/config/aarch64/aarch64.md index
> >> >
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> 71
> >> > 2bde55c7c72e 100644
> >> > --- a/gcc/config/aarch64/aarch64.md
> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >                       (const_int 1)))]
> >> >  )
> >> >
> >> > -(define_insn "*tb<optab><mode>1"
> >> > +(define_expand "tbranch<mode>4"
> >> >    [(set (pc) (if_then_else
> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
> >> "r")
> >> > -                                   (const_int 1)
> >> > -                                   (match_operand 1
> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> > +                 (match_operand:ALLI 2
> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> > +               (label_ref (match_operand 3 "" ""))
> >> > +               (pc)))]
> >> > +  "optimize > 0"
> >>
> >> Why's the pattern conditional on optimize?  Seems a valid choice at -O0
> too.
> >>
> >
> > Hi,
> >
> > I had explained the reason why in the original patch, just didn't repeat it in
> the ping:
> >
> > Instead of emitting the instruction directly I've chosen to expand the
> > pattern using a zero extract and generating the existing pattern for
> > comparisons for two
> > reasons:
> >
> >   1. Allows for CSE of the actual comparison.
> >   2. It looks like the code in expand makes the label as unused and removed
> it
> >      if it doesn't see a separate reference to it.
> >
> > Because of this expansion though I disable the pattern at -O0 since we
> have no combine in that case so we'd end up with worse code.  I did try
> emitting the pattern directly, but as mentioned in no#2 expand would then
> kill the label.
> >
> > Basically I emit the pattern directly, immediately during expand the label is
> marked as dead for some weird reason.
> 
> Isn't #2 a bug though?  It seems like something we should fix rather than
> work around.

Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to split the optabs
still? Isn't the problem atm that I need the split?  If I'm emitting the instruction
directly then the recog pattern for it can just be (eq (vec_extract x 1) 0) which is
the correct semantics?

Thanks,
Tamar
> 
> Thanks,
> Richard
> 
> 
> >
> > Tamar.
> >
> >> I think the split here shows the difficulty with having a single
> >> optab and a comparison operator though.  operand 0 can be something
> like:
> >>
> >>   (eq x 1)
> >>
> >> but we're not comparing x for equality with 1.  We're testing whether
> >> bit 1 is zero.  This means that operand 0 can't be taken literally
> >> and can't be used directly in insn patterns.
> >>
> >> In an earlier review, I'd said:
> >>
> >>   For the TB instructions (and for other similar instructions that I've
> >>   seen on other architectures) it would be more useful to have a single-bit
> >>   test, with operand 4 specifying the bit position.  Arguably it might then
> >>   be better to have separate eq and ne optabs, to avoid the awkward
> >> doubling
> >>   of the operands (operand 1 contains operands 2 and 3).
> >>
> >> I think we should do that eq/ne split (sorry for not pushing harder
> >> for it before).
> >>
> >> Thanks,
> >> Richard
> >>
> >>
> >>
> >> > +{
> >> > +  rtx bitvalue = gen_reg_rtx (DImode);
> >> > +  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE
> >> > +(operands[1]), 0);
> >> > +  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
> >> > +  operands[2] = const0_rtx;
> >> > +  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]),
> >> bitvalue,
> >> > +                                        operands[2]);
> >> > +})
> >> > +
> >> > +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> >> > +  [(set (pc) (if_then_else
> >> > +             (EQL (zero_extract:GPI (match_operand:ALLI 0
> "register_operand"
> >> "r")
> >> > +                                    (const_int 1)
> >> > +                                    (match_operand 1
> >> > +
> >> > +"aarch64_simd_shift_imm_<ALLI:mode>" "n"))
> >> >                    (const_int 0))
> >> >              (label_ref (match_operand 2 "" ""))
> >> >              (pc)))
> >> > @@ -959,15 +976,15 @@ (define_insn "*tb<optab><mode>1"
> >> >        {
> >> >         if (get_attr_far_branch (insn) == 1)
> >> >           return aarch64_gen_far_branch (operands, 2, "Ltb",
> >> > -                                        "<inv_tb>\\t%<w>0, %1, ");
> >> > +                                        "<inv_tb>\\t%<ALLI:w>0,
> >> > + %1, ");
> >> >         else
> >> >           {
> >> >             operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL
> >> (operands[1]));
> >> > -           return "tst\t%<w>0, %1\;<bcond>\t%l2";
> >> > +           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
> >> >           }
> >> >        }
> >> >      else
> >> > -      return "<tbz>\t%<w>0, %1, %l2";
> >> > +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
> >> >    }
> >> >    [(set_attr "type" "branch")
> >> >     (set (attr "length")
> >> > diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > new file mode 100644
> >> > index
> >> >
> >>
> 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549c
> >> e1
> >> > a0cff6774463
> >> > --- /dev/null
> >> > +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> >> > @@ -0,0 +1,95 @@
> >> > +/* { dg-do compile } */
> >> > +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables
> >> > +-fno-asynchronous-unwind-tables" } */
> >> > +/* { dg-final { check-function-bodies "**" "" "" { target { le } }
> >> > +} } */
> >> > +
> >> > +#include <stdbool.h>
> >> > +
> >> > +void h(void);
> >> > +
> >> > +/*
> >> > +** g1:
> >> > +**     tbnz    x[0-9]+, #?0, .L([0-9]+)
> >> > +**     ret
> >> > +**     ...
> >> > +*/
> >> > +void g1(bool x)
> >> > +{
> >> > +  if (__builtin_expect (x, 0))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g2:
> >> > +**     tbz     x[0-9]+, #?0, .L([0-9]+)
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g2(bool x)
> >> > +{
> >> > +  if (__builtin_expect (x, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_ge:
> >> > +**     tbnz    w[0-9]+, #?31, .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_ge(int x)
> >> > +{
> >> > +  if (__builtin_expect (x >= 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_gt:
> >> > +**     cmp     w[0-9]+, 0
> >> > +**     ble     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_gt(int x)
> >> > +{
> >> > +  if (__builtin_expect (x > 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_lt:
> >> > +**     tbz     w[0-9]+, #?31, .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_lt(int x)
> >> > +{
> >> > +  if (__builtin_expect (x < 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g3_le:
> >> > +**     cmp     w[0-9]+, 0
> >> > +**     bgt     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g3_le(int x)
> >> > +{
> >> > +  if (__builtin_expect (x <= 0, 1))
> >> > +    h ();
> >> > +}
> >> > +
> >> > +/*
> >> > +** g5:
> >> > +**     mov     w[0-9]+, 65279
> >> > +**     tst     w[0-9]+, w[0-9]+
> >> > +**     beq     .L[0-9]+
> >> > +**     b       h
> >> > +**     ...
> >> > +*/
> >> > +void g5(int x)
> >> > +{
> >> > +  if (__builtin_expect (x & 0xfeff, 1))
> >> > +    h ();
> >> > +}
  
Richard Sandiford Nov. 15, 2022, 11:14 a.m. UTC | #6
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 10:51 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> > Hello,
>> >> >
>> >> > Ping and updated patch.
>> >> >
>> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >> >
>> >> > Ok for master?
>> >> >
>> >> > Thanks,
>> >> > Tamar
>> >> >
>> >> > gcc/ChangeLog:
>> >> >
>> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
>> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >         (tbranch<mode>4): New.
>> >> >
>> >> > gcc/testsuite/ChangeLog:
>> >> >
>> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >
>> >> > --- inline copy of patch ---
>> >> >
>> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> 71
>> >> > 2bde55c7c72e 100644
>> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >                       (const_int 1)))]
>> >> >  )
>> >> >
>> >> > -(define_insn "*tb<optab><mode>1"
>> >> > +(define_expand "tbranch<mode>4"
>> >> >    [(set (pc) (if_then_else
>> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand"
>> >> "r")
>> >> > -                                   (const_int 1)
>> >> > -                                   (match_operand 1
>> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> > +                 (match_operand:ALLI 2
>> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> > +               (label_ref (match_operand 3 "" ""))
>> >> > +               (pc)))]
>> >> > +  "optimize > 0"
>> >>
>> >> Why's the pattern conditional on optimize?  Seems a valid choice at -O0
>> too.
>> >>
>> >
>> > Hi,
>> >
>> > I had explained the reason why in the original patch, just didn't repeat it in
>> the ping:
>> >
>> > Instead of emitting the instruction directly I've chosen to expand the
>> > pattern using a zero extract and generating the existing pattern for
>> > comparisons for two
>> > reasons:
>> >
>> >   1. Allows for CSE of the actual comparison.
>> >   2. It looks like the code in expand makes the label as unused and removed
>> it
>> >      if it doesn't see a separate reference to it.
>> >
>> > Because of this expansion though I disable the pattern at -O0 since we
>> have no combine in that case so we'd end up with worse code.  I did try
>> emitting the pattern directly, but as mentioned in no#2 expand would then
>> kill the label.
>> >
>> > Basically I emit the pattern directly, immediately during expand the label is
>> marked as dead for some weird reason.
>> 
>> Isn't #2 a bug though?  It seems like something we should fix rather than
>> work around.
>
> Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to split the optabs
> still? Isn't the problem atm that I need the split?  If I'm emitting the instruction
> directly then the recog pattern for it can just be (eq (vec_extract x 1) 0) which is
> the correct semantics?

What rtx does the code that uses the optab pass for operand 0?

Richard
  
Tamar Christina Nov. 15, 2022, 11:23 a.m. UTC | #7
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:15 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> > Hello,
> >> >> >
> >> >> > Ping and updated patch.
> >> >> >
> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >> >> >
> >> >> > Ok for master?
> >> >> >
> >> >> > Thanks,
> >> >> > Tamar
> >> >> >
> >> >> > gcc/ChangeLog:
> >> >> >
> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename
> to...
> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >         (tbranch<mode>4): New.
> >> >> >
> >> >> > gcc/testsuite/ChangeLog:
> >> >> >
> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >
> >> >> > --- inline copy of patch ---
> >> >> >
> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> 71
> >> >> > 2bde55c7c72e 100644
> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >                       (const_int 1)))]
> >> >> >  )
> >> >> >
> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >    [(set (pc) (if_then_else
> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> "register_operand"
> >> >> "r")
> >> >> > -                                   (const_int 1)
> >> >> > -                                   (match_operand 1
> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> > +                 (match_operand:ALLI 2
> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> > +               (pc)))]
> >> >> > +  "optimize > 0"
> >> >>
> >> >> Why's the pattern conditional on optimize?  Seems a valid choice
> >> >> at -O0
> >> too.
> >> >>
> >> >
> >> > Hi,
> >> >
> >> > I had explained the reason why in the original patch, just didn't
> >> > repeat it in
> >> the ping:
> >> >
> >> > Instead of emitting the instruction directly I've chosen to expand
> >> > the pattern using a zero extract and generating the existing
> >> > pattern for comparisons for two
> >> > reasons:
> >> >
> >> >   1. Allows for CSE of the actual comparison.
> >> >   2. It looks like the code in expand makes the label as unused and
> >> > removed
> >> it
> >> >      if it doesn't see a separate reference to it.
> >> >
> >> > Because of this expansion though I disable the pattern at -O0 since
> >> > we
> >> have no combine in that case so we'd end up with worse code.  I did
> >> try emitting the pattern directly, but as mentioned in no#2 expand
> >> would then kill the label.
> >> >
> >> > Basically I emit the pattern directly, immediately during expand
> >> > the label is
> >> marked as dead for some weird reason.
> >>
> >> Isn't #2 a bug though?  It seems like something we should fix rather
> >> than work around.
> >
> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> > split the optabs still? Isn't the problem atm that I need the split?
> > If I'm emitting the instruction directly then the recog pattern for it
> > can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> 
> What rtx does the code that uses the optab pass for operand 0?

It gets passed the full comparison:

(eq (reg/v:SI 92 [ x ])
    (const_int 0 [0]))

of which we only look at the operator.

Tamar.

> 
> Richard
  
Richard Sandiford Nov. 15, 2022, 11:33 a.m. UTC | #8
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 11:15 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 10:51 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> -----Original Message-----
>> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> Shawcroft
>> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >>
>> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> > Hello,
>> >> >> >
>> >> >> > Ping and updated patch.
>> >> >> >
>> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>> >> >> >
>> >> >> > Ok for master?
>> >> >> >
>> >> >> > Thanks,
>> >> >> > Tamar
>> >> >> >
>> >> >> > gcc/ChangeLog:
>> >> >> >
>> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename
>> to...
>> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >> >         (tbranch<mode>4): New.
>> >> >> >
>> >> >> > gcc/testsuite/ChangeLog:
>> >> >> >
>> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >> >
>> >> >> > --- inline copy of patch ---
>> >> >> >
>> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >> >
>> >> >>
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> >> 71
>> >> >> > 2bde55c7c72e 100644
>> >> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >> >                       (const_int 1)))]
>> >> >> >  )
>> >> >> >
>> >> >> > -(define_insn "*tb<optab><mode>1"
>> >> >> > +(define_expand "tbranch<mode>4"
>> >> >> >    [(set (pc) (if_then_else
>> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
>> "register_operand"
>> >> >> "r")
>> >> >> > -                                   (const_int 1)
>> >> >> > -                                   (match_operand 1
>> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> >> > +                 (match_operand:ALLI 2
>> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> >> > +               (label_ref (match_operand 3 "" ""))
>> >> >> > +               (pc)))]
>> >> >> > +  "optimize > 0"
>> >> >>
>> >> >> Why's the pattern conditional on optimize?  Seems a valid choice
>> >> >> at -O0
>> >> too.
>> >> >>
>> >> >
>> >> > Hi,
>> >> >
>> >> > I had explained the reason why in the original patch, just didn't
>> >> > repeat it in
>> >> the ping:
>> >> >
>> >> > Instead of emitting the instruction directly I've chosen to expand
>> >> > the pattern using a zero extract and generating the existing
>> >> > pattern for comparisons for two
>> >> > reasons:
>> >> >
>> >> >   1. Allows for CSE of the actual comparison.
>> >> >   2. It looks like the code in expand makes the label as unused and
>> >> > removed
>> >> it
>> >> >      if it doesn't see a separate reference to it.
>> >> >
>> >> > Because of this expansion though I disable the pattern at -O0 since
>> >> > we
>> >> have no combine in that case so we'd end up with worse code.  I did
>> >> try emitting the pattern directly, but as mentioned in no#2 expand
>> >> would then kill the label.
>> >> >
>> >> > Basically I emit the pattern directly, immediately during expand
>> >> > the label is
>> >> marked as dead for some weird reason.
>> >>
>> >> Isn't #2 a bug though?  It seems like something we should fix rather
>> >> than work around.
>> >
>> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
>> > split the optabs still? Isn't the problem atm that I need the split?
>> > If I'm emitting the instruction directly then the recog pattern for it
>> > can just be (eq (vec_extract x 1) 0) which is the correct semantics?
>> 
>> What rtx does the code that uses the optab pass for operand 0?
>
> It gets passed the full comparison:
>
> (eq (reg/v:SI 92 [ x ])
>     (const_int 0 [0]))
>
> of which we only look at the operator.

OK, that's what I thought.  The problem is then the one I mentioned above.
This rtx doesn't describe the operation that the optab is supposed to
perform, so it can never be used in the instruction pattern.  (This is
different from something like cbranch, where operand 0 can be used directly
if the target supports a very general compare-and-branch instruction.)

If we want to use a single optab, the code that generates the optab should
pass something like:

  (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))

as operand 0, so that operand 0 specifies the real test condition.

Thanks,
Richard
  
Tamar Christina Nov. 15, 2022, 11:39 a.m. UTC | #9
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:34 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 11:15 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> -----Original Message-----
> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> Shawcroft
> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >>
> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> > Hello,
> >> >> >> >
> >> >> >> > Ping and updated patch.
> >> >> >> >
> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
> issues.
> >> >> >> >
> >> >> >> > Ok for master?
> >> >> >> >
> >> >> >> > Thanks,
> >> >> >> > Tamar
> >> >> >> >
> >> >> >> > gcc/ChangeLog:
> >> >> >> >
> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
> >> >> >> > Rename
> >> to...
> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >> >         (tbranch<mode>4): New.
> >> >> >> >
> >> >> >> > gcc/testsuite/ChangeLog:
> >> >> >> >
> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >> >
> >> >> >> > --- inline copy of patch ---
> >> >> >> >
> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >> >
> >> >> >>
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> >> 71
> >> >> >> > 2bde55c7c72e 100644
> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >> >                       (const_int 1)))]
> >> >> >> >  )
> >> >> >> >
> >> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >> >    [(set (pc) (if_then_else
> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> >> "register_operand"
> >> >> >> "r")
> >> >> >> > -                                   (const_int 1)
> >> >> >> > -                                   (match_operand 1
> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> >> > +                 (match_operand:ALLI 2
> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> >> > +               (pc)))]
> >> >> >> > +  "optimize > 0"
> >> >> >>
> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
> >> >> >> choice at -O0
> >> >> too.
> >> >> >>
> >> >> >
> >> >> > Hi,
> >> >> >
> >> >> > I had explained the reason why in the original patch, just
> >> >> > didn't repeat it in
> >> >> the ping:
> >> >> >
> >> >> > Instead of emitting the instruction directly I've chosen to
> >> >> > expand the pattern using a zero extract and generating the
> >> >> > existing pattern for comparisons for two
> >> >> > reasons:
> >> >> >
> >> >> >   1. Allows for CSE of the actual comparison.
> >> >> >   2. It looks like the code in expand makes the label as unused
> >> >> > and removed
> >> >> it
> >> >> >      if it doesn't see a separate reference to it.
> >> >> >
> >> >> > Because of this expansion though I disable the pattern at -O0
> >> >> > since we
> >> >> have no combine in that case so we'd end up with worse code.  I
> >> >> did try emitting the pattern directly, but as mentioned in no#2
> >> >> expand would then kill the label.
> >> >> >
> >> >> > Basically I emit the pattern directly, immediately during expand
> >> >> > the label is
> >> >> marked as dead for some weird reason.
> >> >>
> >> >> Isn't #2 a bug though?  It seems like something we should fix
> >> >> rather than work around.
> >> >
> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> >> > split the optabs still? Isn't the problem atm that I need the split?
> >> > If I'm emitting the instruction directly then the recog pattern for
> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> >>
> >> What rtx does the code that uses the optab pass for operand 0?
> >
> > It gets passed the full comparison:
> >
> > (eq (reg/v:SI 92 [ x ])
> >     (const_int 0 [0]))
> >
> > of which we only look at the operator.
> 
> OK, that's what I thought.  The problem is then the one I mentioned above.
> This rtx doesn't describe the operation that the optab is supposed to
> perform, so it can never be used in the instruction pattern.  (This is different
> from something like cbranch, where operand 0 can be used directly if the
> target supports a very general compare-and-branch instruction.)
> 
> If we want to use a single optab, the code that generates the optab should
> pass something like:
> 
>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
> 
> as operand 0, so that operand 0 specifies the real test condition.

Ok, I guess you're worried about the generic case as another target could
could use operand0 as is rather than looking at the operator only like we do.

I think I rather change the RTX expression, as I do so anyway to add the pos.
This way I avoid another back and forth about the generic optab in the mid-end..

So I'll change the RTX, thanks!

> 
> Thanks,
> Richard
  
Tamar Christina Nov. 22, 2022, 1:48 p.m. UTC | #10
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, November 15, 2022 11:34 AM
> To: Tamar Christina <Tamar.Christina@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> <Marcus.Shawcroft@arm.com>
> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> 
> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> Sent: Tuesday, November 15, 2022 11:15 AM
> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
> >> <Marcus.Shawcroft@arm.com>
> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >>
> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> -----Original Message-----
> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> Shawcroft
> >> >> <Marcus.Shawcroft@arm.com>
> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >>
> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> -----Original Message-----
> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
> >> Shawcroft
> >> >> >> <Marcus.Shawcroft@arm.com>
> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
> >> >> >>
> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
> >> >> >> > Hello,
> >> >> >> >
> >> >> >> > Ping and updated patch.
> >> >> >> >
> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
> issues.
> >> >> >> >
> >> >> >> > Ok for master?
> >> >> >> >
> >> >> >> > Thanks,
> >> >> >> > Tamar
> >> >> >> >
> >> >> >> > gcc/ChangeLog:
> >> >> >> >
> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
> >> >> >> > Rename
> >> to...
> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> >> >> >> >         (tbranch<mode>4): New.
> >> >> >> >
> >> >> >> > gcc/testsuite/ChangeLog:
> >> >> >> >
> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
> >> >> >> >
> >> >> >> > --- inline copy of patch ---
> >> >> >> >
> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
> >> >> >> > b/gcc/config/aarch64/aarch64.md index
> >> >> >> >
> >> >> >>
> >> >>
> >>
> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
> >> >> >> 71
> >> >> >> > 2bde55c7c72e 100644
> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
> >> >> >> >                       (const_int 1)))]
> >> >> >> >  )
> >> >> >> >
> >> >> >> > -(define_insn "*tb<optab><mode>1"
> >> >> >> > +(define_expand "tbranch<mode>4"
> >> >> >> >    [(set (pc) (if_then_else
> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
> >> "register_operand"
> >> >> >> "r")
> >> >> >> > -                                   (const_int 1)
> >> >> >> > -                                   (match_operand 1
> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
> >> >> >> > +                 (match_operand:ALLI 2
> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
> >> >> >> > +               (label_ref (match_operand 3 "" ""))
> >> >> >> > +               (pc)))]
> >> >> >> > +  "optimize > 0"
> >> >> >>
> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
> >> >> >> choice at -O0
> >> >> too.
> >> >> >>
> >> >> >
> >> >> > Hi,
> >> >> >
> >> >> > I had explained the reason why in the original patch, just
> >> >> > didn't repeat it in
> >> >> the ping:
> >> >> >
> >> >> > Instead of emitting the instruction directly I've chosen to
> >> >> > expand the pattern using a zero extract and generating the
> >> >> > existing pattern for comparisons for two
> >> >> > reasons:
> >> >> >
> >> >> >   1. Allows for CSE of the actual comparison.
> >> >> >   2. It looks like the code in expand makes the label as unused
> >> >> > and removed
> >> >> it
> >> >> >      if it doesn't see a separate reference to it.
> >> >> >
> >> >> > Because of this expansion though I disable the pattern at -O0
> >> >> > since we
> >> >> have no combine in that case so we'd end up with worse code.  I
> >> >> did try emitting the pattern directly, but as mentioned in no#2
> >> >> expand would then kill the label.
> >> >> >
> >> >> > Basically I emit the pattern directly, immediately during expand
> >> >> > the label is
> >> >> marked as dead for some weird reason.
> >> >>
> >> >> Isn't #2 a bug though?  It seems like something we should fix
> >> >> rather than work around.
> >> >
> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
> >> > split the optabs still? Isn't the problem atm that I need the split?
> >> > If I'm emitting the instruction directly then the recog pattern for
> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
> >>
> >> What rtx does the code that uses the optab pass for operand 0?
> >
> > It gets passed the full comparison:
> >
> > (eq (reg/v:SI 92 [ x ])
> >     (const_int 0 [0]))
> >
> > of which we only look at the operator.
> 
> OK, that's what I thought.  The problem is then the one I mentioned above.
> This rtx doesn't describe the operation that the optab is supposed to
> perform, so it can never be used in the instruction pattern.  (This is different
> from something like cbranch, where operand 0 can be used directly if the
> target supports a very general compare-and-branch instruction.)

So I was wrong before about which RTL it gets passed.  Deep in the expansion
Code the rtl operation 

(eq (reg/v:SI 92 [ x ])
      (const_int 0 [0]))

Gets broken up and passed piecewise.

First thing it does it explicitly check that the first argument in RTL is an operator:

gcc_assert (insn_operand_matches (icode, 0, test));

and then the jump is emitted by breaking apart the rtl into it's operands:

4646      insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
4647                          XEXP (test, 1), label));

And so the operands are:

>>> p debug (operand0)
(reg/v:SI 92 [ xD.4391 ])

>>> p debug (operand1)
(const_int 0 [0])

>>> p debug (operand2)
(code_label 0 0 0 2 (nil) [0 uses])

And targets never get to see the equality check.  If the documentation of the optab is
Updated to say that the target operand1 is to be used in a zero_extract with operand0
and compared with 0 then that should be fine no?  that's the semantic of the optab itself.

Based on that I don't think we need to split this optab do we?  Just update the docs to
clarify the zero extract semantics?

Thanks,
Tamar

> 
> If we want to use a single optab, the code that generates the optab should
> pass something like:
> 
>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
> 
> as operand 0, so that operand 0 specifies the real test condition.
> 
> Thanks,
> Richard
  
Richard Sandiford Nov. 22, 2022, 2 p.m. UTC | #11
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Tuesday, November 15, 2022 11:34 AM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> <Marcus.Shawcroft@arm.com>
>> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> 
>> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> -----Original Message-----
>> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> Sent: Tuesday, November 15, 2022 11:15 AM
>> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus Shawcroft
>> >> <Marcus.Shawcroft@arm.com>
>> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >>
>> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> -----Original Message-----
>> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> Sent: Tuesday, November 15, 2022 10:51 AM
>> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> Shawcroft
>> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >>
>> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> >> -----Original Message-----
>> >> >> >> From: Richard Sandiford <richard.sandiford@arm.com>
>> >> >> >> Sent: Tuesday, November 15, 2022 10:36 AM
>> >> >> >> To: Tamar Christina <Tamar.Christina@arm.com>
>> >> >> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw
>> >> >> >> <Richard.Earnshaw@arm.com>; nd <nd@arm.com>; Marcus
>> >> Shawcroft
>> >> >> >> <Marcus.Shawcroft@arm.com>
>> >> >> >> Subject: Re: [PATCH 2/2]AArch64 Support new tbranch optab.
>> >> >> >>
>> >> >> >> Tamar Christina <Tamar.Christina@arm.com> writes:
>> >> >> >> > Hello,
>> >> >> >> >
>> >> >> >> > Ping and updated patch.
>> >> >> >> >
>> >> >> >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no
>> issues.
>> >> >> >> >
>> >> >> >> > Ok for master?
>> >> >> >> >
>> >> >> >> > Thanks,
>> >> >> >> > Tamar
>> >> >> >> >
>> >> >> >> > gcc/ChangeLog:
>> >> >> >> >
>> >> >> >> >         * config/aarch64/aarch64.md (*tb<optab><mode>1):
>> >> >> >> > Rename
>> >> to...
>> >> >> >> >         (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
>> >> >> >> >         (tbranch<mode>4): New.
>> >> >> >> >
>> >> >> >> > gcc/testsuite/ChangeLog:
>> >> >> >> >
>> >> >> >> >         * gcc.target/aarch64/tbz_1.c: New test.
>> >> >> >> >
>> >> >> >> > --- inline copy of patch ---
>> >> >> >> >
>> >> >> >> > diff --git a/gcc/config/aarch64/aarch64.md
>> >> >> >> > b/gcc/config/aarch64/aarch64.md index
>> >> >> >> >
>> >> >> >>
>> >> >>
>> >>
>> 2bc2684b82c35a44e0a2cea6e3aaf32d939f8cdf..d7684c93fba5b717d568e1a4fd
>> >> >> >> 71
>> >> >> >> > 2bde55c7c72e 100644
>> >> >> >> > --- a/gcc/config/aarch64/aarch64.md
>> >> >> >> > +++ b/gcc/config/aarch64/aarch64.md
>> >> >> >> > @@ -943,12 +943,29 @@ (define_insn "*cb<optab><mode>1"
>> >> >> >> >                       (const_int 1)))]
>> >> >> >> >  )
>> >> >> >> >
>> >> >> >> > -(define_insn "*tb<optab><mode>1"
>> >> >> >> > +(define_expand "tbranch<mode>4"
>> >> >> >> >    [(set (pc) (if_then_else
>> >> >> >> > -             (EQL (zero_extract:DI (match_operand:GPI 0
>> >> "register_operand"
>> >> >> >> "r")
>> >> >> >> > -                                   (const_int 1)
>> >> >> >> > -                                   (match_operand 1
>> >> >> >> > -                                     "aarch64_simd_shift_imm_<mode>" "n"))
>> >> >> >> > +               (match_operator 0 "aarch64_comparison_operator"
>> >> >> >> > +                [(match_operand:ALLI 1 "register_operand")
>> >> >> >> > +                 (match_operand:ALLI 2
>> >> >> >> "aarch64_simd_shift_imm_<ALLI:mode>")])
>> >> >> >> > +               (label_ref (match_operand 3 "" ""))
>> >> >> >> > +               (pc)))]
>> >> >> >> > +  "optimize > 0"
>> >> >> >>
>> >> >> >> Why's the pattern conditional on optimize?  Seems a valid
>> >> >> >> choice at -O0
>> >> >> too.
>> >> >> >>
>> >> >> >
>> >> >> > Hi,
>> >> >> >
>> >> >> > I had explained the reason why in the original patch, just
>> >> >> > didn't repeat it in
>> >> >> the ping:
>> >> >> >
>> >> >> > Instead of emitting the instruction directly I've chosen to
>> >> >> > expand the pattern using a zero extract and generating the
>> >> >> > existing pattern for comparisons for two
>> >> >> > reasons:
>> >> >> >
>> >> >> >   1. Allows for CSE of the actual comparison.
>> >> >> >   2. It looks like the code in expand makes the label as unused
>> >> >> > and removed
>> >> >> it
>> >> >> >      if it doesn't see a separate reference to it.
>> >> >> >
>> >> >> > Because of this expansion though I disable the pattern at -O0
>> >> >> > since we
>> >> >> have no combine in that case so we'd end up with worse code.  I
>> >> >> did try emitting the pattern directly, but as mentioned in no#2
>> >> >> expand would then kill the label.
>> >> >> >
>> >> >> > Basically I emit the pattern directly, immediately during expand
>> >> >> > the label is
>> >> >> marked as dead for some weird reason.
>> >> >>
>> >> >> Isn't #2 a bug though?  It seems like something we should fix
>> >> >> rather than work around.
>> >> >
>> >> > Yes it's a bug ☹ ok if I'm going to fix that bug then do I need to
>> >> > split the optabs still? Isn't the problem atm that I need the split?
>> >> > If I'm emitting the instruction directly then the recog pattern for
>> >> > it can just be (eq (vec_extract x 1) 0) which is the correct semantics?
>> >>
>> >> What rtx does the code that uses the optab pass for operand 0?
>> >
>> > It gets passed the full comparison:
>> >
>> > (eq (reg/v:SI 92 [ x ])
>> >     (const_int 0 [0]))
>> >
>> > of which we only look at the operator.
>> 
>> OK, that's what I thought.  The problem is then the one I mentioned above.
>> This rtx doesn't describe the operation that the optab is supposed to
>> perform, so it can never be used in the instruction pattern.  (This is different
>> from something like cbranch, where operand 0 can be used directly if the
>> target supports a very general compare-and-branch instruction.)
>
> So I was wrong before about which RTL it gets passed.  Deep in the expansion
> Code the rtl operation 
>
> (eq (reg/v:SI 92 [ x ])
>       (const_int 0 [0]))
>
> Gets broken up and passed piecewise.
>
> First thing it does it explicitly check that the first argument in RTL is an operator:
>
> gcc_assert (insn_operand_matches (icode, 0, test));
>
> and then the jump is emitted by breaking apart the rtl into it's operands:
>
> 4646      insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> 4647                          XEXP (test, 1), label));

Yeah, but the question was what the code that generates the tbranch
optab passes for operand 0 ("test" in the call above).  And like you
said, it's the EQ rtx above, with XEXPs 0 and 1 being passed as operands
1 and 2.  I think the point still stands that that EQ rtx doesn't
describe the correct operation.

> And so the operands are:
>
>>>> p debug (operand0)
> (reg/v:SI 92 [ xD.4391 ])
>
>>>> p debug (operand1)
> (const_int 0 [0])
>
>>>> p debug (operand2)
> (code_label 0 0 0 2 (nil) [0 uses])
>
> And targets never get to see the equality check.

But the .md pattern was:

(define_expand "tbranch<mode>4"
  [(set (pc) (if_then_else
		(match_operator 0 "aarch64_comparison_operator"
		 [(match_operand:ALLI 1 "register_operand")
		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
		(label_ref (match_operand 3 "" ""))
		(pc)))]
  "optimize > 0"
{
  rtx bitvalue = gen_reg_rtx (DImode);
  rtx tmp = simplify_gen_subreg (DImode, operands[1], GET_MODE (operands[1]), 0);
  emit_insn (gen_extzv (bitvalue, tmp, const1_rtx, operands[2]));
  operands[2] = const0_rtx;
  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
					 operands[2]);
})

where the EQ/NE rtx is passed and matched as operand 0.

> If the documentation of the optab is
> Updated to say that the target operand1 is to be used in a zero_extract with operand0
> and compared with 0 then that should be fine no?  that's the semantic of the optab itself.
>
> Based on that I don't think we need to split this optab do we?  Just update the docs to
> clarify the zero extract semantics?

Well, the point of...

>> If we want to use a single optab, the code that generates the optab should
>> pass something like:
>> 
>>   (eq/ne (zero_extract op0 (const_int 1) op1) (const_int 0))
>> 
>> as operand 0, so that operand 0 specifies the real test condition.
>> 
>> Thanks,
>> Richard

...was that we should either (a) split the optab or (b) keep the single
optab and pass a "proper" description of the operation as operand 0.

Thanks,
Richard
  
Tamar Christina Dec. 1, 2022, 4:44 p.m. UTC | #12
Hi,

I hadn't received any reply so I had implemented various ways to do this (about 8 of them in fact).

The conclusion is that no, we cannot emit one big RTL for the final instruction immediately.
The reason that all comparisons in the AArch64 backend expand to separate CC compares, and
separate testing of the operands is for ifcvt.

The separate CC compare is needed so ifcvt can produce csel, cset etc from the compares.  Unlike
say combine, ifcvt can not do recog on a parallel with a clobber.  Should we emit the instruction
directly then ifcvt will not be able to say, make a csel, because we have no patterns which handle
zero_extract and compare. (unlike combine ifcvt cannot transform the extract into an AND).

While you could provide various patterns for this (and I did try) you end up with broken patterns
because you can't add the clobber to the CC register.  If you do, ifcvt recog fails.

i.e.

int
f1 (int x)
{
  if (x & 1)
    return 1;
  return x;
}

We lose csel here.

Secondly the reason the compare with an explicit CC mode is needed is so that ifcvt can transform
the operation into a version that doesn't require the flags to be set.  But it only does so if it know
the explicit usage of the CC reg.

For instance 

int
foo (int a, int b)
{
  return ((a & (1 << 25)) ? 5 : 4);
}

Doesn't require a comparison, the optimal form is:

foo(int, int):
        ubfx    x0, x0, 25, 1
        add     w0, w0, 4
        ret

and no compare is actually needed.  If you represent the instruction using an ANDS instead of a zero_extract
then you get close, but you end up with an ands followed by an add, which is a slower operation.

These two reasons are the main reasons why all comparisons in AArch64 expand the way they do, so tbranch
Shouldn't do anything differently here.  Additionally the reason for the optab was to pass range information
to the backend during expansion.

In this version however I have represented the expand using an ANDS instead.  This allows us not to regress
on -O0 as the previous version did.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Note that this patch relies on https://patchwork.sourceware.org/project/gcc/patch/Y1+4qItMrQHbdqqD@arm.com/ 
which has yet to be reviewed but which cleans up extensions so they can be used like this.

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
	(tbranch_<code><mode>4): New.
	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): Make dynamic calls with @.
	* config/aarch64/iterators.md(ZEROM, zerom): New.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch_<code><mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+              (EQL (match_operand:ALLI 0 "register_operand")
+                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
+              (label_ref (match_operand 2 ""))
+              (pc)))]
+  ""
+{
+  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+  rtx reg = gen_reg_rtx (<ZEROM>mode);
+  if (<MODE>mode == <ZEROM>mode)
+    reg = operands[0];
+  else
+    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));
+  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
+  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+  operands[1] = const0_rtx;
+  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+					 operands[1]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
         (zero_extend:SD_HSDI
 	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
 ;; Give the length suffix letter for a sign- or zero-extension.
 (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
 (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
 
 ;; Give the number of bits in the mode
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	w[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	w[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}
  
Richard Sandiford Dec. 5, 2022, 2:06 p.m. UTC | #13
Tamar Christina <Tamar.Christina@arm.com> writes:
> Hi,
>
> I hadn't received any reply so I had implemented various ways to do this (about 8 of them in fact).
>
> The conclusion is that no, we cannot emit one big RTL for the final instruction immediately.
> The reason that all comparisons in the AArch64 backend expand to separate CC compares, and
> separate testing of the operands is for ifcvt.
>
> The separate CC compare is needed so ifcvt can produce csel, cset etc from the compares.  Unlike
> say combine, ifcvt can not do recog on a parallel with a clobber.  Should we emit the instruction
> directly then ifcvt will not be able to say, make a csel, because we have no patterns which handle
> zero_extract and compare. (unlike combine ifcvt cannot transform the extract into an AND).
>
> While you could provide various patterns for this (and I did try) you end up with broken patterns
> because you can't add the clobber to the CC register.  If you do, ifcvt recog fails.
>
> i.e.
>
> int
> f1 (int x)
> {
>   if (x & 1)
>     return 1;
>   return x;
> }
>
> We lose csel here.
>
> Secondly the reason the compare with an explicit CC mode is needed is so that ifcvt can transform
> the operation into a version that doesn't require the flags to be set.  But it only does so if it know
> the explicit usage of the CC reg.
>
> For instance 
>
> int
> foo (int a, int b)
> {
>   return ((a & (1 << 25)) ? 5 : 4);
> }
>
> Doesn't require a comparison, the optimal form is:
>
> foo(int, int):
>         ubfx    x0, x0, 25, 1
>         add     w0, w0, 4
>         ret
>
> and no compare is actually needed.  If you represent the instruction using an ANDS instead of a zero_extract
> then you get close, but you end up with an ands followed by an add, which is a slower operation.
>
> These two reasons are the main reasons why all comparisons in AArch64 expand the way they do, so tbranch
> Shouldn't do anything differently here.

Thanks for the (useful) investigation.  Makes sense.

> Additionally the reason for the optab was to pass range information
> to the backend during expansion.

Yeah.  But I think the fundamental reason that AArch64 defines the
optab is still that it has native support for the associated operation
(which is a good thing, an honest reason).  The fact that we split it
apart for if-conversion---in a different form from normal comparisons---
is an implementation detail.  So it still seems like a proper optab,
rather than a crutch to convey tree info.

> In this version however I have represented the expand using an ANDS instead.  This allows us not to regress
> on -O0 as the previous version did.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Note that this patch relies on https://patchwork.sourceware.org/project/gcc/patch/Y1+4qItMrQHbdqqD@arm.com/ 
> which has yet to be reviewed but which cleans up extensions so they can be used like this.
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
> 	(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
> 	(tbranch_<code><mode>4): New.
> 	(zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
> 	zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): Make dynamic calls with @.
> 	* config/aarch64/iterators.md(ZEROM, zerom): New.
>
> gcc/testsuite/ChangeLog:
>
> 	* gcc.target/aarch64/tbz_1.c: New test.
>
> --- inline copy of patch ---
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
>  		      (const_int 1)))]
>  )
>  
> -(define_insn "*tb<optab><mode>1"
> +(define_expand "tbranch_<code><mode>4"
>    [(set (pc) (if_then_else
> -	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
> -				    (const_int 1)
> -				    (match_operand 1
> -				      "aarch64_simd_shift_imm_<mode>" "n"))
> +              (EQL (match_operand:ALLI 0 "register_operand")
> +                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
> +              (label_ref (match_operand 2 ""))
> +              (pc)))]
> +  ""
> +{
> +  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
> +  rtx reg = gen_reg_rtx (<ZEROM>mode);
> +  if (<MODE>mode == <ZEROM>mode)
> +    reg = operands[0];
> +  else
> +    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));

I think the last five lines should just be:

  rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);

using paradoxical subregs for the QI and HI cases.  Using subregs should
generate better code, since the temporary runs the risk of having the
same value live in two different pseudos at the same time (one pseudo
with the original mode, one pseudo with the extended mode).

OK with that change and without the changes to the zero_extend pattern names.

Thanks,
Richard

> +  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
> +  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
> +  operands[1] = const0_rtx;
> +  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
> +					 operands[1]);
> +})
> +
> +(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
> +  [(set (pc) (if_then_else
> +	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
> +				     (const_int 1)
> +				     (match_operand 1
> +				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
>  		   (const_int 0))
>  	     (label_ref (match_operand 2 "" ""))
>  	     (pc)))
> @@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
>        {
>  	if (get_attr_far_branch (insn) == 1)
>  	  return aarch64_gen_far_branch (operands, 2, "Ltb",
> -					 "<inv_tb>\\t%<w>0, %1, ");
> +					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
>  	else
>  	  {
>  	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
> -	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
> +	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
>  	  }
>        }
>      else
> -      return "<tbz>\t%<w>0, %1, %l2";
> +      return "<tbz>\t%<ALLI:w>0, %1, %l2";
>    }
>    [(set_attr "type" "branch")
>     (set (attr "length")
> @@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp")]
>  )
>  
> -(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
> @@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp,fp,fp,fp")]
>  )
>  
> -(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
> @@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
>     (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
>  )
>  
> -(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
> +(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
>    [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
>          (zero_extend:SD_HSDI
>  	  (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
>  ;; Give the length suffix letter for a sign- or zero-extension.
>  (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
>  (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
> +(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
> +(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
>  
>  ;; Give the number of bits in the mode
>  (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
> diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
> @@ -0,0 +1,95 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
> +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
> +
> +#include <stdbool.h>
> +
> +void h(void);
> +
> +/*
> +** g1:
> +** 	tbnz	w[0-9]+, #?0, .L([0-9]+)
> +** 	ret
> +**	...
> +*/
> +void g1(bool x)
> +{
> +  if (__builtin_expect (x, 0))
> +    h ();
> +}
> +
> +/*
> +** g2:
> +** 	tbz	w[0-9]+, #?0, .L([0-9]+)
> +** 	b	h
> +**	...
> +*/
> +void g2(bool x)
> +{
> +  if (__builtin_expect (x, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_ge:
> +** 	tbnz	w[0-9]+, #?31, .L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_ge(int x)
> +{
> +  if (__builtin_expect (x >= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_gt:
> +** 	cmp	w[0-9]+, 0
> +** 	ble	.L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_gt(int x)
> +{
> +  if (__builtin_expect (x > 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_lt:
> +** 	tbz	w[0-9]+, #?31, .L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_lt(int x)
> +{
> +  if (__builtin_expect (x < 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g3_le:
> +** 	cmp	w[0-9]+, 0
> +** 	bgt	.L[0-9]+
> +** 	b	h
> +**	...
> +*/
> +void g3_le(int x)
> +{
> +  if (__builtin_expect (x <= 0, 1))
> +    h ();
> +}
> +
> +/*
> +** g5:
> +** 	mov	w[0-9]+, 65279
> +** 	tst	w[0-9]+, w[0-9]+
> +** 	beq	.L[0-9]+
> +** 	b	h
> +**	...
> +*/ 
> +void g5(int x)
> +{
> +  if (__builtin_expect (x & 0xfeff, 1))
> +    h ();
> +}
  

Patch

--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -943,12 +943,28 @@  (define_insn "*cb<optab><mode>1"
 		      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch<mode>4"
   [(set (pc) (if_then_else
-	      (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-				    (const_int 1)
-				    (match_operand 1
-				      "aarch64_simd_shift_imm_<mode>" "n"))
+		(match_operator 0 "aarch64_comparison_operator"
+		 [(match_operand:ALLI 1 "register_operand")
+		  (match_operand:ALLI 2 "aarch64_simd_shift_imm_<ALLI:mode>")])
+		(label_ref (match_operand 3 "" ""))
+		(pc)))]
+  "optimize > 0"
+{
+  rtx bitvalue = gen_reg_rtx (DImode);
+  emit_insn (gen_extzv (bitvalue, operands[1], const1_rtx, operands[2]));
+  operands[2] = const0_rtx;
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), bitvalue,
+					 operands[2]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+	      (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+				     (const_int 1)
+				     (match_operand 1
+				       "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
 		   (const_int 0))
 	     (label_ref (match_operand 2 "" ""))
 	     (pc)))
@@ -959,15 +975,15 @@  (define_insn "*tb<optab><mode>1"
       {
 	if (get_attr_far_branch (insn) == 1)
 	  return aarch64_gen_far_branch (operands, 2, "Ltb",
-					 "<inv_tb>\\t%<w>0, %1, ");
+					 "<inv_tb>\\t%<ALLI:w>0, %1, ");
 	else
 	  {
 	    operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-	    return "tst\t%<w>0, %1\;<bcond>\t%l2";
+	    return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
 	  }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -5752,39 +5768,19 @@  (define_expand "<optab>"
 )
 
 
-(define_insn "*<optab><mode>"
+(define_insn "*<optab><GPI:mode><ALLI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(ANY_EXTRACT:GPI (match_operand:GPI 1 "register_operand" "r")
+	(ANY_EXTRACT:GPI (match_operand:ALLI 1 "register_operand" "r")
 			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_<mode>" "n")
+			   "aarch64_simd_shift_imm_offset_<ALLI:mode>" "n")
 			 (match_operand 3
-			   "aarch64_simd_shift_imm_<mode>" "n")))]
+			   "aarch64_simd_shift_imm_<ALLI:mode>" "n")))]
   "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]),
-	     1, GET_MODE_BITSIZE (<MODE>mode) - 1)"
-  "<su>bfx\\t%<w>0, %<w>1, %3, %2"
+	     1, GET_MODE_BITSIZE (<ALLI:MODE>mode))"
+  "<su>bfx\\t%<GPI:w>0, %<GPI:w>1, %3, %2"
   [(set_attr "type" "bfx")]
 )
 
-;; When the bit position and width add up to 32 we can use a W-reg LSR
-;; instruction taking advantage of the implicit zero-extension of the X-reg.
-(define_split
-  [(set (match_operand:DI 0 "register_operand")
-	(zero_extract:DI (match_operand:DI 1 "register_operand")
-			 (match_operand 2
-			   "aarch64_simd_shift_imm_offset_di")
-			 (match_operand 3
-			   "aarch64_simd_shift_imm_di")))]
-  "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1,
-	     GET_MODE_BITSIZE (DImode) - 1)
-   && (INTVAL (operands[2]) + INTVAL (operands[3]))
-       == GET_MODE_BITSIZE (SImode)"
-  [(set (match_dup 0)
-	(zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))]
-  {
-    operands[4] = gen_lowpart (SImode, operands[1]);
-  }
-)
-
 ;; Bitfield Insert (insv)
 (define_expand "insv<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..86f5d3e23cf7f1ea6f3596549ce1a0cff6774463
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** 	tbnz	x[0-9]+, #?0, .L([0-9]+)
+** 	ret
+**	...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+** 	tbz	x[0-9]+, #?0, .L([0-9]+)
+** 	b	h
+**	...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+** 	tbnz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+** 	cmp	w[0-9]+, 0
+** 	ble	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+** 	tbz	w[0-9]+, #?31, .L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+** 	cmp	w[0-9]+, 0
+** 	bgt	.L[0-9]+
+** 	b	h
+**	...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+** 	mov	w[0-9]+, 65279
+** 	tst	w[0-9]+, w[0-9]+
+** 	beq	.L[0-9]+
+** 	b	h
+**	...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+}