[1/2] middle-end: Add new tbranch optab to add support for bit-test-and-branch operations

Message ID patch-16485-tamar@arm.com
State Committed
Headers
Series [1/2] middle-end: Add new tbranch optab to add support for bit-test-and-branch operations |

Commit Message

Tamar Christina Oct. 31, 2022, 11:53 a.m. UTC
  Hi All,

This adds a new test-and-branch optab that can be used to do a conditional test
of a bit and branch.   This is similar to the cbranch optab but instead can
test any arbitrary bit inside the register.

This patch recognizes boolean comparisons and single bit mask tests.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when a value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch$a4): New.
	* doc/md.texi (tbranch@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch -- 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644




--
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6972,6 +6972,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cff37ccb0dfc3dd79b97d0abfd872f340855dc96..5b368f77e91d3fce29870f1a5b54a0301e6b7794 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4621,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4630,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4645,71 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+bool
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val)
+    return false;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return false;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return true;
+    }
+
+  if (TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (val);
+  if (!is_gimple_assign (def)
+      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
+    return false;
+
+  tree cst = gimple_assign_rhs2 (def);
+
+  if (!tree_fits_uhwi_p (cst))
+    return false;
+
+  tree op0 = gimple_assign_rhs1 (def);
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def = SSA_NAME_DEF_STMT (op0);
+      if (gimple_assign_cast_p (def))
+	op0 = gimple_assign_rhs1 (def);
+    }
+
+  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
+			    TYPE_PRECISION (TREE_TYPE (op0)));
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return false;
+
+  mode = TYPE_MODE (TREE_TYPE (op0));
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return true;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,15 +4727,18 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
   rtx test;
+  enum insn_code icode;
 
   /* Swap operands and condition to ensure canonical RTL.  */
   if (swap_commutative_operands_p (x, y)
@@ -4690,10 +4759,37 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode))
+    {
+      /* If the target supports the testbit comparison directly, great.  */
+      icode = direct_optab_handler (tbranch_optab, tmode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+	  return;
+	}
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 9947aed67fb8a3b675cb0aab9aeb059f89644106..623a596aca2f538a03602e02e6ac12f43f3303c4 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
  

Comments

Jeff Law Oct. 31, 2022, 9:16 p.m. UTC | #1
On 10/31/22 05:53, Tamar Christina wrote:
> Hi All,
>
> This adds a new test-and-branch optab that can be used to do a conditional test
> of a bit and branch.   This is similar to the cbranch optab but instead can
> test any arbitrary bit inside the register.
>
> This patch recognizes boolean comparisons and single bit mask tests.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optiobally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch$a4): New.
> 	* doc/md.texi (tbranch@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns):
> 	* tree.h (tree_zero_one_valued_p): New.
>
> --- inline copy of patch --
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a16b5a8a1819b66a 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6972,6 +6972,13 @@ case, you can and should make operand 1's predicate reject some operators
>   in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>   from the machine description.
>   
> +@cindex @code{tbranch@var{mode}4} instruction pattern
> +@item @samp{tbranch@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to.

Should we refine/document the set of comparison operators allowed?  Is
operand 1 an arbitrary RTL expression or more limited?  I'm guessing it's
relatively arbitrary given how you've massaged the existing
branch-on-bit patterns from the aarch64 backend.


> +
> +  if (TREE_CODE (val) != SSA_NAME)
> +    return false;
> +
> +  gimple *def = SSA_NAME_DEF_STMT (val);
> +  if (!is_gimple_assign (def)
> +      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
> +    return false;
> +
> +  tree cst = gimple_assign_rhs2 (def);
> +
> +  if (!tree_fits_uhwi_p (cst))
> +    return false;
> +
> +  tree op0 = gimple_assign_rhs1 (def);
> +  if (TREE_CODE (op0) == SSA_NAME)
> +    {
> +      def = SSA_NAME_DEF_STMT (op0);
> +      if (gimple_assign_cast_p (def))
> +	op0 = gimple_assign_rhs1 (def);
> +    }
> +
> +  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
> +			    TYPE_PRECISION (TREE_TYPE (op0)));
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return false;

Do we have enough information lying around from Ranger to avoid the need 
to walk the def-use chain to discover that we're masking off all but one 
bit?



>   
>
> diff --git a/gcc/tree.h b/gcc/tree.h
> index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>   extern tree signed_type_for (tree);
>   extern tree unsigned_type_for (tree);
>   extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);

I don't see a definition of this anywhere.


jeff
  
Tamar Christina Nov. 1, 2022, 3:53 p.m. UTC | #2
> -----Original Message-----
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Monday, October 31, 2022 9:16 PM
> To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; rguenther@suse.de
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> 
> On 10/31/22 05:53, Tamar Christina wrote:
> > Hi All,
> >
> > This adds a new test-and-branch optab that can be used to do a conditional
> test
> > of a bit and branch.   This is similar to the cbranch optab but instead can
> > test any arbitrary bit inside the register.
> >
> > This patch recognizes boolean comparisons and single bit mask tests.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master?
> >
> > Thanks,
> > Tamar
> >
> > gcc/ChangeLog:
> >
> > 	* dojump.cc (do_jump): Pass along value.
> > 	(do_jump_by_parts_greater_rtx): Likewise.
> > 	(do_jump_by_parts_zero_rtx): Likewise.
> > 	(do_jump_by_parts_equality_rtx): Likewise.
> > 	(do_compare_rtx_and_jump): Likewise.
> > 	(do_compare_and_jump): Likewise.
> > 	* dojump.h (do_compare_rtx_and_jump): New.
> > 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab
> to check.
> > 	(validate_test_and_branch): New.
> > 	(emit_cmp_and_jump_insns): Optiobally take a value, and when
> value is
> > 	supplied then check if it's suitable for tbranch.
> > 	* optabs.def (tbranch$a4): New.
> > 	* doc/md.texi (tbranch@var{mode}4): Document it.
> > 	* optabs.h (emit_cmp_and_jump_insns):
> > 	* tree.h (tree_zero_one_valued_p): New.
> >
> > --- inline copy of patch --
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> c08691ab4c9a4bfe55ae81e5e228a414d6242d78..f8b32ec12f46d3fb3815f121a1
> 6b
> > 5a8a1819b66a 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6972,6 +6972,13 @@ case, you can and should make operand 1's
> predicate reject some operators
> >   in the @samp{cstore@var{mode}4} pattern, or remove the pattern
> altogether
> >   from the machine description.
> >
> > +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > +@samp{tbranch@var{mode}4} Conditional branch instruction combined
> > +with a bit test-and-compare instruction. Operand 0 is a comparison
> > +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > +the bit position of Operand 1 to test.
> > +Operand 3 is the @code{code_label} to jump to.
> 
> Should we refine/document the set of comparison operators allowed?    Is
> operand 1 an arbitrary RTL expression or more limited?  I'm guessing its
> relatively arbitrary given how you've massaged the existing branch-on-bit
> patterns from the aarch backend.

It can be any expression in theory.  However, in practical terms we usually force
the values to registers before calling the expansion.  My assumption is that this
is for CSE purposes but that's only a guess.

> 
> 
> > +
> > +  if (TREE_CODE (val) != SSA_NAME)
> > +    return false;
> > +
> > +  gimple *def = SSA_NAME_DEF_STMT (val);  if (!is_gimple_assign (def)
> > +      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
> > +    return false;
> > +
> > +  tree cst = gimple_assign_rhs2 (def);
> > +
> > +  if (!tree_fits_uhwi_p (cst))
> > +    return false;
> > +
> > +  tree op0 = gimple_assign_rhs1 (def);
> > +  if (TREE_CODE (op0) == SSA_NAME)
> > +    {
> > +      def = SSA_NAME_DEF_STMT (op0);
> > +      if (gimple_assign_cast_p (def))
> > +	op0 = gimple_assign_rhs1 (def);
> > +    }
> > +
> > +  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
> > +			    TYPE_PRECISION (TREE_TYPE (op0)));
> > +  int bitpos;
> > +
> > +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> > +    return false;
> 
> Do we have enough information lying around from Ranger to avoid the need
> to walk the def-use chain to discover that we're masking off all but one bit?
> 

That's an interesting thought.  I'll try to see if I can figure out how to query
Ranger here.  It would be nice to do so here.

Cheers,
Tamar

> 
> 
> >
> >
> > diff --git a/gcc/tree.h b/gcc/tree.h
> > index
> >
> 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c
> 878
> > c10f9fe9e1f1 100644
> > --- a/gcc/tree.h
> > +++ b/gcc/tree.h
> > @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int,
> tree);
> >   extern tree signed_type_for (tree);
> >   extern tree unsigned_type_for (tree);
> >   extern bool is_truth_type_for (tree, tree);
> > +extern bool tree_zero_one_valued_p (tree);
> 
> I don't see a definition of this anywhere.
> 
> 
> jeff
>
  
Jeff Law Nov. 1, 2022, 5 p.m. UTC | #3
On 11/1/22 09:53, Tamar Christina wrote:
>>
>>>    from the machine description.
>>>
>>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
>>> +@samp{tbranch@var{mode}4} Conditional branch instruction combined
>>> +with a bit test-and-compare instruction. Operand 0 is a comparison
>>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
>>> +the bit position of Operand 1 to test.
>>> +Operand 3 is the @code{code_label} to jump to.
>> Should we refine/document the set of comparison operators allowed?    Is
>> operand 1 an arbitrary RTL expression or more limited?  I'm guessing its
>> relatively arbitrary given how you've massaged the existing branch-on-bit
>> patterns from the aarch backend.
> It can be any expression in theory. However in practical terms we usually force
> the values to registers before calling the expansion.  My assumption is that this
> is for CSE purposes but that's only a guess.

Understood.  And generally yes, forcing expressions into regs is good 
for CSE.


>
>> Do we have enough information lying around from Ranger to avoid the need
>> to walk the def-use chain to discover that we're masking off all but one bit?
>>
> That's an interesting thought.  I'll try to see if I can figure out how to query
> Ranger here.  It would be nice to do so here.

Reach out to Aldy, I suspect he can probably give you the necessary 
pseudocode pretty quickly.


Jeff
  
Tamar Christina Nov. 2, 2022, 9:55 a.m. UTC | #4
Hi Aldy,

I'm trying to use Ranger to determine if a range of an expression is a single bit.

If possible, in the case of a mask, I'd also like to get the position of the bit that's being checked by the mask (or the mask itself).

Do you have any pointers/existing code I can look at to do this?

Kind regards,
Tamar

> -----Original Message-----
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Tuesday, November 1, 2022 5:00 PM
> To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> Cc: nd <nd@arm.com>; rguenther@suse.de
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> 
> On 11/1/22 09:53, Tamar Christina wrote:
> >>
> >>>    from the machine description.
> >>>
> >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> combined
> >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> >>> +the bit position of Operand 1 to test.
> >>> +Operand 3 is the @code{code_label} to jump to.
> >> Should we refine/document the set of comparison operators allowed?
> >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> >> guessing its relatively arbitrary given how you've massaged the
> >> existing branch-on-bit patterns from the aarch backend.
> > It can be any expression in theory. However in practical terms we
> > usually force the values to registers before calling the expansion.
> > My assumption is that this is for CSE purposes but that's only a guess.
> 
> Understood.  And generally yes, forcing expressions into regs is good for CSE.
> 
> 
> >
> >> Do we have enough information lying around from Ranger to avoid the
> need
> >> to walk the def-use chain to discover that we're masking off all but one
> bit?
> >>
> > That's an interesting thought.  I'll try to see if I can figure out how to query
> > Ranger here.  It would be nice to do so here.
> 
> Reach out to Aldy, I suspect he can probably give you the necessary
> pseudocode pretty quickly.
> 
> 
> Jeff
>
  
Aldy Hernandez Nov. 2, 2022, 11:08 a.m. UTC | #5
On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
>
> Hi Aldy,
>
> I'm trying to use Ranger to determine if a range of an expression is a single bit.
>
> If possible in case of a mask then also the position of the bit that's being checked by the mask (or the mask itself).

Just instantiate a ranger, and ask for the range of an SSA name (or an
arbitrary tree expression) at a particular gimple statement (or an
edge):

gimple_ranger ranger;
int_range_max r;
if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
  // do stuff with range "r"
  if (r.singleton_p ()) {
    wide_int num = r.lower_bound ();
    // Check the bits in NUM, etc...
  }
}

You can see the full ranger API in gimple-range.h.

Note that instantiating a new ranger is relatively lightweight, but
it's not free.  So unless you're calling range_of_expr sporadically,
you probably want to have one instance for your pass.  You can pass
the gimple_ranger around your pass.  Another way of doing this
is calling enable_ranger() at pass start, and then doing:

  get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>);

gimple-loop-versioning.cc has an example of using enable_ranger /
disable_ranger.

I am assuming you are interested in ranges for integers / pointers.
Otherwise (floats, etc) you'd have to use "Value_Range" instead of
int_range_max.  I can give you examples on that if necessary.

Let me know if that helps.
Aldy

>
> Do you have any pointers/existing code I can look at to do this?
>
> Kind regards,
> Tamar
>
> > -----Original Message-----
> > From: Jeff Law <jeffreyalaw@gmail.com>
> > Sent: Tuesday, November 1, 2022 5:00 PM
> > To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> > Cc: nd <nd@arm.com>; rguenther@suse.de
> > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > for bit-test-and-branch operations
> >
> >
> > On 11/1/22 09:53, Tamar Christina wrote:
> > >>
> > >>>    from the machine description.
> > >>>
> > >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> > combined
> > >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> > >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > >>> +the bit position of Operand 1 to test.
> > >>> +Operand 3 is the @code{code_label} to jump to.
> > >> Should we refine/document the set of comparison operators allowed?
> > >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> > >> guessing its relatively arbitrary given how you've massaged the
> > >> existing branch-on-bit patterns from the aarch backend.
> > > It can be any expression in theory. However in practical terms we
> > > usually force the values to registers before calling the expansion.
> > > My assumption is that this is for CSE purposes but that's only a guess.
> >
> > Understood.  And generally yes, forcing expressions into regs is good for CSE.
> >
> >
> > >
> > >> Do we have enough information lying around from Ranger to avoid the
> > need
> > >> to walk the def-use chain to discover that we're masking off all but one
> > bit?
> > >>
> > > That's an interesting thought.  I'll try to see if I can figure out how to query
> > > Ranger here.  It would be nice to do so here.
> >
> > Reach out to Aldy, I suspect he can probably give you the necessary
> > pseudocode pretty quickly.
> >
> >
> > Jeff
> >
>
  
Richard Biener Nov. 5, 2022, 2:23 p.m. UTC | #6
On Wed, 2 Nov 2022, Aldy Hernandez wrote:

> On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina <Tamar.Christina@arm.com> wrote:
> >
> > Hi Aldy,
> >
> > I'm trying to use Ranger to determine if a range of an expression is a single bit.
> >
> > If possible in case of a mask then also the position of the bit that's being checked by the mask (or the mask itself).
> 
> Just instantiate a ranger, and ask for the range of an SSA name (or an
> arbitrary tree expression) at a particular gimple statement (or an
> edge):
> 
> gimple_ranger ranger;
> int_range_max r;
> if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
>   // do stuff with range "r"
>   if (r.singleton_p ()) {
>     wide_int num = r.lower_bound ();
>     // Check the bits in NUM, etc...
>   }
> }
> 
> You can see the full ranger API in gimple-range.h.
> 
> Note that instantiating a new ranger is relatively lightweight, but
> it's not free.  So unless you're calling range_of_expr sporadically,
> you probably want to have one instance for your pass.  You can pass
> around the gimple_ranger around your pass.  Another way of doing this
> is calling enable_ranger() at pass start, and then doing:
> 
>   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> 
> gimple-loop-versioning.cc has an example of using enable_ranger /
> disable_ranger.
> 
> I am assuming you are interested in ranges for integers / pointers.
> Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> int_range_max.  I can give you examples on that if necessary.
> 
> Let me know if that helps.

I think you maybe just want get_nonzero_bits?

> Aldy
> 
> >
> > Do you have any pointers/existing code I can look at to do this?
> >
> > Kind regards,
> > Tamar
> >
> > > -----Original Message-----
> > > From: Jeff Law <jeffreyalaw@gmail.com>
> > > Sent: Tuesday, November 1, 2022 5:00 PM
> > > To: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org
> > > Cc: nd <nd@arm.com>; rguenther@suse.de
> > > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > > for bit-test-and-branch operations
> > >
> > >
> > > On 11/1/22 09:53, Tamar Christina wrote:
> > > >>
> > > >>>    from the machine description.
> > > >>>
> > > >>> +@cindex @code{tbranch@var{mode}4} instruction pattern @item
> > > >>> +@samp{tbranch@var{mode}4} Conditional branch instruction
> > > combined
> > > >>> +with a bit test-and-compare instruction. Operand 0 is a comparison
> > > >>> +operator.  Operand 1 is the operand of the comparison. Operand 2 is
> > > >>> +the bit position of Operand 1 to test.
> > > >>> +Operand 3 is the @code{code_label} to jump to.
> > > >> Should we refine/document the set of comparison operators allowed?
> > > >> Is operand 1 an arbitrary RTL expression or more limited?  I'm
> > > >> guessing its relatively arbitrary given how you've massaged the
> > > >> existing branch-on-bit patterns from the aarch backend.
> > > > It can be any expression in theory. However in practical terms we
> > > > usually force the values to registers before calling the expansion.
> > > > My assumption is that this is for CSE purposes but that's only a guess.
> > >
> > > Understood.  And generally yes, forcing expressions into regs is good for CSE.
> > >
> > >
> > > >
> > > >> Do we have enough information lying around from Ranger to avoid the
> > > need
> > > >> to walk the def-use chain to discover that we're masking off all but one
> > > bit?
> > > >>
> > > > That's an interesting thought.  I'll try to see if I can figure out how to query
> > > > Ranger here.  It would be nice to do so here.
> > >
> > > Reach out to Aldy, I suspect he can probably give you the necessary
> > > pseudocode pretty quickly.
> > >
> > >
> > > Jeff
> > >
> >
> 
>
  
Tamar Christina Nov. 14, 2022, 3:56 p.m. UTC | #7
> -----Original Message-----
> From: Richard Biener <rguenther@suse.de>
> Sent: Saturday, November 5, 2022 2:23 PM
> To: Aldy Hernandez <aldyh@redhat.com>
> Cc: Tamar Christina <Tamar.Christina@arm.com>; Jeff Law
> <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org; nd <nd@arm.com>;
> MacLeod, Andrew <amacleod@redhat.com>
> Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> for bit-test-and-branch operations
> 
> On Wed, 2 Nov 2022, Aldy Hernandez wrote:
> 
> > On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina
> <Tamar.Christina@arm.com> wrote:
> > >
> > > Hi Aldy,
> > >
> > > I'm trying to use Ranger to determine if a range of an expression is a
> single bit.
> > >
> > > If possible in case of a mask then also the position of the bit that's being
> checked by the mask (or the mask itself).
> >
> > Just instantiate a ranger, and ask for the range of an SSA name (or an
> > arbitrary tree expression) at a particular gimple statement (or an
> > edge):
> >
> > gimple_ranger ranger;
> > int_range_max r;
> > if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
> >   // do stuff with range "r"
> >   if (r.singleton_p ()) {
> >     wide_int num = r.lower_bound ();
> >     // Check the bits in NUM, etc...
> >   }
> > }
> >
> > You can see the full ranger API in gimple-range.h.
> >
> > Note that instantiating a new ranger is relatively lightweight, but
> > it's not free.  So unless you're calling range_of_expr sporadically,
> > you probably want to have one instance for your pass.  You can pass
> > around the gimple_ranger around your pass.  Another way of doing this
> > is calling enable_ranger() at pass start, and then doing:
> >
> >   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> >
> > gimple-loop-versioning.cc has an example of using enable_ranger /
> > disable_ranger.
> >
> > I am assuming you are interested in ranges for integers / pointers.
> > Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> > int_range_max.  I can give you examples on that if necessary.
> >
> > Let me know if that helps.

It did! I ended up going with Richi's suggestion, but the snippet was very helpful
for a different range based patch I'm trying a prototype for.

Many thanks for the example!

> 
> I think you maybe just want get_nonzero_bits?

Ah, looks like that uses range info as well.  Thanks!

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch$a4): New.
	* doc/md.texi (tbranch@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload taking a tree value.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch ---
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
+#include "ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns the tbranch insn code and updates PMODE and PTEST on success,
+   else CODE_FOR_nothing.  */
+
+enum insn_code
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val || TREE_CODE (val) != SSA_NAME)
+    return CODE_FOR_nothing;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return CODE_FOR_nothing;
+
+  /* If the target supports the testbit comparison directly, great.  */
+  auto icode = direct_optab_handler (tbranch_optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return icode;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return icode;
+    }
+
+  wide_int wcst = get_nonzero_bits (val);
+  if (wcst == -1)
+    return CODE_FOR_nothing;
+
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return CODE_FOR_nothing;
+
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return icode;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
@@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing)
+    {
+      emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+      return;
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-

+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
  
Jeff Law Nov. 14, 2022, 4:22 p.m. UTC | #8
On 11/14/22 08:56, Tamar Christina wrote:
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch$a4): New.
> 	* doc/md.texi (tbranch@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns):
> 	* tree.h (tree_zero_one_valued_p): New.

OK.

jeff
  
Richard Biener Nov. 15, 2022, 7:33 a.m. UTC | #9
On Mon, Nov 14, 2022 at 4:57 PM Tamar Christina via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> > -----Original Message-----
> > From: Richard Biener <rguenther@suse.de>
> > Sent: Saturday, November 5, 2022 2:23 PM
> > To: Aldy Hernandez <aldyh@redhat.com>
> > Cc: Tamar Christina <Tamar.Christina@arm.com>; Jeff Law
> > <jeffreyalaw@gmail.com>; gcc-patches@gcc.gnu.org; nd <nd@arm.com>;
> > MacLeod, Andrew <amacleod@redhat.com>
> > Subject: Re: [PATCH 1/2]middle-end: Add new tbranch optab to add support
> > for bit-test-and-branch operations
> >
> > On Wed, 2 Nov 2022, Aldy Hernandez wrote:
> >
> > > On Wed, Nov 2, 2022 at 10:55 AM Tamar Christina
> > <Tamar.Christina@arm.com> wrote:
> > > >
> > > > Hi Aldy,
> > > >
> > > > I'm trying to use Ranger to determine if a range of an expression is a
> > single bit.
> > > >
> > > > If possible in case of a mask then also the position of the bit that's being
> > checked by the mask (or the mask itself).
> > >
> > > Just instantiate a ranger, and ask for the range of an SSA name (or an
> > > arbitrary tree expression) at a particular gimple statement (or an
> > > edge):
> > >
> > > gimple_ranger ranger;
> > > int_range_max r;
> > > if (ranger.range_of_expr (r, <SSA_NAME>, <STMT>)) {
> > >   // do stuff with range "r"
> > >   if (r.singleton_p ()) {
> > >     wide_int num = r.lower_bound ();
> > >     // Check the bits in NUM, etc...
> > >   }
> > > }
> > >
> > > You can see the full ranger API in gimple-range.h.
> > >
> > > Note that instantiating a new ranger is relatively lightweight, but
> > > it's not free.  So unless you're calling range_of_expr sporadically,
> > > you probably want to have one instance for your pass.  You can pass
> > > around the gimple_ranger around your pass.  Another way of doing this
> > > is calling enable_ranger() at pass start, and then doing:
> > >
> > >   get_range_query (cfun)->range_of_expr (r, <SSA_NAME>, <STMT>));
> > >
> > > gimple-loop-versioning.cc has an example of using enable_ranger /
> > > disable_ranger.
> > >
> > > I am assuming you are interested in ranges for integers / pointers.
> > > Otherwise (floats, etc) you'd have to use "Value_Range" instead of
> > > int_range_max.  I can give you examples on that if necessary.
> > >
> > > Let me know if that helps.
>
> It Did! I ended up going with Richi's suggestion, but the snippet was very helpful
> for a different range based patch I'm trying a prototype for.
>
> Many thanks for the example!
>
> >
> > I think you maybe just want get_nonzero_bits?
>
> Ah, looks like that uses range info as well.  Thanks!
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         * dojump.cc (do_jump): Pass along value.
>         (do_jump_by_parts_greater_rtx): Likewise.
>         (do_jump_by_parts_zero_rtx): Likewise.
>         (do_jump_by_parts_equality_rtx): Likewise.
>         (do_compare_rtx_and_jump): Likewise.
>         (do_compare_and_jump): Likewise.
>         * dojump.h (do_compare_rtx_and_jump): New.
>         * optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
>         (validate_test_and_branch): New.
>         (emit_cmp_and_jump_insns): Optionally take a value, and when value is
>         supplied then check if it's suitable for tbranch.
>         * optabs.def (tbranch$a4): New.
>         * doc/md.texi (tbranch@var{mode}4): Document it.
>         * optabs.h (emit_cmp_and_jump_insns):
>         * tree.h (tree_zero_one_valued_p): New.
>
> --- inline copy of patch ---
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 34825549ed4e315b07d36dc3d63bae0cc0a3932d..342e8c4c670de251a35689d1805acceb72a8f6bf 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6958,6 +6958,13 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>
> +@cindex @code{tbranch@var{mode}4} instruction pattern
> +@item @samp{tbranch@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to.
> +
>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>                          profile_probability);
>
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +                                    machine_mode, rtx, rtx_code_label *,
> +                                    rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>                                      machine_mode, rtx, rtx_code_label *,
>                                      rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>         }
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>                                NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -                              GET_MODE (temp), NULL_RTX,
> +                              exp, GET_MODE (temp), NULL_RTX,
>                                if_false_label, if_true_label, prob);
>      }
>
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -                              word_mode, NULL_RTX, NULL, if_true_label,
> +                              NULL, word_mode, NULL_RTX, NULL, if_true_label,
>                                prob);
>
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>         break;
>
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -                              NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +                              word_mode, NULL_RTX, NULL, if_false_label,
>                                prob.invert ());
>      }
>
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>                                NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +                            const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>                              if_false_label, NULL, prob);
>
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +                            operand_subword_force (op1, i, mode),
> +                            EQ, 0, NULL, word_mode, NULL_RTX,
>                              if_false_label, NULL, prob);
>
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                          rtx_code_label *if_false_label,
>                          rtx_code_label *if_true_label,
>                          profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +                         if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +                        tree val, machine_mode mode, rtx size,
> +                        rtx_code_label *if_false_label,
> +                        rtx_code_label *if_true_label,
> +                        profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                     }
>                   else
>                     dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -                                          size, dest_label, NULL, first_prob);
> +
> +                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +                                          val, mode, size, dest_label, NULL,
> +                                          first_prob);
>                 }
>               /* For !and_them we want to split:
>                  if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>                 {
>                   profile_probability first_prob = prob.split (cprob);
> -                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -                                          size, NULL, if_true_label, first_prob);
> +                 do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +                                          val, mode, size, NULL,
> +                                          if_true_label, first_prob);
>                   if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>                     {
>                       /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>             }
>         }
>
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>                                if_true_label, prob);
>      }
>
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +                          ((mode == BLKmode)
> +                           ? expr_size (treeop0) : NULL_RTX),
>                            if_false_label, if_true_label, prob);
>  }
>
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index f338df410265dfe55b6896160090a453cc6a28d9..0f662ebdb818d7538bdd13fb02bcf8bcf1dbab64 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>                                    machine_mode *);
> @@ -4620,7 +4622,7 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -                         profile_probability prob)
> +                         direct_optab cmp_optab, profile_probability prob)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4629,7 +4631,7 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>
>    gcc_assert (icode != CODE_FOR_nothing);
>    gcc_assert (insn_operand_matches (icode, 0, test));
> @@ -4644,6 +4646,56 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */
> +
> +enum insn_code
> +static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +
> +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> +    return CODE_FOR_nothing;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (tbranch_optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;

Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode > word_mode?

> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4661,11 +4713,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -                        machine_mode mode, int unsignedp, rtx label,
> +                        machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4690,10 +4744,32 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>                     &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
>  }
>
> -
>
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +                        machine_mode mode, int unsignedp, rtx label,
> +                        profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +                          label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..56e37d67231e1ba74ad6c5b81d74a65f315e26e2 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,7 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_optab, "tbranch$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>                                      machine_mode, int, rtx,
>                                      profile_probability prob
>                                         = profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +                                    machine_mode, int, tree, rtx,
> +                                    profile_probability prob
> +                                       = profile_probability::uninitialized ());
>
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index e6564aaccb7b69cd938ff60b6121aec41b7e8a59..f455008ceb8d91e7e073c0ad6d93dcaed65deccf 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4690,6 +4690,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
  
Tamar Christina Dec. 1, 2022, 4:29 p.m. UTC | #10
> > +/* Check to see if the supplied comparison in PTEST can be performed as a
> > +   bit-test-and-branch instead.  VAL must contain the original tree
> > +   expression of the non-zero operand which will be used to rewrite the
> > +   comparison in PTEST.
> > +
> > +   Returns TRUE if operation succeeds and returns updated PMODE and
> PTEST,
> > +   else FALSE.  */
> > +
> > +enum insn_code
> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
> > +*pmode) {
> > +  if (!val || TREE_CODE (val) != SSA_NAME)
> > +    return CODE_FOR_nothing;
> > +
> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
> > + *ptest;
> > +
> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> > +    return CODE_FOR_nothing;
> > +
> > +  /* If the target supports the testbit comparison directly, great.
> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
> > + (icode == CODE_FOR_nothing)
> > +    return icode;
> > +
> > +  if (tree_zero_one_valued_p (val))
> > +    {
> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> 
> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
> > word_mode?
> 

It does now. In this particular case all that matters is the bit ordering, so I've changed
it to BITS_BIG_ENDIAN.

Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
optabs apart into two.  The reason is that a match_operator still gets the full RTL.

In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
the hook correctly this would lead to incorrect code.  We've now made the operator part of
the name itself to avoid this.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

	* dojump.cc (do_jump): Pass along value.
	(do_jump_by_parts_greater_rtx): Likewise.
	(do_jump_by_parts_zero_rtx): Likewise.
	(do_jump_by_parts_equality_rtx): Likewise.
	(do_compare_rtx_and_jump): Likewise.
	(do_compare_and_jump): Likewise.
	* dojump.h (do_compare_rtx_and_jump): New.
	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
	(validate_test_and_branch): New.
	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
	supplied then check if it's suitable for tbranch.
	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
	* optabs.h (emit_cmp_and_jump_insns): New overload taking a tree value.
	* tree.h (tree_zero_one_valued_p): New.

--- inline copy of patch ---

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
+@item @samp{tbranch_@var{op}@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction.  Operand 0 is the operand of the comparison.  Operand 1 is
+the bit position of operand 0 to test.  Operand 2 is the
+@code{code_label} to jump to.  @var{op} is one of @var{eq} or
+@var{ne}.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
+#include "ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob,
+			  bool test_branch)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
-  gcc_assert (insn_operand_matches (icode, 0, test));
-  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
-                                          XEXP (test, 1), label));
+  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
+  if (test_branch)
+    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
+					    XEXP (test, 1), label));
+  else
+    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
+					    XEXP (test, 1), label));
+
   if (prob.initialized_p ()
       && profile_status_for_fn (cfun) != PROFILE_ABSENT
       && insn
@@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.  For an EQ or NE test the optab used is stored in RES.
+
+   Returns the insn code of the tbranch pattern and updates PMODE and PTEST
+   if the operation succeeds, else returns CODE_FOR_nothing.  */
+
+static enum insn_code
+validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
+{
+  if (!val || TREE_CODE (val) != SSA_NAME)
+    return CODE_FOR_nothing;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+  direct_optab optab;
+
+  if (GET_CODE (test) == EQ)
+    optab = tbranch_eq_optab;
+  else if (GET_CODE (test) == NE)
+    optab = tbranch_ne_optab;
+  else
+    return CODE_FOR_nothing;
+
+  *res = optab;
+
+  /* If the target supports the testbit comparison directly, great.  */
+  auto icode = direct_optab_handler (optab, mode);
+  if (icode == CODE_FOR_nothing)
+    return icode;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return icode;
+    }
+
+  wide_int wcst = get_nonzero_bits (val);
+  if (wcst == -1)
+    return CODE_FOR_nothing;
+
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return CODE_FOR_nothing;
+
+  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return icode;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
@@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  direct_optab optab;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode,
+				   &optab) != CODE_FOR_nothing)
+    {
+      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
+      return;
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
 }
 
-

+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
+OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);
  
Richard Biener Dec. 2, 2022, 7:09 a.m. UTC | #11
On Thu, 1 Dec 2022, Tamar Christina wrote:

> > > +/* Check to see if the supplied comparison in PTEST can be performed as a
> > > +   bit-test-and-branch instead.  VAL must contain the original tree
> > > +   expression of the non-zero operand which will be used to rewrite the
> > > +   comparison in PTEST.
> > > +
> > > +   Returns TRUE if operation succeeds and returns updated PMODE and
> > PTEST,
> > > +   else FALSE.  */
> > > +
> > > +enum insn_code
> > > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
> > > +*pmode) {
> > > +  if (!val || TREE_CODE (val) != SSA_NAME)
> > > +    return CODE_FOR_nothing;
> > > +
> > > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
> > > + *ptest;
> > > +
> > > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
> > > +    return CODE_FOR_nothing;
> > > +
> > > +  /* If the target supports the testbit comparison directly, great.
> > > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
> > > + (icode == CODE_FOR_nothing)
> > > +    return icode;
> > > +
> > > +  if (tree_zero_one_valued_p (val))
> > > +    {
> > > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> > 
> > Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and mode
> > > word_mode?
> > 
> 
> It does now. In this particular case all that matters is the bit ordering, so I've changed
> It to BITS_BIG_ENDIAN.

It looks like this would fit indeed.

> Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
> optabs apart into two.  The reason is that a match_operator still gets the full RTL.
> 
> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
> the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
> the name itself to avoid this.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> 
> Ok for master?

OK if Richard doesn't have any further comments.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optiobally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns):
> 	* tree.h (tree_zero_one_valued_p): New.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>  
> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
> +@item @samp{tbranch_@var{op}@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
> +@var{ne}.
> +
>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>  			 profile_probability);
>  
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +				     machine_mode, rtx, rtx_code_label *,
> +				     rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>  				     machine_mode, rtx, rtx_code_label *,
>  				     rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>  	}
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -			       GET_MODE (temp), NULL_RTX,
> +			       exp, GET_MODE (temp), NULL_RTX,
>  			       if_false_label, if_true_label, prob);
>      }
>  
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -			       word_mode, NULL_RTX, NULL, if_true_label,
> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>  			       prob);
>  
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  	break;
>  
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -			       NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +			       word_mode, NULL_RTX, NULL, if_false_label,
>  			       prob.invert ());
>      }
>  
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>  			       NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +			     operand_subword_force (op1, i, mode),
> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  			 rtx_code_label *if_false_label,
>  			 rtx_code_label *if_true_label,
>  			 profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +			  if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +			 tree val, machine_mode mode, rtx size,
> +			 rtx_code_label *if_false_label,
> +			 rtx_code_label *if_true_label,
> +			 profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  		    }
>  		  else
>  		    dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, dest_label, NULL, first_prob);
> +
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, dest_label, NULL,
> +					   first_prob);
>  		}
>  	      /* For !and_them we want to split:
>  		 if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>  		{
>  		  profile_probability first_prob = prob.split (cprob);
> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, NULL, if_true_label, first_prob);
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, NULL,
> +					   if_true_label, first_prob);
>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>  		    {
>  		      /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  	    }
>  	}
>  
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>  			       if_true_label, prob);
>      }
>  
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>  
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +			   ((mode == BLKmode)
> +			    ? expr_size (treeop0) : NULL_RTX),
>  			   if_false_label, if_true_label, prob);
>  }
>  
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>  
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>  				   machine_mode *);
> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>  
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -			  profile_probability prob)
> +			  direct_optab cmp_optab, profile_probability prob,
> +			  bool test_branch)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>  
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>  
>    gcc_assert (icode != CODE_FOR_nothing);
> -  gcc_assert (insn_operand_matches (icode, 0, test));
> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> -                                          XEXP (test, 1), label));
> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
> +  if (test_branch)
> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +  else
> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +
>    if (prob.initialized_p ()
>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>        && insn
> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>  
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */
> +
> +static enum insn_code
> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +  direct_optab optab;
> +
> +  if (GET_CODE (test) == EQ)
> +    optab = tbranch_eq_optab;
> +  else if (GET_CODE (test) == NE)
> +    optab = tbranch_ne_optab;
> +  else
> +    return CODE_FOR_nothing;
> +
> +  *res = optab;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>     
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>  
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -			 machine_mode mode, int unsignedp, rtx label,
> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>  
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>  		    &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  direct_optab optab;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode,
> +				   &optab) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>  }
>  
> -
> 
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +			 machine_mode mode, int unsignedp, rtx label,
> +			 profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +			   label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>  
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>  				     machine_mode, int, rtx,
>  				     profile_probability prob
>  					= profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +				     machine_mode, int, tree, rtx,
> +				     profile_probability prob
> +					= profile_probability::uninitialized ());
>  
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
>
  
Richard Sandiford Dec. 5, 2022, noon UTC | #12
Tamar Christina via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>> > +/* Check to see if the supplied comparison in PTEST can be performed as a
>> > +   bit-test-and-branch instead.  VAL must contain the original tree
>> > +   expression of the non-zero operand which will be used to rewrite the
>> > +   comparison in PTEST.
>> > +
>> > +   Returns TRUE if operation succeeds and returns updated PMODE and
>> PTEST,
>> > +   else FALSE.  */
>> > +
>> > +enum insn_code
>> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
>> > +*pmode) {
>> > +  if (!val || TREE_CODE (val) != SSA_NAME)
>> > +    return CODE_FOR_nothing;
>> > +
>> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
>> > + *ptest;
>> > +
>> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
>> > +    return CODE_FOR_nothing;
>> > +
>> > +  /* If the target supports the testbit comparison directly, great.
>> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
>> > + (icode == CODE_FOR_nothing)
>> > +    return icode;
>> > +
>> > +  if (tree_zero_one_valued_p (val))
>> > +    {
>> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>> 
>> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and
>> mode > word_mode?
>> 
>
> It does now. In this particular case all that matters is the bit ordering, so I've changed
> it to BITS_BIG_ENDIAN.
>
> Also, during the review of the AArch64 optab, Richard Sandiford asked me to split the
> optab into two.  The reason is that a match_operator still gets the full RTL.
>
> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
> the hook correctly this would lead to incorrect code.  We've now made the operator part of
> the optab name itself to avoid this.
>
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> 	* dojump.cc (do_jump): Pass along value.
> 	(do_jump_by_parts_greater_rtx): Likewise.
> 	(do_jump_by_parts_zero_rtx): Likewise.
> 	(do_jump_by_parts_equality_rtx): Likewise.
> 	(do_compare_rtx_and_jump): Likewise.
> 	(do_compare_and_jump): Likewise.
> 	* dojump.h (do_compare_rtx_and_jump): New.
> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
> 	(validate_test_and_branch): New.
> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
> 	supplied then check if it's suitable for tbranch.
> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
> 	* optabs.h (emit_cmp_and_jump_insns): New overload.
> 	* tree.h (tree_zero_one_valued_p): New.

Thanks for doing this.

> --- inline copy of patch ---
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>  from the machine description.
>  
> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
> +@item @samp{tbranch_@var{op}@var{mode}4}
> +Conditional branch instruction combined with a bit test-and-compare
> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
> +@var{ne}.
> +

The documentation still describes the old interface.  Also, there are only 3
operands now, rather than 4, so the optab name should end with 3.

>  @cindex @code{cbranch@var{mode}4} instruction pattern
>  @item @samp{cbranch@var{mode}4}
>  Conditional branch instruction combined with a compare instruction.
> diff --git a/gcc/dojump.h b/gcc/dojump.h
> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
> --- a/gcc/dojump.h
> +++ b/gcc/dojump.h
> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>  			 profile_probability);
>  
> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
> +				     machine_mode, rtx, rtx_code_label *,
> +				     rtx_code_label *, profile_probability);
> +
>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>  				     machine_mode, rtx, rtx_code_label *,
>  				     rtx_code_label *, profile_probability);
> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
> --- a/gcc/dojump.cc
> +++ b/gcc/dojump.cc
> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>  	}
>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
> -			       GET_MODE (temp), NULL_RTX,
> +			       exp, GET_MODE (temp), NULL_RTX,
>  			       if_false_label, if_true_label, prob);
>      }
>  
> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  
>        /* All but high-order word must be compared as unsigned.  */
>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
> -			       word_mode, NULL_RTX, NULL, if_true_label,
> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>  			       prob);
>  
>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>  	break;
>  
>        /* Consider lower words only if these are equal.  */
> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
> -			       NULL_RTX, NULL, if_false_label,
> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
> +			       word_mode, NULL_RTX, NULL, if_false_label,
>  			       prob.invert ());
>      }
>  
> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    if (part != 0)
>      {
> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>  			       NULL_RTX, if_false_label, if_true_label, prob);
>        return;
>      }
> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>  
>    for (i = 0; i < nwords; i++)
>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
> -                             operand_subword_force (op1, i, mode),
> -                             EQ, 0, word_mode, NULL_RTX,
> +			     operand_subword_force (op1, i, mode),
> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>  			     if_false_label, NULL, prob);
>  
>    if (if_true_label)
> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  			 rtx_code_label *if_false_label,
>  			 rtx_code_label *if_true_label,
>  			 profile_probability prob)
> +{
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
> +			  if_false_label, if_true_label, prob);
> +}
> +
> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
> +   The decision as to signed or unsigned comparison must be made by the caller.
> +
> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
> +   compared.  */
> +
> +void
> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
> +			 tree val, machine_mode mode, rtx size,
> +			 rtx_code_label *if_false_label,
> +			 rtx_code_label *if_true_label,
> +			 profile_probability prob)
>  {
>    rtx tem;
>    rtx_code_label *dummy_label = NULL;
> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  		    }
>  		  else
>  		    dest_label = if_false_label;
> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, dest_label, NULL, first_prob);
> +
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, dest_label, NULL,
> +					   first_prob);
>  		}
>  	      /* For !and_them we want to split:
>  		 if (x) goto t; // prob;
> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>                else
>  		{
>  		  profile_probability first_prob = prob.split (cprob);
> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
> -					   size, NULL, if_true_label, first_prob);
> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
> +					   val, mode, size, NULL,
> +					   if_true_label, first_prob);
>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>  		    {
>  		      /* x != y can be split into x unord y || x ltgt y
> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>  	    }
>  	}
>  
> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>  			       if_true_label, prob);
>      }
>  
> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>        op1 = new_op1;
>      }
>  
> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
> -                           ((mode == BLKmode)
> -                            ? expr_size (treeop0) : NULL_RTX),
> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
> +			   ((mode == BLKmode)
> +			    ? expr_size (treeop0) : NULL_RTX),
>  			   if_false_label, if_true_label, prob);
>  }
>  
> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
> --- a/gcc/optabs.cc
> +++ b/gcc/optabs.cc
> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "libfuncs.h"
>  #include "internal-fn.h"
>  #include "langhooks.h"
> +#include "gimple.h"
> +#include "ssa.h"
>  
>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>  				   machine_mode *);
> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>  
>  static void
>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
> -			  profile_probability prob)
> +			  direct_optab cmp_optab, profile_probability prob,
> +			  bool test_branch)
>  {
>    machine_mode optab_mode;
>    enum mode_class mclass;
> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>  
>    mclass = GET_MODE_CLASS (mode);
>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
> -  icode = optab_handler (cbranch_optab, optab_mode);
> +  icode = optab_handler (cmp_optab, optab_mode);
>  
>    gcc_assert (icode != CODE_FOR_nothing);
> -  gcc_assert (insn_operand_matches (icode, 0, test));
> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> -                                          XEXP (test, 1), label));
> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
> +  if (test_branch)
> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +  else
> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
> +					    XEXP (test, 1), label));
> +
>    if (prob.initialized_p ()
>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>        && insn
> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>      add_reg_br_prob_note (insn, prob);
>  }
>  
> +/* Check to see if the supplied comparison in PTEST can be performed as a
> +   bit-test-and-branch instead.  VAL must contain the original tree
> +   expression of the non-zero operand which will be used to rewrite the
> +   comparison in PTEST.
> +
> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
> +   else FALSE.  */

The function now returns an icode rather than true/false.  I think it'd
also be good to clarify what *PTEST means for the tbranch case.  How about:

/* PTEST points to a comparison that compares its first operand with zero.
   Check to see if it can be performed as a bit-test-and-branch instead.
   On success, return the instruction that performs the bit-test-and-branch
   and replace the second operand of *PTEST with the bit number to test.
   On failure, return CODE_FOR_nothing and leave *PTEST unchanged.

   Note that the comparison described by *PTEST should not be taken
   literally after a successful return.  *PTEST is just a convenient
   place to store the two operands of the bit-and-test.

   VAL must contain the original tree expression for the first operand
   of *PTEST.  */

Looks good to me otherwise.

Thanks,
Richard

> +static enum insn_code
> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
> +{
> +  if (!val || TREE_CODE (val) != SSA_NAME)
> +    return CODE_FOR_nothing;
> +
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
> +  rtx test = *ptest;
> +  direct_optab optab;
> +
> +  if (GET_CODE (test) == EQ)
> +    optab = tbranch_eq_optab;
> +  else if (GET_CODE (test) == NE)
> +    optab = tbranch_ne_optab;
> +  else
> +    return CODE_FOR_nothing;
> +
> +  *res = optab;
> +
> +  /* If the target supports the testbit comparison directly, great.  */
> +  auto icode = direct_optab_handler (optab, mode);
> +  if (icode == CODE_FOR_nothing)
> +    return icode;
> +
> +  if (tree_zero_one_valued_p (val))
> +    {
> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
> +      XEXP (test, 1) = gen_int_mode (pos, mode);
> +      *ptest = test;
> +      *pmode = mode;
> +      return icode;
> +    }
> +
> +  wide_int wcst = get_nonzero_bits (val);
> +  if (wcst == -1)
> +    return CODE_FOR_nothing;
> +
> +  int bitpos;
> +
> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
> +    return CODE_FOR_nothing;
> +
> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
> +  XEXP (test, 1) = gen_int_mode (pos, mode);
> +  *ptest = test;
> +  *pmode = mode;
> +  return icode;
> +}
> +
>  /* Generate code to compare X with Y so that the condition codes are
>     set and to jump to LABEL if the condition is true.  If X is a
>     constant and Y is not a constant, then the comparison is swapped to
> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>     It will be potentially converted into an unsigned variant based on
>     UNSIGNEDP to select a proper jump instruction.
>     
> -   PROB is the probability of jumping to LABEL.  */
> +   PROB is the probability of jumping to LABEL.  If the comparison is against
> +   zero then VAL contains the expression from which the non-zero RTL is
> +   derived.  */
>  
>  void
>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> -			 machine_mode mode, int unsignedp, rtx label,
> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>                           profile_probability prob)
>  {
>    rtx op0 = x, op1 = y;
> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>  
>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>  		    &test, &mode);
> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
> +
> +  /* Check if we're comparing a truth type with 0, and if so check if
> +     the target supports tbranch.  */
> +  machine_mode tmode = mode;
> +  direct_optab optab;
> +  if (op1 == CONST0_RTX (GET_MODE (op1))
> +      && validate_test_and_branch (val, &test, &tmode,
> +				   &optab) != CODE_FOR_nothing)
> +    {
> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
> +      return;
> +    }
> +
> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>  }
>  
> -
>
> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
> +
> +void
> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
> +			 machine_mode mode, int unsignedp, rtx label,
> +			 profile_probability prob)
> +{
> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
> +			   label, prob);
> +}
> +
> +
>  /* Emit a library call comparison between floating point X and Y.
>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>  
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>  OPTAB_D (reload_out_optab, "reload_out$a")
>  
>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>  OPTAB_D (addcc_optab, "add$acc")
>  OPTAB_D (negcc_optab, "neg$acc")
>  OPTAB_D (notcc_optab, "not$acc")
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>  				     machine_mode, int, rtx,
>  				     profile_probability prob
>  					= profile_probability::uninitialized ());
> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
> +				     machine_mode, int, tree, rtx,
> +				     profile_probability prob
> +					= profile_probability::uninitialized ());
>  
>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>  extern void emit_indirect_jump (rtx);
> diff --git a/gcc/tree.h b/gcc/tree.h
> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
> --- a/gcc/tree.h
> +++ b/gcc/tree.h
> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>  extern tree signed_type_for (tree);
>  extern tree unsigned_type_for (tree);
>  extern bool is_truth_type_for (tree, tree);
> +extern bool tree_zero_one_valued_p (tree);
>  extern tree truth_type_for (tree);
>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>  extern tree build_pointer_type (tree);
  
Richard Sandiford Dec. 5, 2022, 1:14 p.m. UTC | #13
Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Tamar Christina via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>> > +/* Check to see if the supplied comparison in PTEST can be performed as a
>>> > +   bit-test-and-branch instead.  VAL must contain the original tree
>>> > +   expression of the non-zero operand which will be used to rewrite the
>>> > +   comparison in PTEST.
>>> > +
>>> > +   Returns TRUE if operation succeeds and returns updated PMODE and
>>> PTEST,
>>> > +   else FALSE.  */
>>> > +
>>> > +enum insn_code
>>> > +static validate_test_and_branch (tree val, rtx *ptest, machine_mode
>>> > +*pmode) {
>>> > +  if (!val || TREE_CODE (val) != SSA_NAME)
>>> > +    return CODE_FOR_nothing;
>>> > +
>>> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));  rtx test =
>>> > + *ptest;
>>> > +
>>> > +  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
>>> > +    return CODE_FOR_nothing;
>>> > +
>>> > +  /* If the target supports the testbit comparison directly, great.
>>> > + */  auto icode = direct_optab_handler (tbranch_optab, mode);  if
>>> > + (icode == CODE_FOR_nothing)
>>> > +    return icode;
>>> > +
>>> > +  if (tree_zero_one_valued_p (val))
>>> > +    {
>>> > +      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>>> 
>>> Does this work for BYTES_BIG_ENDIAN && !WORDS_BIG_ENDIAN and
>>> mode > word_mode?
>>> 
>>
>> It does now. In this particular case all that matters is the bit ordering, so I've changed
>> it to BITS_BIG_ENDIAN.
>>
>> Also during the review of the AArch64 optab Richard Sandiford wanted me to split the
>> optabs apart into two.  The reason is that a match_operator still gets the full RTL.
>>
>> In the case of a tbranch the full RTL has an invalid comparison, so if a target doesn't implement
>> the hook correctly this would lead to incorrect code.  We've now moved the operator as part of
>> the name itself to avoid this.
>>
>> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>>
>> Ok for master?
>>
>> Thanks,
>> Tamar
>>
>> gcc/ChangeLog:
>>
>> 	* dojump.cc (do_jump): Pass along value.
>> 	(do_jump_by_parts_greater_rtx): Likewise.
>> 	(do_jump_by_parts_zero_rtx): Likewise.
>> 	(do_jump_by_parts_equality_rtx): Likewise.
>> 	(do_compare_rtx_and_jump): Likewise.
>> 	(do_compare_and_jump): Likewise.
>> 	* dojump.h (do_compare_rtx_and_jump): New.
>> 	* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
>> 	(validate_test_and_branch): New.
>> 	(emit_cmp_and_jump_insns): Optionally take a value, and when value is
>> 	supplied then check if it's suitable for tbranch.
>> 	* optabs.def (tbranch_eq$a4, tbranch_ne$a4): New.
>> 	* doc/md.texi (tbranch_@var{op}@var{mode}4): Document it.
>> 	* optabs.h (emit_cmp_and_jump_insns): New overload.
>> 	* tree.h (tree_zero_one_valued_p): New.
>
> Thanks for doing this.
>
>> --- inline copy of patch ---
>>
>> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
>> index d0a71ecbb806de3a6564c6ffe973fec5da5c597b..c6c4b13d756de28078a0a779876a00c614246914 100644
>> --- a/gcc/doc/md.texi
>> +++ b/gcc/doc/md.texi
>> @@ -6964,6 +6964,14 @@ case, you can and should make operand 1's predicate reject some operators
>>  in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
>>  from the machine description.
>>  
>> +@cindex @code{tbranch_@var{op}@var{mode}4} instruction pattern
>> +@item @samp{tbranch_@var{op}@var{mode}4}
>> +Conditional branch instruction combined with a bit test-and-compare
>> +instruction. Operand 0 is a comparison operator.  Operand 1 is the
>> +operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
>> +Operand 3 is the @code{code_label} to jump to. @var{op} is one of @var{eq} or
>> +@var{ne}.
>> +
>
> The documentation still describes the old interface.  Also, there are only 3
> operands now, rather than 4, so the optab name should end with 3.
>
>>  @cindex @code{cbranch@var{mode}4} instruction pattern
>>  @item @samp{cbranch@var{mode}4}
>>  Conditional branch instruction combined with a compare instruction.
>> diff --git a/gcc/dojump.h b/gcc/dojump.h
>> index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
>> --- a/gcc/dojump.h
>> +++ b/gcc/dojump.h
>> @@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
>>  extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
>>  			 profile_probability);
>>  
>> +extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
>> +				     machine_mode, rtx, rtx_code_label *,
>> +				     rtx_code_label *, profile_probability);
>> +
>>  extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
>>  				     machine_mode, rtx, rtx_code_label *,
>>  				     rtx_code_label *, profile_probability);
>> diff --git a/gcc/dojump.cc b/gcc/dojump.cc
>> index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
>> --- a/gcc/dojump.cc
>> +++ b/gcc/dojump.cc
>> @@ -619,7 +619,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
>>  	}
>>        do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
>>  			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
>> -			       GET_MODE (temp), NULL_RTX,
>> +			       exp, GET_MODE (temp), NULL_RTX,
>>  			       if_false_label, if_true_label, prob);
>>      }
>>  
>> @@ -687,7 +687,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>>  
>>        /* All but high-order word must be compared as unsigned.  */
>>        do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
>> -			       word_mode, NULL_RTX, NULL, if_true_label,
>> +			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
>>  			       prob);
>>  
>>        /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
>> @@ -695,8 +695,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
>>  	break;
>>  
>>        /* Consider lower words only if these are equal.  */
>> -      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
>> -			       NULL_RTX, NULL, if_false_label,
>> +      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
>> +			       word_mode, NULL_RTX, NULL, if_false_label,
>>  			       prob.invert ());
>>      }
>>  
>> @@ -755,7 +755,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>>  
>>    if (part != 0)
>>      {
>> -      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
>> +      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
>>  			       NULL_RTX, if_false_label, if_true_label, prob);
>>        return;
>>      }
>> @@ -766,7 +766,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
>>  
>>    for (i = 0; i < nwords; i++)
>>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
>> -                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
>> +			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
>>  			     if_false_label, NULL, prob);
>>  
>>    if (if_true_label)
>> @@ -809,8 +809,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
>>  
>>    for (i = 0; i < nwords; i++)
>>      do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
>> -                             operand_subword_force (op1, i, mode),
>> -                             EQ, 0, word_mode, NULL_RTX,
>> +			     operand_subword_force (op1, i, mode),
>> +			     EQ, 0, NULL, word_mode, NULL_RTX,
>>  			     if_false_label, NULL, prob);
>>  
>>    if (if_true_label)
>> @@ -962,6 +962,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  			 rtx_code_label *if_false_label,
>>  			 rtx_code_label *if_true_label,
>>  			 profile_probability prob)
>> +{
>> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
>> +			  if_false_label, if_true_label, prob);
>> +}
>> +
>> +/* Like do_compare_and_jump but expects the values to compare as two rtx's.
>> +   The decision as to signed or unsigned comparison must be made by the caller.
>> +
>> +   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
>> +   compared.  */
>> +
>> +void
>> +do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>> +			 tree val, machine_mode mode, rtx size,
>> +			 rtx_code_label *if_false_label,
>> +			 rtx_code_label *if_true_label,
>> +			 profile_probability prob)
>>  {
>>    rtx tem;
>>    rtx_code_label *dummy_label = NULL;
>> @@ -1177,8 +1194,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  		    }
>>  		  else
>>  		    dest_label = if_false_label;
>> -                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
>> -					   size, dest_label, NULL, first_prob);
>> +
>> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
>> +					   val, mode, size, dest_label, NULL,
>> +					   first_prob);
>>  		}
>>  	      /* For !and_them we want to split:
>>  		 if (x) goto t; // prob;
>> @@ -1192,8 +1211,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>                else
>>  		{
>>  		  profile_probability first_prob = prob.split (cprob);
>> -		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
>> -					   size, NULL, if_true_label, first_prob);
>> +		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
>> +					   val, mode, size, NULL,
>> +					   if_true_label, first_prob);
>>  		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
>>  		    {
>>  		      /* x != y can be split into x unord y || x ltgt y
>> @@ -1215,7 +1235,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
>>  	    }
>>  	}
>>  
>> -      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
>> +      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
>>  			       if_true_label, prob);
>>      }
>>  
>> @@ -1289,9 +1309,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
>>        op1 = new_op1;
>>      }
>>  
>> -  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
>> -                           ((mode == BLKmode)
>> -                            ? expr_size (treeop0) : NULL_RTX),
>> +  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
>> +			   ((mode == BLKmode)
>> +			    ? expr_size (treeop0) : NULL_RTX),
>>  			   if_false_label, if_true_label, prob);
>>  }
>>  
>> diff --git a/gcc/optabs.cc b/gcc/optabs.cc
>> index 31b15fd3df5fa88119867a23d2abbed139a05115..303b4fd2def9278ddbc3d586103ac8274e73a982 100644
>> --- a/gcc/optabs.cc
>> +++ b/gcc/optabs.cc
>> @@ -46,6 +46,8 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "libfuncs.h"
>>  #include "internal-fn.h"
>>  #include "langhooks.h"
>> +#include "gimple.h"
>> +#include "ssa.h"
>>  
>>  static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
>>  				   machine_mode *);
>> @@ -4623,7 +4625,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
>>  
>>  static void
>>  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>> -			  profile_probability prob)
>> +			  direct_optab cmp_optab, profile_probability prob,
>> +			  bool test_branch)
>>  {
>>    machine_mode optab_mode;
>>    enum mode_class mclass;
>> @@ -4632,12 +4635,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>  
>>    mclass = GET_MODE_CLASS (mode);
>>    optab_mode = (mclass == MODE_CC) ? CCmode : mode;
>> -  icode = optab_handler (cbranch_optab, optab_mode);
>> +  icode = optab_handler (cmp_optab, optab_mode);
>>  
>>    gcc_assert (icode != CODE_FOR_nothing);
>> -  gcc_assert (insn_operand_matches (icode, 0, test));
>> -  insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
>> -                                          XEXP (test, 1), label));
>> +  gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
>> +  if (test_branch)
>> +    insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
>> +					    XEXP (test, 1), label));
>> +  else
>> +    insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
>> +					    XEXP (test, 1), label));
>> +
>>    if (prob.initialized_p ()
>>        && profile_status_for_fn (cfun) != PROFILE_ABSENT
>>        && insn
>> @@ -4647,6 +4655,63 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>      add_reg_br_prob_note (insn, prob);
>>  }
>>  
>> +/* Check to see if the supplied comparison in PTEST can be performed as a
>> +   bit-test-and-branch instead.  VAL must contain the original tree
>> +   expression of the non-zero operand which will be used to rewrite the
>> +   comparison in PTEST.
>> +
>> +   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
>> +   else FALSE.  */
>
> The function now returns an icode rather than true/false.  I think it'd
> also be good to clarify what *PTEST means for the tbranch case.  How about:
>
> /* PTEST points to a comparison that compares its first operand with zero.
>    Check to see if it can be performed as a bit-test-and-branch instead.
>    On success, return the instruction that performs the bit-and-test-and-branch

(bit-test-and-branch)

>    and replace the second operand of *PTEST with the bit number to test.
>    On failure, return CODE_FOR_nothing and leave *PTEST unchanged.
>
>    Note that the comparison described by *PTEST should not be taken
>    literally after a successful return.  *PTEST is just a convenient
>    place to store the two operands of the bit-and-test.
>
>    VAL must contain the original tree expression for the first operand
>    of *PTEST.  */
>
> Looks good to me otherwise.
>
> Thanks,
> Richard
>
>> +static enum insn_code
>> +validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
>> +{
>> +  if (!val || TREE_CODE (val) != SSA_NAME)
>> +    return CODE_FOR_nothing;
>> +
>> +  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
>> +  rtx test = *ptest;
>> +  direct_optab optab;
>> +
>> +  if (GET_CODE (test) == EQ)
>> +    optab = tbranch_eq_optab;
>> +  else if (GET_CODE (test) == NE)
>> +    optab = tbranch_ne_optab;
>> +  else
>> +    return CODE_FOR_nothing;
>> +
>> +  *res = optab;
>> +
>> +  /* If the target supports the testbit comparison directly, great.  */
>> +  auto icode = direct_optab_handler (optab, mode);
>> +  if (icode == CODE_FOR_nothing)
>> +    return icode;
>> +
>> +  if (tree_zero_one_valued_p (val))
>> +    {
>> +      auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
>> +      XEXP (test, 1) = gen_int_mode (pos, mode);
>> +      *ptest = test;
>> +      *pmode = mode;
>> +      return icode;
>> +    }
>> +
>> +  wide_int wcst = get_nonzero_bits (val);
>> +  if (wcst == -1)
>> +    return CODE_FOR_nothing;
>> +
>> +  int bitpos;
>> +
>> +  if ((bitpos = wi::exact_log2 (wcst)) == -1)
>> +    return CODE_FOR_nothing;
>> +
>> +  auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
>> +  XEXP (test, 1) = gen_int_mode (pos, mode);
>> +  *ptest = test;
>> +  *pmode = mode;
>> +  return icode;
>> +}
>> +
>>  /* Generate code to compare X with Y so that the condition codes are
>>     set and to jump to LABEL if the condition is true.  If X is a
>>     constant and Y is not a constant, then the comparison is swapped to
>> @@ -4664,11 +4729,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
>>     It will be potentially converted into an unsigned variant based on
>>     UNSIGNEDP to select a proper jump instruction.
>>     
>> -   PROB is the probability of jumping to LABEL.  */
>> +   PROB is the probability of jumping to LABEL.  If the comparison is against
>> +   zero then VAL contains the expression from which the non-zero RTL is
>> +   derived.  */
>>  
>>  void
>>  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>> -			 machine_mode mode, int unsignedp, rtx label,
>> +			 machine_mode mode, int unsignedp, tree val, rtx label,
>>                           profile_probability prob)
>>  {
>>    rtx op0 = x, op1 = y;
>> @@ -4693,10 +4760,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>>  
>>    prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
>>  		    &test, &mode);
>> -  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
>> +
>> +  /* Check if we're comparing a truth type with 0, and if so check if
>> +     the target supports tbranch.  */
>> +  machine_mode tmode = mode;
>> +  direct_optab optab;
>> +  if (op1 == CONST0_RTX (GET_MODE (op1))
>> +      && validate_test_and_branch (val, &test, &tmode,
>> +				   &optab) != CODE_FOR_nothing)
>> +    {
>> +      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
>> +      return;
>> +    }
>> +
>> +  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
>>  }
>>  
>> -
>>
>> +/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
>> +
>> +void
>> +emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
>> +			 machine_mode mode, int unsignedp, rtx label,
>> +			 profile_probability prob)
>> +{
>> +  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
>> +			   label, prob);
>> +}
>> +
>> +
>>  /* Emit a library call comparison between floating point X and Y.
>>     COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
>>  
>> diff --git a/gcc/optabs.def b/gcc/optabs.def
>> index a6db2342bed6baf13ecbd84112c8432c6972e6fe..3199b05e90d6b9b9c6fb3c0353db3db02321e964 100644
>> --- a/gcc/optabs.def
>> +++ b/gcc/optabs.def
>> @@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
>>  OPTAB_D (reload_out_optab, "reload_out$a")
>>  
>>  OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
>> +OPTAB_D (tbranch_eq_optab, "tbranch_eq$a4")
>> +OPTAB_D (tbranch_ne_optab, "tbranch_ne$a4")
>>  OPTAB_D (addcc_optab, "add$acc")
>>  OPTAB_D (negcc_optab, "neg$acc")
>>  OPTAB_D (notcc_optab, "not$acc")
>> diff --git a/gcc/optabs.h b/gcc/optabs.h
>> index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
>> --- a/gcc/optabs.h
>> +++ b/gcc/optabs.h
>> @@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>>  				     machine_mode, int, rtx,
>>  				     profile_probability prob
>>  					= profile_probability::uninitialized ());
>> +extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
>> +				     machine_mode, int, tree, rtx,
>> +				     profile_probability prob
>> +					= profile_probability::uninitialized ());
>>  
>>  /* Generate code to indirectly jump to a location given in the rtx LOC.  */
>>  extern void emit_indirect_jump (rtx);
>> diff --git a/gcc/tree.h b/gcc/tree.h
>> index a863d2e50e5ecafa3f5da4dda98d9637261d07a9..abedaa80a3983ebb6f9ac733b2eaa8d039688f0a 100644
>> --- a/gcc/tree.h
>> +++ b/gcc/tree.h
>> @@ -4726,6 +4726,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
>>  extern tree signed_type_for (tree);
>>  extern tree unsigned_type_for (tree);
>>  extern bool is_truth_type_for (tree, tree);
>> +extern bool tree_zero_one_valued_p (tree);
>>  extern tree truth_type_for (tree);
>>  extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
>>  extern tree build_pointer_type (tree);
  

Patch

--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6972,6 +6972,13 @@  case, you can and should make operand 1's predicate reject some operators
 in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
 from the machine description.
 
+@cindex @code{tbranch@var{mode}4} instruction pattern
+@item @samp{tbranch@var{mode}4}
+Conditional branch instruction combined with a bit test-and-compare
+instruction. Operand 0 is a comparison operator.  Operand 1 is the
+operand of the comparison. Operand 2 is the bit position of Operand 1 to test.
+Operand 3 is the @code{code_label} to jump to.
+
 @cindex @code{cbranch@var{mode}4} instruction pattern
 @item @samp{cbranch@var{mode}4}
 Conditional branch instruction combined with a compare instruction.
diff --git a/gcc/dojump.h b/gcc/dojump.h
index e379cceb34bb1765cb575636e4c05b61501fc2cf..d1d79c490c420a805fe48d58740a79c1f25fb839 100644
--- a/gcc/dojump.h
+++ b/gcc/dojump.h
@@ -71,6 +71,10 @@  extern void jumpifnot (tree exp, rtx_code_label *label,
 extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
 			 profile_probability);
 
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
+				     machine_mode, rtx, rtx_code_label *,
+				     rtx_code_label *, profile_probability);
+
 extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
 				     machine_mode, rtx, rtx_code_label *,
 				     rtx_code_label *, profile_probability);
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
index 2af0cd1aca3b6af13d5d8799094ee93f18022296..190324f36f1a31990f8c49bc8c0f45c23da5c31e 100644
--- a/gcc/dojump.cc
+++ b/gcc/dojump.cc
@@ -619,7 +619,7 @@  do_jump (tree exp, rtx_code_label *if_false_label,
 	}
       do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
 			       NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
-			       GET_MODE (temp), NULL_RTX,
+			       exp, GET_MODE (temp), NULL_RTX,
 			       if_false_label, if_true_label, prob);
     }
 
@@ -687,7 +687,7 @@  do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 
       /* All but high-order word must be compared as unsigned.  */
       do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
-			       word_mode, NULL_RTX, NULL, if_true_label,
+			       NULL, word_mode, NULL_RTX, NULL, if_true_label,
 			       prob);
 
       /* Emit only one comparison for 0.  Do not emit the last cond jump.  */
@@ -695,8 +695,8 @@  do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
 	break;
 
       /* Consider lower words only if these are equal.  */
-      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
-			       NULL_RTX, NULL, if_false_label,
+      do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
+			       word_mode, NULL_RTX, NULL, if_false_label,
 			       prob.invert ());
     }
 
@@ -755,7 +755,7 @@  do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   if (part != 0)
     {
-      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
+      do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
 			       NULL_RTX, if_false_label, if_true_label, prob);
       return;
     }
@@ -766,7 +766,7 @@  do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             const0_rtx, EQ, 1, word_mode, NULL_RTX,
+			     const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -809,8 +809,8 @@  do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
 
   for (i = 0; i < nwords; i++)
     do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
-                             operand_subword_force (op1, i, mode),
-                             EQ, 0, word_mode, NULL_RTX,
+			     operand_subword_force (op1, i, mode),
+			     EQ, 0, NULL, word_mode, NULL_RTX,
 			     if_false_label, NULL, prob);
 
   if (if_true_label)
@@ -962,6 +962,23 @@  do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 			 rtx_code_label *if_false_label,
 			 rtx_code_label *if_true_label,
 			 profile_probability prob)
+{
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
+			  if_false_label, if_true_label, prob);
+}
+
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
+   The decision as to signed or unsigned comparison must be made by the caller.
+
+   If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
+   compared.  */
+
+void
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
+			 tree val, machine_mode mode, rtx size,
+			 rtx_code_label *if_false_label,
+			 rtx_code_label *if_true_label,
+			 profile_probability prob)
 {
   rtx tem;
   rtx_code_label *dummy_label = NULL;
@@ -1177,8 +1194,10 @@  do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 		    }
 		  else
 		    dest_label = if_false_label;
-                  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, dest_label, NULL, first_prob);
+
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, dest_label, NULL,
+					   first_prob);
 		}
 	      /* For !and_them we want to split:
 		 if (x) goto t; // prob;
@@ -1192,8 +1211,9 @@  do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
               else
 		{
 		  profile_probability first_prob = prob.split (cprob);
-		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
-					   size, NULL, if_true_label, first_prob);
+		  do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
+					   val, mode, size, NULL,
+					   if_true_label, first_prob);
 		  if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
 		    {
 		      /* x != y can be split into x unord y || x ltgt y
@@ -1215,7 +1235,7 @@  do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
 	    }
 	}
 
-      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
+      emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
 			       if_true_label, prob);
     }
 
@@ -1289,9 +1309,9 @@  do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
       op1 = new_op1;
     }
 
-  do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
-                           ((mode == BLKmode)
-                            ? expr_size (treeop0) : NULL_RTX),
+  do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
+			   ((mode == BLKmode)
+			    ? expr_size (treeop0) : NULL_RTX),
 			   if_false_label, if_true_label, prob);
 }
 
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index cff37ccb0dfc3dd79b97d0abfd872f340855dc96..5b368f77e91d3fce29870f1a5b54a0301e6b7794 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -46,6 +46,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "libfuncs.h"
 #include "internal-fn.h"
 #include "langhooks.h"
+#include "gimple.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
 				   machine_mode *);
@@ -4620,7 +4621,7 @@  prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
 
 static void
 emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-			  profile_probability prob)
+			  direct_optab cmp_optab, profile_probability prob)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4629,7 +4630,7 @@  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   mclass = GET_MODE_CLASS (mode);
   optab_mode = (mclass == MODE_CC) ? CCmode : mode;
-  icode = optab_handler (cbranch_optab, optab_mode);
+  icode = optab_handler (cmp_optab, optab_mode);
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (insn_operand_matches (icode, 0, test));
@@ -4644,6 +4645,71 @@  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
     add_reg_br_prob_note (insn, prob);
 }
 
+/* Check to see if the supplied comparison in PTEST can be performed as a
+   bit-test-and-branch instead.  VAL must contain the original tree
+   expression of the non-zero operand which will be used to rewrite the
+   comparison in PTEST.
+
+   Returns TRUE if operation succeeds and returns updated PMODE and PTEST,
+   else FALSE.  */
+
+bool
+static validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode)
+{
+  if (!val)
+    return false;
+
+  machine_mode mode = TYPE_MODE (TREE_TYPE (val));
+  rtx test = *ptest;
+
+  if (GET_CODE (test) != EQ && GET_CODE (test) != NE)
+    return false;
+
+  if (tree_zero_one_valued_p (val))
+    {
+      auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
+      XEXP (test, 1) = gen_int_mode (pos, mode);
+      *ptest = test;
+      *pmode = mode;
+      return true;
+    }
+
+  if (TREE_CODE (val) != SSA_NAME)
+    return false;
+
+  gimple *def = SSA_NAME_DEF_STMT (val);
+  if (!is_gimple_assign (def)
+      || gimple_assign_rhs_code (def) != BIT_AND_EXPR)
+    return false;
+
+  tree cst = gimple_assign_rhs2 (def);
+
+  if (!tree_fits_uhwi_p (cst))
+    return false;
+
+  tree op0 = gimple_assign_rhs1 (def);
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def = SSA_NAME_DEF_STMT (op0);
+      if (gimple_assign_cast_p (def))
+	op0 = gimple_assign_rhs1 (def);
+    }
+
+  wide_int wcst = wi::uhwi (tree_to_uhwi (cst),
+			    TYPE_PRECISION (TREE_TYPE (op0)));
+  int bitpos;
+
+  if ((bitpos = wi::exact_log2 (wcst)) == -1)
+    return false;
+
+  mode = TYPE_MODE (TREE_TYPE (op0));
+  auto pos = BYTES_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
+  XEXP (test, 1) = gen_int_mode (pos, mode);
+  *ptest = test;
+  *pmode = mode;
+  return true;
+}
+
 /* Generate code to compare X with Y so that the condition codes are
    set and to jump to LABEL if the condition is true.  If X is a
    constant and Y is not a constant, then the comparison is swapped to
@@ -4661,15 +4727,18 @@  emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
    It will be potentially converted into an unsigned variant based on
    UNSIGNEDP to select a proper jump instruction.
    
-   PROB is the probability of jumping to LABEL.  */
+   PROB is the probability of jumping to LABEL.  If the comparison is against
+   zero then VAL contains the expression from which the non-zero RTL is
+   derived.  */
 
 void
 emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
-			 machine_mode mode, int unsignedp, rtx label,
+			 machine_mode mode, int unsignedp, tree val, rtx label,
                          profile_probability prob)
 {
   rtx op0 = x, op1 = y;
   rtx test;
+  enum insn_code icode;
 
   /* Swap operands and condition to ensure canonical RTL.  */
   if (swap_commutative_operands_p (x, y)
@@ -4690,10 +4759,37 @@  emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
 
   prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
 		    &test, &mode);
-  emit_cmp_and_jump_insn_1 (test, mode, label, prob);
+
+  /* Check if we're comparing a truth type with 0, and if so check if
+     the target supports tbranch.  */
+  machine_mode tmode = mode;
+  if (op1 == CONST0_RTX (GET_MODE (op1))
+      && validate_test_and_branch (val, &test, &tmode))
+    {
+      /* If the target supports the testbit comparison directly, great.  */
+      icode = direct_optab_handler (tbranch_optab, tmode);
+      if (icode != CODE_FOR_nothing)
+	{
+	  emit_cmp_and_jump_insn_1 (test, tmode, label, tbranch_optab, prob);
+	  return;
+	}
+    }
+
+  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob);
 }
 
-
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
+
+void
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
+			 machine_mode mode, int unsignedp, rtx label,
+			 profile_probability prob)
+{
+  emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
+			   label, prob);
+}
+
+
 /* Emit a library call comparison between floating point X and Y.
    COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).  */
 
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 9947aed67fb8a3b675cb0aab9aeb059f89644106..623a596aca2f538a03602e02e6ac12f43f3303c4 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -220,6 +220,7 @@  OPTAB_D (reload_in_optab, "reload_in$a")
 OPTAB_D (reload_out_optab, "reload_out$a")
 
 OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
+OPTAB_D (tbranch_optab, "tbranch$a4")
 OPTAB_D (addcc_optab, "add$acc")
 OPTAB_D (negcc_optab, "neg$acc")
 OPTAB_D (notcc_optab, "not$acc")
diff --git a/gcc/optabs.h b/gcc/optabs.h
index cfd7c742d2d21b0539f5227c22a94f32c793d6f7..cd55604bc3d452d7e28c5530bb4793d481766f4f 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -268,6 +268,10 @@  extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
 				     machine_mode, int, rtx,
 				     profile_probability prob
 					= profile_probability::uninitialized ());
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
+				     machine_mode, int, tree, rtx,
+				     profile_probability prob
+					= profile_probability::uninitialized ());
 
 /* Generate code to indirectly jump to a location given in the rtx LOC.  */
 extern void emit_indirect_jump (rtx);
diff --git a/gcc/tree.h b/gcc/tree.h
index 8f8a9660c9e0605eb516de194640b8c1b531b798..be3d2dee82f692e81082cf21c878c10f9fe9e1f1 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4690,6 +4690,7 @@  extern tree signed_or_unsigned_type_for (int, tree);
 extern tree signed_type_for (tree);
 extern tree unsigned_type_for (tree);
 extern bool is_truth_type_for (tree, tree);
+extern bool tree_zero_one_valued_p (tree);
 extern tree truth_type_for (tree);
 extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
 extern tree build_pointer_type (tree);