expand: Add trivial folding for bit query builtins at expansion time [PR114044]

Message ID Zd2akG9XxLXMWdk2@tucnak
State New
Headers
Series expand: Add trivial folding for bit query builtins at expansion time [PR114044] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Jakub Jelinek Feb. 27, 2024, 8:17 a.m. UTC
  Hi!

While it seems a lot of places in various optimization passes fold
bit query internal functions with INTEGER_CST arguments to INTEGER_CST
when there is a lhs, when lhs is missing, all the removals of such dead
stmts are guarded with -ftree-dce, so with -fno-tree-dce those unfolded
ifn calls remain in the IL until expansion.  If they have large/huge
BITINT_TYPE arguments, there is no BLKmode optab and so expansion ICEs.
Bitint lowering doesn't touch such calls because it doesn't know they
need touching: functions containing only those will not even be further
processed by the pass, because there are no non-small BITINT_TYPE SSA_NAMEs
(plus the 2 exceptions: stores of BITINT_TYPE INTEGER_CSTs and conversions
from BITINT_TYPE INTEGER_CSTs to floating point SSA_NAMEs), and when walking
there is no special case for calls with BITINT_TYPE INTEGER_CSTs either;
for normal calls those are normally handled at expansion time.

So, the following patch adjusts the expansion of these 6 ifns by doing
nothing if there is no lhs; and, just in case the user disabled all
possible passes that would fold this, it also handles the case of setting
the lhs to an ifn call with an INTEGER_CST argument.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-02-27  Jakub Jelinek  <jakub@redhat.com>

	PR rtl-optimization/114044
	* internal-fn.def (CLRSB, CLZ, CTZ, FFS, PARITY): Use
	DEF_INTERNAL_INT_EXT_FN macro rather than DEF_INTERNAL_INT_FN.
	* internal-fn.h (expand_CLRSB, expand_CLZ, expand_CTZ, expand_FFS,
	expand_PARITY): Declare.
	* internal-fn.cc (expand_bitquery, expand_CLRSB, expand_CLZ,
	expand_CTZ, expand_FFS, expand_PARITY): New functions.
	(expand_POPCOUNT): Use expand_bitquery.

	* gcc.dg/bitint-95.c: New test.


	Jakub
  

Comments

Richard Biener Feb. 27, 2024, 8:35 a.m. UTC | #1
On Tue, 27 Feb 2024, Jakub Jelinek wrote:

> Hi!
> 
> While it seems a lot of places in various optimization passes fold
> bit query internal functions with INTEGER_CST arguments to INTEGER_CST
> when there is a lhs, when lhs is missing, all the removals of such dead
> stmts are guarded with -ftree-dce, so with -fno-tree-dce those unfolded
> ifn calls remain in the IL until expansion.  If they have large/huge
> BITINT_TYPE arguments, there is no BLKmode optab and so expansion ICEs,
> and bitint lowering doesn't touch such calls because it doesn't know they
> need touching, functions only containing those will not even be further
> processed by the pass because there are no non-small BITINT_TYPE SSA_NAMEs
> + the 2 exceptions (stores of BITINT_TYPE INTEGER_CSTs and conversions
> from BITINT_TYPE INTEGER_CSTs to floating point SSA_NAMEs) and when walking
> there is no special case for calls with BITINT_TYPE INTEGER_CSTs either,
> those are for normal calls normally handled at expansion time.
> 
> So, the following patch adjusts the expansion of these 6 ifns by doing
> nothing if there is no lhs; and, just in case the user disabled all
> possible passes that would fold this, it also handles the case of setting
> the lhs to an ifn call with an INTEGER_CST argument.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

I do wonder whether we can handle the missing LHS case generically
in the direct optab expander for fns that are PURE or CONST?

Thanks,
Richard.

> 2024-02-27  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR rtl-optimization/114044
> 	* internal-fn.def (CLRSB, CLZ, CTZ, FFS, PARITY): Use
> 	DEF_INTERNAL_INT_EXT_FN macro rather than DEF_INTERNAL_INT_FN.
> 	* internal-fn.h (expand_CLRSB, expand_CLZ, expand_CTZ, expand_FFS,
> 	expand_PARITY): Declare.
> 	* internal-fn.cc (expand_bitquery, expand_CLRSB, expand_CLZ,
> 	expand_CTZ, expand_FFS, expand_PARITY): New functions.
> 	(expand_POPCOUNT): Use expand_bitquery.
> 
> 	* gcc.dg/bitint-95.c: New test.
> 
> --- gcc/internal-fn.def.jj	2024-01-03 11:51:32.411718977 +0100
> +++ gcc/internal-fn.def	2024-02-26 13:20:51.682005459 +0100
> @@ -440,11 +440,11 @@ DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS, ECF_
>  DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS_CONJ, ECF_CONST, cmls_conj, ternary)
>  
>  /* Unary integer ops.  */
> -DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
> -DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
> -DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
> -DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
> -DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
> +DEF_INTERNAL_INT_EXT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
> +DEF_INTERNAL_INT_EXT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
> +DEF_INTERNAL_INT_EXT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
> +DEF_INTERNAL_INT_EXT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
> +DEF_INTERNAL_INT_EXT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
>  DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
>  
>  DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
> --- gcc/internal-fn.h.jj	2024-01-03 11:51:28.313775852 +0100
> +++ gcc/internal-fn.h	2024-02-26 13:22:09.532917080 +0100
> @@ -262,6 +262,11 @@ extern void expand_MULBITINT (internal_f
>  extern void expand_DIVMODBITINT (internal_fn, gcall *);
>  extern void expand_FLOATTOBITINT (internal_fn, gcall *);
>  extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
> +extern void expand_CLRSB (internal_fn, gcall *);
> +extern void expand_CLZ (internal_fn, gcall *);
> +extern void expand_CTZ (internal_fn, gcall *);
> +extern void expand_FFS (internal_fn, gcall *);
> +extern void expand_PARITY (internal_fn, gcall *);
>  extern void expand_POPCOUNT (internal_fn, gcall *);
>  
>  extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
> --- gcc/internal-fn.cc.jj	2024-01-05 11:16:49.568173526 +0100
> +++ gcc/internal-fn.cc	2024-02-26 14:03:58.265848223 +0100
> @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.
>  #include "explow.h"
>  #include "rtl-iter.h"
>  #include "gimple-range.h"
> +#include "fold-const-call.h"
>  
>  /* For lang_hooks.types.type_for_mode.  */
>  #include "langhooks.h"
> @@ -5107,9 +5108,63 @@ expand_BITINTTOFLOAT (internal_fn, gcall
>      emit_move_insn (target, val);
>  }
>  
> +static bool
> +expand_bitquery (internal_fn fn, gcall *stmt)
> +{
> +  tree lhs = gimple_call_lhs (stmt);
> +  if (lhs == NULL_TREE)
> +    return false;
> +  tree arg = gimple_call_arg (stmt, 0);
> +  if (TREE_CODE (arg) == INTEGER_CST)
> +    {
> +      tree ret = fold_const_call (as_combined_fn (fn), TREE_TYPE (arg), arg);
> +      gcc_checking_assert (ret && TREE_CODE (ret) == INTEGER_CST);
> +      expand_assignment (lhs, ret, false);
> +      return false;
> +    }
> +  return true;
> +}
> +
> +void
> +expand_CLRSB (internal_fn fn, gcall *stmt)
> +{
> +  if (expand_bitquery (fn, stmt))
> +    expand_unary_optab_fn (fn, stmt, clrsb_optab);
> +}
> +
> +void
> +expand_CLZ (internal_fn fn, gcall *stmt)
> +{
> +  if (expand_bitquery (fn, stmt))
> +    expand_unary_optab_fn (fn, stmt, clz_optab);
> +}
> +
> +void
> +expand_CTZ (internal_fn fn, gcall *stmt)
> +{
> +  if (expand_bitquery (fn, stmt))
> +    expand_unary_optab_fn (fn, stmt, ctz_optab);
> +}
> +
> +void
> +expand_FFS (internal_fn fn, gcall *stmt)
> +{
> +  if (expand_bitquery (fn, stmt))
> +    expand_unary_optab_fn (fn, stmt, ffs_optab);
> +}
> +
> +void
> +expand_PARITY (internal_fn fn, gcall *stmt)
> +{
> +  if (expand_bitquery (fn, stmt))
> +    expand_unary_optab_fn (fn, stmt, parity_optab);
> +}
> +
>  void
>  expand_POPCOUNT (internal_fn fn, gcall *stmt)
>  {
> +  if (!expand_bitquery (fn, stmt))
> +    return;
>    if (gimple_call_num_args (stmt) == 1)
>      {
>        expand_unary_optab_fn (fn, stmt, popcount_optab);
> --- gcc/testsuite/gcc.dg/bitint-95.c.jj	2024-02-26 16:16:09.192065858 +0100
> +++ gcc/testsuite/gcc.dg/bitint-95.c	2024-02-26 14:19:30.079824133 +0100
> @@ -0,0 +1,45 @@
> +/* PR rtl-optimization/114044 */
> +/* { dg-do compile { target bitint575 } } */
> +/* { dg-options "-O -fno-tree-dce" } */
> +
> +void
> +foo (void)
> +{
> +  unsigned _BitInt (575) a = 3;
> +  __builtin_clzg (a);
> +}
> +
> +void
> +bar (void)
> +{
> +  unsigned _BitInt (575) a = 3;
> +  __builtin_ctzg (a);
> +}
> +
> +void
> +baz (void)
> +{
> +  signed _BitInt (575) a = 3;
> +  __builtin_clrsbg (a);
> +}
> +
> +void
> +qux (void)
> +{
> +  signed _BitInt (575) a = 3;
> +  __builtin_ffsg (a);
> +}
> +
> +void
> +garply (void)
> +{
> +  unsigned _BitInt (575) a = 3;
> +  __builtin_parityg (a);
> +}
> +
> +void
> +corge (void)
> +{
> +  unsigned _BitInt (575) a = 3;
> +  __builtin_popcountg (a);
> +}
> 
> 	Jakub
> 
>
  
Jakub Jelinek Feb. 27, 2024, 8:44 a.m. UTC | #2
On Tue, Feb 27, 2024 at 09:35:43AM +0100, Richard Biener wrote:
> I do wonder whether we can handle the missing LHS case generically
> in the direct optab expander for fns that are PURE or CONST?

Maybe the 2 operand expand_internal_call could do it before handing over
to individual expanders.  Can that wait for stage1?

	Jakub
  
Richard Biener Feb. 27, 2024, 8:51 a.m. UTC | #3
On Tue, 27 Feb 2024, Jakub Jelinek wrote:

> On Tue, Feb 27, 2024 at 09:35:43AM +0100, Richard Biener wrote:
> > I do wonder whether we can handle the missing LHS case generically
> > in the direct optab expander for fns that are PURE or CONST?
> 
> Maybe the 2 operand expand_internal_call could do it before handing over
> to individual expanders.  Can that wait for stage1?

Sure, unless you think this is a better fix at this stage (not doing
the constant folding).

Richard.
  
Jakub Jelinek Feb. 27, 2024, 8:54 a.m. UTC | #4
On Tue, Feb 27, 2024 at 09:51:05AM +0100, Richard Biener wrote:
> On Tue, 27 Feb 2024, Jakub Jelinek wrote:
> 
> > On Tue, Feb 27, 2024 at 09:35:43AM +0100, Richard Biener wrote:
> > > I do wonder whether we can handle the missing LHS case generically
> > > in the direct optab expander for fns that are PURE or CONST?
> > 
> > Maybe the 2 operand expand_internal_call could do it before handing over
> > to individual expanders.  Can that wait for stage1?
> 
> Sure, unless you think this is a better fix at this stage (not doing
> the constant folding).

The constant folding is there for the case where there is a lhs.
So, if NULL lhs handling gets added to expand_internal_call,
expand_bitquery could just drop its own NULL lhs check, but the rest
would stay.  And for other ifns, there is no known issue with the NULL
lhs, so that would be just a cleanup with some risks.

	Jakub
  

Patch

--- gcc/internal-fn.def.jj	2024-01-03 11:51:32.411718977 +0100
+++ gcc/internal-fn.def	2024-02-26 13:20:51.682005459 +0100
@@ -440,11 +440,11 @@  DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS, ECF_
 DEF_INTERNAL_OPTAB_FN (COMPLEX_FMS_CONJ, ECF_CONST, cmls_conj, ternary)
 
 /* Unary integer ops.  */
-DEF_INTERNAL_INT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
-DEF_INTERNAL_INT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
-DEF_INTERNAL_INT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
-DEF_INTERNAL_INT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
-DEF_INTERNAL_INT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
+DEF_INTERNAL_INT_EXT_FN (CLRSB, ECF_CONST | ECF_NOTHROW, clrsb, unary)
+DEF_INTERNAL_INT_EXT_FN (CLZ, ECF_CONST | ECF_NOTHROW, clz, unary)
+DEF_INTERNAL_INT_EXT_FN (CTZ, ECF_CONST | ECF_NOTHROW, ctz, unary)
+DEF_INTERNAL_INT_EXT_FN (FFS, ECF_CONST | ECF_NOTHROW, ffs, unary)
+DEF_INTERNAL_INT_EXT_FN (PARITY, ECF_CONST | ECF_NOTHROW, parity, unary)
 DEF_INTERNAL_INT_EXT_FN (POPCOUNT, ECF_CONST | ECF_NOTHROW, popcount, unary)
 
 DEF_INTERNAL_FN (GOMP_TARGET_REV, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
--- gcc/internal-fn.h.jj	2024-01-03 11:51:28.313775852 +0100
+++ gcc/internal-fn.h	2024-02-26 13:22:09.532917080 +0100
@@ -262,6 +262,11 @@  extern void expand_MULBITINT (internal_f
 extern void expand_DIVMODBITINT (internal_fn, gcall *);
 extern void expand_FLOATTOBITINT (internal_fn, gcall *);
 extern void expand_BITINTTOFLOAT (internal_fn, gcall *);
+extern void expand_CLRSB (internal_fn, gcall *);
+extern void expand_CLZ (internal_fn, gcall *);
+extern void expand_CTZ (internal_fn, gcall *);
+extern void expand_FFS (internal_fn, gcall *);
+extern void expand_PARITY (internal_fn, gcall *);
 extern void expand_POPCOUNT (internal_fn, gcall *);
 
 extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
--- gcc/internal-fn.cc.jj	2024-01-05 11:16:49.568173526 +0100
+++ gcc/internal-fn.cc	2024-02-26 14:03:58.265848223 +0100
@@ -52,6 +52,7 @@  along with GCC; see the file COPYING3.
 #include "explow.h"
 #include "rtl-iter.h"
 #include "gimple-range.h"
+#include "fold-const-call.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -5107,9 +5108,63 @@  expand_BITINTTOFLOAT (internal_fn, gcall
     emit_move_insn (target, val);
 }
 
+static bool
+expand_bitquery (internal_fn fn, gcall *stmt)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs == NULL_TREE)
+    return false;
+  tree arg = gimple_call_arg (stmt, 0);
+  if (TREE_CODE (arg) == INTEGER_CST)
+    {
+      tree ret = fold_const_call (as_combined_fn (fn), TREE_TYPE (arg), arg);
+      gcc_checking_assert (ret && TREE_CODE (ret) == INTEGER_CST);
+      expand_assignment (lhs, ret, false);
+      return false;
+    }
+  return true;
+}
+
+void
+expand_CLRSB (internal_fn fn, gcall *stmt)
+{
+  if (expand_bitquery (fn, stmt))
+    expand_unary_optab_fn (fn, stmt, clrsb_optab);
+}
+
+void
+expand_CLZ (internal_fn fn, gcall *stmt)
+{
+  if (expand_bitquery (fn, stmt))
+    expand_unary_optab_fn (fn, stmt, clz_optab);
+}
+
+void
+expand_CTZ (internal_fn fn, gcall *stmt)
+{
+  if (expand_bitquery (fn, stmt))
+    expand_unary_optab_fn (fn, stmt, ctz_optab);
+}
+
+void
+expand_FFS (internal_fn fn, gcall *stmt)
+{
+  if (expand_bitquery (fn, stmt))
+    expand_unary_optab_fn (fn, stmt, ffs_optab);
+}
+
+void
+expand_PARITY (internal_fn fn, gcall *stmt)
+{
+  if (expand_bitquery (fn, stmt))
+    expand_unary_optab_fn (fn, stmt, parity_optab);
+}
+
 void
 expand_POPCOUNT (internal_fn fn, gcall *stmt)
 {
+  if (!expand_bitquery (fn, stmt))
+    return;
   if (gimple_call_num_args (stmt) == 1)
     {
       expand_unary_optab_fn (fn, stmt, popcount_optab);
--- gcc/testsuite/gcc.dg/bitint-95.c.jj	2024-02-26 16:16:09.192065858 +0100
+++ gcc/testsuite/gcc.dg/bitint-95.c	2024-02-26 14:19:30.079824133 +0100
@@ -0,0 +1,45 @@ 
+/* PR rtl-optimization/114044 */
+/* { dg-do compile { target bitint575 } } */
+/* { dg-options "-O -fno-tree-dce" } */
+
+void
+foo (void)
+{
+  unsigned _BitInt (575) a = 3;
+  __builtin_clzg (a);
+}
+
+void
+bar (void)
+{
+  unsigned _BitInt (575) a = 3;
+  __builtin_ctzg (a);
+}
+
+void
+baz (void)
+{
+  signed _BitInt (575) a = 3;
+  __builtin_clrsbg (a);
+}
+
+void
+qux (void)
+{
+  signed _BitInt (575) a = 3;
+  __builtin_ffsg (a);
+}
+
+void
+garply (void)
+{
+  unsigned _BitInt (575) a = 3;
+  __builtin_parityg (a);
+}
+
+void
+corge (void)
+{
+  unsigned _BitInt (575) a = 3;
+  __builtin_popcountg (a);
+}