[v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

Message ID d0d0e0d8-8b94-a84c-cb67-c94e772b766b@linux.ibm.com
State New
Headers
Series [v3] Modify combine pattern by a pseudo AND with its nonzero bits [PR93453] |

Commit Message

HAO CHEN GUI July 22, 2022, 7:07 a.m. UTC
  Hi,
  This patch creates a new function - change_pseudo_and_mask. If recog fails,
the function converts a single pseudo to the pseudo AND with a mask if the
outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
conversion helps pattern to match rotate and mask insn on some targets.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-07-22  Haochen Gui  <guihaoc@linux.ibm.com>

gcc/
	PR target/93453
	* combine.cc (change_pseudo_and_mask): New.
	(recog_for_combine): If recog fails, try again with the pattern
	modified by change_pseudo_and_mask.
	* config/rs6000/rs6000.md (plus_ior_xor): Remove.
	(anonymous split pattern for plus_ior_xor): Remove.

gcc/testsuite/
	PR target/93453
	* gcc.target/powerpc/pr93453-2.c: New.
	* gcc.target/powerpc/rlwimi-2.c: Both 32/64 bit platforms generate the
	same number of rlwimi.  Reset the counter.

patch.diff
  

Comments

HAO CHEN GUI Aug. 1, 2022, 2 a.m. UTC | #1
Hi,
   Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598685.html
Thanks.

On 22/7/2022 下午 3:07, HAO CHEN GUI wrote:
> Hi,
>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
> the function converts a single pseudo to the pseudo AND with a mask if the
> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
> conversion helps pattern to match rotate and mask insn on some targets.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-07-22  Haochen Gui  <guihaoc@linux.ibm.com>
> 
> gcc/
> 	PR target/93453
> 	* combine.cc (change_pseudo_and_mask): New.
> 	(recog_for_combine): If recog fails, try again with the pattern
> 	modified by change_pseudo_and_mask.
> 	* config/rs6000/rs6000.md (plus_ior_xor): Remove.
> 	(anonymous split pattern for plus_ior_xor): Remove.
> 
> gcc/testsuite/
> 	PR target/93453
> 	* gcc.target/powerpc/pr93453-2.c: New.
> 	* gcc.target/powerpc/rlwimi-2.c: Both 32/64 bit platforms generate the
> 	same number of rlwimi.  Reset the counter.
> 
> patch.diff
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index a5fabf397f7..e1c1aa7da1c 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -11599,6 +11599,48 @@ change_zero_ext (rtx pat)
>    return changed;
>  }
> 
> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/AND, convert a pseudo to pseudo AND with a mask if its nonzero_bits
> +   is less than its mode mask.  The nonzero_bits in later passes is not a
> +   superset of what is known in combine pass.  So an insn with nonzero_bits
> +   can't be recoged later.  */
> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> +  rtx src = SET_SRC (pat);
> +  if ((GET_CODE (src) == IOR
> +       || GET_CODE (src) == XOR
> +       || GET_CODE (src) == PLUS)
> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> +	    || GET_CODE (XEXP (src, 0)) == AND)
> +	   && REG_P (XEXP (src, 1)))))
> +    {
> +      rtx reg = XEXP (src, 1);
> +      machine_mode mode = GET_MODE (reg);
> +      unsigned HOST_WIDE_INT nonzero = nonzero_bits (reg, mode);
> +      if (nonzero < GET_MODE_MASK (mode))
> +	{
> +	  int shift;
> +
> +	  if (GET_CODE (XEXP (src, 0)) == ASHIFT)
> +	    shift = INTVAL (XEXP (XEXP (src, 0), 1));
> +	  else
> +	    shift = ctz_hwi (INTVAL (XEXP (XEXP (src, 0), 1)));
> +
> +	  if (shift > 0
> +	      && (HOST_WIDE_INT_1U << shift) - 1 >= nonzero)
> +	    {
> +	      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << shift) - 1;
> +	      rtx x = gen_rtx_AND (mode, reg, GEN_INT (mask));
> +	      SUBST (XEXP (SET_SRC (pat), 1), x);
> +	      maybe_swap_commutative_operands (SET_SRC (pat));
> +	      return true;
> +	    }
> +	}
> +    }
> +  return false;
> +}
> +
>  /* Like recog, but we receive the address of a pointer to a new pattern.
>     We try to match the rtx that the pointer points to.
>     If that fails, we may try to modify or replace the pattern,
> @@ -11646,7 +11688,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>  	    }
>  	}
>        else
> -	changed = change_zero_ext (pat);
> +	{
> +	  changed = change_pseudo_and_mask (pat);
> +	  changed |= change_zero_ext (pat);
> +	}
>      }
>    else if (GET_CODE (pat) == PARALLEL)
>      {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 1367a2cb779..2bd6bd5f908 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4207,24 +4207,6 @@ (define_insn_and_split "*rotl<mode>3_insert_3_<code>"
>  	(ior:GPR (and:GPR (match_dup 3) (match_dup 4))
>  		 (ashift:GPR (match_dup 1) (match_dup 2))))])
> 
> -(define_code_iterator plus_ior_xor [plus ior xor])
> -
> -(define_split
> -  [(set (match_operand:GPR 0 "gpc_reg_operand")
> -	(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
> -				      (match_operand:SI 2 "const_int_operand"))
> -			  (match_operand:GPR 3 "gpc_reg_operand")))]
> -  "nonzero_bits (operands[3], <MODE>mode)
> -   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
> -  [(set (match_dup 0)
> -	(ior:GPR (and:GPR (match_dup 3)
> -			  (match_dup 4))
> -		 (ashift:GPR (match_dup 1)
> -			     (match_dup 2))))]
> -{
> -  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
> -})
> -
>  (define_insn "*rotlsi3_insert_4"
>    [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
>  	(ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "0")
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-2.c b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
> new file mode 100644
> index 00000000000..a83a6511653
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +long foo (char a, long b)
> +{
> +  long c = a;
> +  c = c | (b << 12);
> +  return c;
> +}
> +
> +long bar (long b, char a)
> +{
> +  long c = a;
> +  long m = -4096;
> +  c = c | (b & m);
> +  return c;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mrl[wd]imi\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> index bafa371db73..d4dadacc6cc 100644
> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> @@ -8,8 +8,7 @@
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
> 
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 } } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
> 
> 
>
  
Segher Boessenkool Aug. 10, 2022, 5:38 p.m. UTC | #2
Hi!

Sorry for the tardiness.

On Fri, Jul 22, 2022 at 03:07:55PM +0800, HAO CHEN GUI wrote:
>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
> the function converts a single pseudo to the pseudo AND with a mask if the
> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
> conversion helps pattern to match rotate and mask insn on some targets.

The name isn't so clear.  It isn't changing a mask, to start with.

> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/AND,

"When the outercode of the SET_SRC of PAT is ..."

> convert a pseudo to pseudo AND with a mask if its nonzero_bits
> +   is less than its mode mask.  The nonzero_bits in later passes is not a
> +   superset of what is known in combine pass.  So an insn with nonzero_bits
> +   can't be recoged later.  */

Can this not be done with a splitter in the machine description?


Segher
  
HAO CHEN GUI Aug. 11, 2022, 2:11 a.m. UTC | #3
Hi Segher,
  Really appreciate your review comments.

On 11/8/2022 上午 1:38, Segher Boessenkool wrote:
> Hi!
> 
> Sorry for the tardiness.
> 
> On Fri, Jul 22, 2022 at 03:07:55PM +0800, HAO CHEN GUI wrote:
>>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
>> the function converts a single pseudo to the pseudo AND with a mask if the
>> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
>> conversion helps pattern to match rotate and mask insn on some targets.
> 
> The name isn't so clear.  It isn't changing a mask, to start with.
How about setting function name to change_pseudo? It converts a pseudo to
the pseudo AND with a mask in a particular situation.

> 
>> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
>> +   ASHIFT/AND,
> 
> "When the outercode of the SET_SRC of PAT is ..."
Yeah, I will change it.

> 
>> convert a pseudo to pseudo AND with a mask if its nonzero_bits
>> +   is less than its mode mask.  The nonzero_bits in later passes is not a
>> +   superset of what is known in combine pass.  So an insn with nonzero_bits
>> +   can't be recoged later.  */
> 
> Can this not be done with a splitter in the machine description?
> 
Sorry, I don't quite understand it. Do you mean if the conversion can be done in
split pass?

If a pseudo has DImode and stem from a char, we get nonzero_bits as 0xff in combine
pass. But in split pass, it's nonzero_bits is 0xffffffffffffffff. So the conversion
can only be done in combine pass.

Thanks
Gui Haochen
  
Segher Boessenkool Aug. 11, 2022, 5:40 p.m. UTC | #4
Hi!

On Thu, Aug 11, 2022 at 10:11:45AM +0800, HAO CHEN GUI wrote:
> On 11/8/2022 上午 1:38, Segher Boessenkool wrote:
> > On Fri, Jul 22, 2022 at 03:07:55PM +0800, HAO CHEN GUI wrote:
> >>   This patch creates a new function - change_pseudo_and_mask. If recog fails,
> >> the function converts a single pseudo to the pseudo AND with a mask if the
> >> outer operator is IOR/XOR/PLUS and inner operator is ASHIFT or AND. The
> >> conversion helps pattern to match rotate and mask insn on some targets.
> > 
> > The name isn't so clear.  It isn't changing a mask, to start with.
> How about setting function name to change_pseudo? It converts a pseudo to
> the pseudo AND with a mask in a particular situation.

Change pseudo to what?  It remains a pseudo, the same pseudo even.

"add_mask_to_pseudo"...  Well, that says "add", which is like "plus",
not a great name either -- but something in that direction?

> >> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> >> +   ASHIFT/AND,
> > 
> > "When the outercode of the SET_SRC of PAT is ..."
> Yeah, I will change it.

"outer code" of course, sorry, I typed it wrong :-/

> >> convert a pseudo to pseudo AND with a mask if its nonzero_bits
> >> +   is less than its mode mask.  The nonzero_bits in later passes is not a
> >> +   superset of what is known in combine pass.  So an insn with nonzero_bits
> >> +   can't be recoged later.  */
> > 
> > Can this not be done with a splitter in the machine description?
> > 
> Sorry, I don't quite understand it. Do you mean if the conversion can be done in
> split pass?

In any splitter, also during combine for example.  Is this common enough
to warrant special code in combine to handle all tens of cases, or is it
better handled in separate patterns?

> If a pseudo has DImode and stem from a char, we get nonzero_bits as 0xff in combine
> pass. But in split pass, it's nonzero_bits is 0xffffffffffffffff. So the conversion
> can only be done in combine pass.

Yes, but combine will use splitters as well.

You can use nonzero_bits in the split condition (the second condition in
a define_split, or the sole condition in a define_split) just fine, as
long as the replacement RTL does not rely on the nonzero_bits itself.
You cannot use it in the insn condition (the first condition in a
define_insn_and_split, or the one condition in a define_insn) because
that RTL will survive past combine, and then nonzero_bits can have bits
cleared that were set before (that were determined to be always zero
during combine, but that knowledge is gone later).

(Insns can also be split in thread_pro_and_epilogue, during reorg, and
in final of course).


Segher
  
HAO CHEN GUI Aug. 12, 2022, 8:04 a.m. UTC | #5
Hi Segher,

On 12/8/2022 上午 1:40, Segher Boessenkool wrote:
> Yes, but combine will use splitters as well.
Combine pass invokes combine_split_insns for 3-insn combine. If we want
to do the split for 2-insn combine (like the test case in PR), we have to
add a special case?

> 
> You can use nonzero_bits in the split condition (the second condition in
> a define_split, or the sole condition in a define_split) just fine, as
> long as the replacement RTL does not rely on the nonzero_bits itself.
> You cannot use it in the insn condition (the first condition in a
> define_insn_and_split, or the one condition in a define_insn) because
> that RTL will survive past combine, and then nonzero_bits can have bits
> cleared that were set before (that were determined to be always zero
> during combine, but that knowledge is gone later).

I tried to add a define_insn_and split pattern in rs6000.md, just like the
following code. The nonzero_bits is used in insn condition (for combine)
and no condition for the split. I can't set nonzero_bits in split condition
as it never matches and cause ICE then.

I am not sure if it is safe. If such an insn doesn't stem from the combine,
there is no guarantee that the nonzero_bits condition matches.


(define_insn_and_split "*test"
  [(set (match_operand:GPR 0 "gpc_reg_operand")
        (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
                                      (match_operand:SI 2 "const_int_operand"))
                          (match_operand:GPR 3 "gpc_reg_operand")))]
  "nonzero_bits (operands[3], <MODE>mode)
   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
  "#"
  ""
  [(set (match_dup 0)
        (ior:GPR (and:GPR (match_dup 3)
                          (match_dup 4))
                 (ashift:GPR (match_dup 1)
                             (match_dup 2))))]
{
  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
})

Thanks
Gui Haochen
  

Patch

diff --git a/gcc/combine.cc b/gcc/combine.cc
index a5fabf397f7..e1c1aa7da1c 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -11599,6 +11599,48 @@  change_zero_ext (rtx pat)
   return changed;
 }

+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+   ASHIFT/AND, convert a pseudo to pseudo AND with a mask if its nonzero_bits
+   is less than its mode mask.  The nonzero_bits in later passes is not a
+   superset of what is known in combine pass.  So an insn with nonzero_bits
+   can't be recoged later.  */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+  rtx src = SET_SRC (pat);
+  if ((GET_CODE (src) == IOR
+       || GET_CODE (src) == XOR
+       || GET_CODE (src) == PLUS)
+      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+	    || GET_CODE (XEXP (src, 0)) == AND)
+	   && REG_P (XEXP (src, 1)))))
+    {
+      rtx reg = XEXP (src, 1);
+      machine_mode mode = GET_MODE (reg);
+      unsigned HOST_WIDE_INT nonzero = nonzero_bits (reg, mode);
+      if (nonzero < GET_MODE_MASK (mode))
+	{
+	  int shift;
+
+	  if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+	    shift = INTVAL (XEXP (XEXP (src, 0), 1));
+	  else
+	    shift = ctz_hwi (INTVAL (XEXP (XEXP (src, 0), 1)));
+
+	  if (shift > 0
+	      && (HOST_WIDE_INT_1U << shift) - 1 >= nonzero)
+	    {
+	      unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << shift) - 1;
+	      rtx x = gen_rtx_AND (mode, reg, GEN_INT (mask));
+	      SUBST (XEXP (SET_SRC (pat), 1), x);
+	      maybe_swap_commutative_operands (SET_SRC (pat));
+	      return true;
+	    }
+	}
+    }
+  return false;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
    We try to match the rtx that the pointer points to.
    If that fails, we may try to modify or replace the pattern,
@@ -11646,7 +11688,10 @@  recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
 	    }
 	}
       else
-	changed = change_zero_ext (pat);
+	{
+	  changed = change_pseudo_and_mask (pat);
+	  changed |= change_zero_ext (pat);
+	}
     }
   else if (GET_CODE (pat) == PARALLEL)
     {
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 1367a2cb779..2bd6bd5f908 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -4207,24 +4207,6 @@  (define_insn_and_split "*rotl<mode>3_insert_3_<code>"
 	(ior:GPR (and:GPR (match_dup 3) (match_dup 4))
 		 (ashift:GPR (match_dup 1) (match_dup 2))))])

-(define_code_iterator plus_ior_xor [plus ior xor])
-
-(define_split
-  [(set (match_operand:GPR 0 "gpc_reg_operand")
-	(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
-				      (match_operand:SI 2 "const_int_operand"))
-			  (match_operand:GPR 3 "gpc_reg_operand")))]
-  "nonzero_bits (operands[3], <MODE>mode)
-   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
-  [(set (match_dup 0)
-	(ior:GPR (and:GPR (match_dup 3)
-			  (match_dup 4))
-		 (ashift:GPR (match_dup 1)
-			     (match_dup 2))))]
-{
-  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
-})
-
 (define_insn "*rotlsi3_insert_4"
   [(set (match_operand:SI 0 "gpc_reg_operand" "=r")
 	(ior:SI (and:SI (match_operand:SI 3 "gpc_reg_operand" "0")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr93453-2.c b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
new file mode 100644
index 00000000000..a83a6511653
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr93453-2.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long foo (char a, long b)
+{
+  long c = a;
+  c = c | (b << 12);
+  return c;
+}
+
+long bar (long b, char a)
+{
+  long c = a;
+  long m = -4096;
+  c = c | (b & m);
+  return c;
+}
+
+/* { dg-final { scan-assembler-times {\mrl[wd]imi\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
index bafa371db73..d4dadacc6cc 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
@@ -8,8 +8,7 @@ 
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */

-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */