Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]

Message ID e85c2d1d-6884-15e2-8dd9-02005965aec8@linux.ibm.com
State New
Headers
Series Modify combine pattern by a pseudo AND with its nonzero bits [PR93453] |

Commit Message

HAO CHEN GUI Dec. 1, 2021, 9:01 a.m. UTC
  Hi,
  This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails.
The helper converts a single pseudo to the pseudo AND with a mask if the outer operator is IOR/XOR/PLUS
and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk?
Any recommendations? Thanks a lot.

ChangeLog
2021-12-01 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	* combine.c (change_pseudo_and_mask): New.
	(recog_for_combine): If recog fails, try again with the pattern
	modified by change_pseudo_and_mask.

gcc/testsuite/
	* gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
	* gcc.target/powerpc/rlwimi-2.c: Likewise.


patch.diff
  

Comments

Kewen.Lin Dec. 2, 2021, 2:21 a.m. UTC | #1
Hi Haochen,

on 2021/12/1 下午5:01, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
>   This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails.
> The helper converts a single pseudo to the pseudo AND with a mask if the outer operator is IOR/XOR/PLUS
> and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.
> 

Thanks for improving this!

I would expect this patch can make us remove the below existing splitting,
which was added for a similar purpose but only works for more insns
combination.

(define_split
  [(set (match_operand:GPR 0 "gpc_reg_operand")
	(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
				      (match_operand:SI 2 "const_int_operand"))
			  (match_operand:GPR 3 "gpc_reg_operand")))]
  "nonzero_bits (operands[3], <MODE>mode)
   < HOST_WIDE_INT_1U << INTVAL (operands[2])"
  [(set (match_dup 0)
	(ior:GPR (and:GPR (match_dup 3)
			  (match_dup 4))
		 (ashift:GPR (match_dup 1)
			     (match_dup 2))))]
{
  operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
})


As the description, this seems to check some special operations and mainly
for those targets having Power style rotate* operations?  If the understanding
is correct, not sure it's a good idea to make this under a target hook/macro?


>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk?
> Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2021-12-01 Haochen Gui <guihaoc@linux.ibm.com>
> 
> gcc/
> 	* combine.c (change_pseudo_and_mask): New.
> 	(recog_for_combine): If recog fails, try again with the pattern
> 	modified by change_pseudo_and_mask.
> 
> gcc/testsuite/
> 	* gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
> 	* gcc.target/powerpc/rlwimi-2.c: Likewise.
> 
> 
> patch.diff
> diff --git a/gcc/combine.c b/gcc/combine.c
> index 03e9a780919..82ee3b2e9db 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -11539,6 +11539,37 @@ change_zero_ext (rtx pat)
>    return changed;
>  }
> 
> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/LSHIFTRT/AND, convert a pseudo to psuedo AND with a mask if its
> +   nonzero_bits is less than its mode mask. The nonzero_bits in other pass
> +   doesn't return the same value as it does in combine pass.  */
> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> +  bool changed = false;

Maybe we can remove this variable ...

> +
> +  rtx src = SET_SRC (pat);
> +  if ((GET_CODE (src) == IOR
> +       || GET_CODE (src) == XOR
> +       || GET_CODE (src) == PLUS)
> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> +	    || GET_CODE (XEXP (src, 0)) == LSHIFTRT
> +	    || GET_CODE (XEXP (src, 0)) == AND)
> +	   && REG_P (XEXP (src, 1)))))
> +    {
> +      rtx *reg = &XEXP (SET_SRC (pat), 1);
> +      machine_mode mode = GET_MODE (*reg);
> +      unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
> +      if (nonzero < GET_MODE_MASK (mode))
> +	{
> +	  rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
> +	  SUBST (*reg, x);
> +	  changed = true;
    
..., directly return true here ...

> +	}
> +     }
> +  return changed;

and return false here.

> +}
> +
>  /* Like recog, but we receive the address of a pointer to a new pattern.
>     We try to match the rtx that the pointer points to.
>     If that fails, we may try to modify or replace the pattern,
> @@ -11586,7 +11617,14 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>  	    }
>  	}
>        else
> -	changed = change_zero_ext (pat);
> +	{
> +	  if (change_pseudo_and_mask (pat))
> +	    {
> +	      maybe_swap_commutative_operands (SET_SRC (pat));

maybe we can call maybe_swap_commutative_operands directly
in function change_pseudo_and_mask and make the code here compact.

BR,
Kewen

> +	      changed = true;
> +	    }
> +	  changed |= change_zero_ext (pat);
> +	}
>      }
>    else if (GET_CODE (pat) == PARALLEL)
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> index 4017d34f429..e628be11532 100644
> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
>    b.y = (x<<12) | (x>>20);
>  }
> 
> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
>  /* { dg-final { scan-assembler-not {\mrldic} } } */
>  /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
>  /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> index bafa371db73..ffb5f9e450f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> @@ -2,14 +2,14 @@
>  /* { dg-options "-O2" } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
>
  
HAO CHEN GUI Dec. 2, 2021, 8:03 a.m. UTC | #2
Kewen,
	Many thanks for your comments.

On 2/12/2021 上午 10:21, Kewen.Lin wrote:
> Hi Haochen,
> 
> on 2021/12/1 下午5:01, HAO CHEN GUI via Gcc-patches wrote:
>> Hi,
>>   This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails.
>> The helper converts a single pseudo to the pseudo AND with a mask if the outer operator is IOR/XOR/PLUS
>> and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.
>>
> 
> Thanks for improving this!
> 
> I would expect this patch can make us remove the below existing splitting,
> which was added for a similar purpose but only works for more insns
> combination.
> 
> (define_split
>   [(set (match_operand:GPR 0 "gpc_reg_operand")
> 	(plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
> 				      (match_operand:SI 2 "const_int_operand"))
> 			  (match_operand:GPR 3 "gpc_reg_operand")))]
>   "nonzero_bits (operands[3], <MODE>mode)
>    < HOST_WIDE_INT_1U << INTVAL (operands[2])"
>   [(set (match_dup 0)
> 	(ior:GPR (and:GPR (match_dup 3)
> 			  (match_dup 4))
> 		 (ashift:GPR (match_dup 1)
> 			     (match_dup 2))))]
> {
>   operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
> })
> 
> 
Just tested it. The split pattern should be removed.

> As the description, this seems to check some special operations and mainly
> for those targets having Power style rotate* operations?  If the understanding
> is correct, not sure it's a good idea to make this under a target hook/macro?
> 
I am not sure if it benefits others. Waiting for Segher's comment.
> 
>>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk?
>> Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2021-12-01 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>> 	* combine.c (change_pseudo_and_mask): New.
>> 	(recog_for_combine): If recog fails, try again with the pattern
>> 	modified by change_pseudo_and_mask.
>>
>> gcc/testsuite/
>> 	* gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
>> 	* gcc.target/powerpc/rlwimi-2.c: Likewise.
>>
>>
>> patch.diff
>> diff --git a/gcc/combine.c b/gcc/combine.c
>> index 03e9a780919..82ee3b2e9db 100644
>> --- a/gcc/combine.c
>> +++ b/gcc/combine.c
>> @@ -11539,6 +11539,37 @@ change_zero_ext (rtx pat)
>>    return changed;
>>  }
>>
>> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
>> +   ASHIFT/LSHIFTRT/AND, convert a pseudo to psuedo AND with a mask if its
>> +   nonzero_bits is less than its mode mask. The nonzero_bits in other pass
>> +   doesn't return the same value as it does in combine pass.  */
>> +static bool
>> +change_pseudo_and_mask (rtx pat)
>> +{
>> +  bool changed = false;
> 
> Maybe we can remove this variable ...
> 
>> +
>> +  rtx src = SET_SRC (pat);
>> +  if ((GET_CODE (src) == IOR
>> +       || GET_CODE (src) == XOR
>> +       || GET_CODE (src) == PLUS)
>> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
>> +	    || GET_CODE (XEXP (src, 0)) == LSHIFTRT
>> +	    || GET_CODE (XEXP (src, 0)) == AND)
>> +	   && REG_P (XEXP (src, 1)))))
>> +    {
>> +      rtx *reg = &XEXP (SET_SRC (pat), 1);
>> +      machine_mode mode = GET_MODE (*reg);
>> +      unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
>> +      if (nonzero < GET_MODE_MASK (mode))
>> +	{
>> +	  rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
>> +	  SUBST (*reg, x);
>> +	  changed = true;
>     
> ..., directly return true here ...
> 
>> +	}
>> +     }
>> +  return changed;
> 
> and return false here.

Yeah, it saves a variable. I changed it.
> 
>> +}
>> +
>>  /* Like recog, but we receive the address of a pointer to a new pattern.
>>     We try to match the rtx that the pointer points to.
>>     If that fails, we may try to modify or replace the pattern,
>> @@ -11586,7 +11617,14 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>>  	    }
>>  	}
>>        else
>> -	changed = change_zero_ext (pat);
>> +	{
>> +	  if (change_pseudo_and_mask (pat))
>> +	    {
>> +	      maybe_swap_commutative_operands (SET_SRC (pat));
> 
> maybe we can call maybe_swap_commutative_operands directly
> in function change_pseudo_and_mask and make the code here compact.

Yes, I put the method into the helper. It looks good.
> 
> BR,
> Kewen
> 
>> +	      changed = true;
>> +	    }
>> +	  changed |= change_zero_ext (pat);
>> +	}
>>      }
>>    else if (GET_CODE (pat) == PARALLEL)
>>      {
>> diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
>> index 4017d34f429..e628be11532 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
>> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
>>    b.y = (x<<12) | (x>>20);
>>  }
>>
>> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
>> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
>>  /* { dg-final { scan-assembler-not {\mrldic} } } */
>>  /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
>>  /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> index bafa371db73..ffb5f9e450f 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> @@ -2,14 +2,14 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
>>
>
  

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index 03e9a780919..82ee3b2e9db 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -11539,6 +11539,37 @@  change_zero_ext (rtx pat)
   return changed;
 }

+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+   ASHIFT/LSHIFTRT/AND, convert a pseudo to psuedo AND with a mask if its
+   nonzero_bits is less than its mode mask. The nonzero_bits in other pass
+   doesn't return the same value as it does in combine pass.  */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+  bool changed = false;
+
+  rtx src = SET_SRC (pat);
+  if ((GET_CODE (src) == IOR
+       || GET_CODE (src) == XOR
+       || GET_CODE (src) == PLUS)
+      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+	    || GET_CODE (XEXP (src, 0)) == LSHIFTRT
+	    || GET_CODE (XEXP (src, 0)) == AND)
+	   && REG_P (XEXP (src, 1)))))
+    {
+      rtx *reg = &XEXP (SET_SRC (pat), 1);
+      machine_mode mode = GET_MODE (*reg);
+      unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
+      if (nonzero < GET_MODE_MASK (mode))
+	{
+	  rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
+	  SUBST (*reg, x);
+	  changed = true;
+	}
+     }
+  return changed;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
    We try to match the rtx that the pointer points to.
    If that fails, we may try to modify or replace the pattern,
@@ -11586,7 +11617,14 @@  recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
 	    }
 	}
       else
-	changed = change_zero_ext (pat);
+	{
+	  if (change_pseudo_and_mask (pat))
+	    {
+	      maybe_swap_commutative_operands (SET_SRC (pat));
+	      changed = true;
+	    }
+	  changed |= change_zero_ext (pat);
+	}
     }
   else if (GET_CODE (pat) == PARALLEL)
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
index 4017d34f429..e628be11532 100644
--- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
@@ -12,7 +12,7 @@  void rotins (unsigned int x)
   b.y = (x<<12) | (x>>20);
 }

-/* { dg-final { scan-assembler-not {\mrlwinm} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
 /* { dg-final { scan-assembler-not {\mrldic} } } */
 /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
 /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
index bafa371db73..ffb5f9e450f 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
@@ -2,14 +2,14 @@ 
 /* { dg-options "-O2" } */

 /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */