Modify combine pattern by anding a pseudo with its nonzero bits

Message ID fc49042f-a809-5e71-e50b-2d0ff1955465@linux.ibm.com
State New
Headers
Series Modify combine pattern by anding a pseudo with its nonzero bits |

Commit Message

HAO CHEN GUI Nov. 30, 2021, 8:46 a.m. UTC
  Hi,

    This patch adds a helper, change_pseudo_and_mask, that modifies the combine pattern when recog fails. The helper replaces a lone pseudo with that pseudo ANDed with a mask of its nonzero bits, if the outer operator is IOR/XOR/PLUS and the other operand is an ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior patterns.

    Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog

2021-11-30 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
        * combine.c (change_pseudo_and_mask): New.
        (recog_for_combine): If recog fails, try again with the pattern
        modified by change_pseudo_and_mask.

gcc/testsuite/
        * gcc.target/powerpc/20050603-3.c: Modify the dump check conditions.
        * gcc.target/powerpc/rlwimi-2.c: Likewise.

patch.diff
  

Comments

David Edelsohn Nov. 30, 2021, 1:46 p.m. UTC | #1
On Tue, Nov 30, 2021 at 3:46 AM HAO CHEN GUI <guihaoc@linux.ibm.com> wrote:
>
> Hi,
>
>     This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails. The helper converts a single pseudo to the pseudo and with a mask if the outer operator is IOR/XOR/PLUS and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.
>
>     Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
> ChangeLog
>
> 2021-11-30 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
>         * combine.c (change_pseudo_and_mask): New.
>         (recog_for_combine): If recog fails, try again with the pattern
>         modified by change_pseudo_and_mask.
>
> gcc/testsuite/
>         * gcc.target/powerpc/20050603-3.c: Modify the dump check conditions.
>         * gcc.target/powerpc/rlwimi-2.c: Likewise.
>
> patch.diff
>
> diff --git a/gcc/combine.c b/gcc/combine.c
> index 03e9a780919..c83c0aceb57 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -11539,6 +11539,42 @@ change_zero_ext (rtx pat)
>    return changed;
>  }
>
> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/LSHIFTRT/AND, convert a psuedo to psuedo AND with a mask if its

^^^ spelling mistake in comment: pseudo not psuedo

Thanks, David

> +   nonzero_bits is less than its mode mask.  */
> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> +  bool changed = false;
> +
> +  rtx src = SET_SRC (pat);
> +  if ((GET_CODE (src) == IOR
> +       || GET_CODE (src) == XOR
> +       || GET_CODE (src) == PLUS)
> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> +           || GET_CODE (XEXP (src, 0)) == LSHIFTRT
> +           || GET_CODE (XEXP (src, 0)) == AND)
> +          && REG_P (XEXP (src, 1)))
> +         || ((GET_CODE (XEXP (src, 1)) == ASHIFT
> +              || GET_CODE (XEXP (src, 1)) == LSHIFTRT
> +              || GET_CODE (XEXP (src, 1)) == AND)
> +             && REG_P (XEXP (src, 0)))))
> +    {
> +      rtx *reg = REG_P (XEXP (src, 0))
> +                ? &XEXP (SET_SRC (pat), 0)
> +                : &XEXP (SET_SRC (pat), 1);
> +      machine_mode mode = GET_MODE (*reg);
> +      unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
> +      if (nonzero < GET_MODE_MASK (mode))
> +       {
> +         rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
> +         SUBST (*reg, x);
> +         changed = true;
> +       }
> +     }
> +  return changed;
> +}
> +
>  /* Like recog, but we receive the address of a pointer to a new pattern.
>     We try to match the rtx that the pointer points to.
>     If that fails, we may try to modify or replace the pattern,
> @@ -11586,7 +11622,14 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>             }
>         }
>        else
> -       changed = change_zero_ext (pat);
> +       {
> +         if (change_pseudo_and_mask (pat))
> +           {
> +             maybe_swap_commutative_operands (SET_SRC (pat));
> +             changed = true;
> +           }
> +         changed |= change_zero_ext (pat);
> +       }
>      }
>    else if (GET_CODE (pat) == PARALLEL)
>      {
> diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> index 4017d34f429..e628be11532 100644
> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
>    b.y = (x<<12) | (x>>20);
>  }
>
> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
>  /* { dg-final { scan-assembler-not {\mrldic} } } */
>  /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
>  /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> index bafa371db73..ffb5f9e450f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> @@ -2,14 +2,14 @@
>  /* { dg-options "-O2" } */
>
>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
>
>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
>
>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
>
>
  
Segher Boessenkool Nov. 30, 2021, 6:11 p.m. UTC | #2
Hi!

On Tue, Nov 30, 2021 at 04:46:34PM +0800, HAO CHEN GUI wrote:
>     This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails. The helper converts a single pseudo to the pseudo and with a mask if the outer operator is IOR/XOR/PLUS and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.
> 
>     Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

(Please make shorter lines in email.  70 chars is usual).

> gcc/
>         * combine.c (change_pseudo_and_mask): New.
>         (recog_for_combine): If recog fails, try again with the pattern
>         modified by change_pseudo_and_mask.
> 
> gcc/testsuite/
>         * gcc.target/powerpc/20050603-3.c: Modify the dump check conditions.
>         * gcc.target/powerpc/rlwimi-2.c: Likewise.

> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> +   ASHIFT/LSHIFTRT/AND, convert a psuedo to psuedo AND with a mask if its
> +   nonzero_bits is less than its mode mask.  */

Please add some words *why* we do this (namely, because you cannot use
nonzero_bits in combine as well as after combine and expect the same
answer).

> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> +  bool changed = false;
> +
> +  rtx src = SET_SRC (pat);
> +  if ((GET_CODE (src) == IOR
> +       || GET_CODE (src) == XOR
> +       || GET_CODE (src) == PLUS)
> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> +           || GET_CODE (XEXP (src, 0)) == LSHIFTRT
> +           || GET_CODE (XEXP (src, 0)) == AND)
> +          && REG_P (XEXP (src, 1)))
> +         || ((GET_CODE (XEXP (src, 1)) == ASHIFT
> +              || GET_CODE (XEXP (src, 1)) == LSHIFTRT
> +              || GET_CODE (XEXP (src, 1)) == AND)
> +             && REG_P (XEXP (src, 0)))))

If one arm is a pseudo and the other is compound, the compound one is
first always.  This is one of those canonicalisations that simplifies a
lot of code -- including this new code :-)

> +    {
> +      rtx *reg = REG_P (XEXP (src, 0))
> +                ? &XEXP (SET_SRC (pat), 0)
> +                : &XEXP (SET_SRC (pat), 1);

This is indented wrong.  But, in fact, all tabs are changed to spaces in
your patch?

> @@ -11586,7 +11622,14 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>             }
>         }
>        else
> -       changed = change_zero_ext (pat);
> +       {
> +         if (change_pseudo_and_mask (pat))
> +           {
> +             maybe_swap_commutative_operands (SET_SRC (pat));
> +             changed = true;
> +           }
> +         changed |= change_zero_ext (pat);
> +       }
>      }
>    else if (GET_CODE (pat) == PARALLEL)
>      {


  changed = change_zero_ext (pat);
  if (!changed)
    changed = change_pseudo_and_mask (pat);

  if (changed)
    maybe_swap_commutative_operands (SET_SRC (pat));


> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
>    b.y = (x<<12) | (x>>20);
>  }
> 
> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
>  /* { dg-final { scan-assembler-not {\mrldic} } } */
>  /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
>  /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */

Please show the -m32 code before and after the change?  Why is it okay
to get an rlwinm there?

> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> index bafa371db73..ffb5f9e450f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> @@ -2,14 +2,14 @@
>  /* { dg-options "-O2" } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */

No, it is not okay to generate worse code.  In what cases do you see
more insns now, and why?

>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
> 
>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */

Are the new rlwimi's good to have, or can we do those with simpler or
fewer insns?


Segher
  
HAO CHEN GUI Dec. 1, 2021, 3:29 a.m. UTC | #3
Hi Segher,

   Thanks for your review. Please see my comments.

On 1/12/2021 上午 2:11, Segher Boessenkool wrote:
> Hi!
>
> On Tue, Nov 30, 2021 at 04:46:34PM +0800, HAO CHEN GUI wrote:
>>     This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails. The helper converts a single pseudo to the pseudo and with a mask if the outer operator is IOR/XOR/PLUS and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps match shift + ior pattern.
>>
>>     Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
> (Please make shorter lines in email.  70 chars is usual).
>
>> gcc/
>>         * combine.c (change_pseudo_and_mask): New.
>>         (recog_for_combine): If recog fails, try again with the pattern
>>         modified by change_pseudo_and_mask.
>>
>> gcc/testsuite/
>>         * gcc.target/powerpc/20050603-3.c: Modify the dump check conditions.
>>         * gcc.target/powerpc/rlwimi-2.c: Likewise.
>> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
>> +   ASHIFT/LSHIFTRT/AND, convert a psuedo to psuedo AND with a mask if its
>> +   nonzero_bits is less than its mode mask.  */
> Please add some words *why* we do this (namely, because you cannot use
> nonzero_bits in combine as well as after combine and expect the same
> answer).
>
>> +static bool
>> +change_pseudo_and_mask (rtx pat)
>> +{
>> +  bool changed = false;
>> +
>> +  rtx src = SET_SRC (pat);
>> +  if ((GET_CODE (src) == IOR
>> +       || GET_CODE (src) == XOR
>> +       || GET_CODE (src) == PLUS)
>> +      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
>> +           || GET_CODE (XEXP (src, 0)) == LSHIFTRT
>> +           || GET_CODE (XEXP (src, 0)) == AND)
>> +          && REG_P (XEXP (src, 1)))
>> +         || ((GET_CODE (XEXP (src, 1)) == ASHIFT
>> +              || GET_CODE (XEXP (src, 1)) == LSHIFTRT
>> +              || GET_CODE (XEXP (src, 1)) == AND)
>> +             && REG_P (XEXP (src, 0)))))
> If one arm is a pseudo and the other is compound, the compound one is
> first always.  This is one of those canonicalisations that simplifies a
> lot of code -- including this new code :-)
>
>> +    {
>> +      rtx *reg = REG_P (XEXP (src, 0))
>> +                ? &XEXP (SET_SRC (pat), 0)
>> +                : &XEXP (SET_SRC (pat), 1);
> This is indented wrong.  But, in fact, all tabs are changed to spaces in
> your patch?

When I paste the patch from the terminal, each tab is automatically converted to 4 spaces.  I will
try to send the patch via "git send-email" next time.

>> @@ -11586,7 +11622,14 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
>>             }
>>         }
>>        else
>> -       changed = change_zero_ext (pat);
>> +       {
>> +         if (change_pseudo_and_mask (pat))
>> +           {
>> +             maybe_swap_commutative_operands (SET_SRC (pat));
>> +             changed = true;
>> +           }
>> +         changed |= change_zero_ext (pat);
>> +       }
>>      }
>>    else if (GET_CODE (pat) == PARALLEL)
>>      {
>
>   changed = change_zero_ext (pat);
>   if (!changed)
>     changed = change_pseudo_and_mask (pat);
>
>   if (changed)
>     maybe_swap_commutative_operands (SET_SRC (pat));
>
>
>> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
>> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
>>    b.y = (x<<12) | (x>>20);
>>  }
>>
>> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
>> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
>>  /* { dg-final { scan-assembler-not {\mrldic} } } */
>>  /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
>>  /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
> Please show the -m32 code before and after the change?  Why is it okay
> to get an rlwinm there?

The patch doesn't affect -m32 code. The original test already fails with -m64 on "\mrldic", because the unpatched compiler generates an "rldicl" instruction.

With my patch, the test fails with -m64 on "\mrlwinm" instead, because an "rlwinm" instruction is generated. So I restricted that check to ilp32.

original regression test

PASS: gcc.target/powerpc/20050603-3.c scan-assembler-not \\mrlwinm
FAIL: gcc.target/powerpc/20050603-3.c scan-assembler-not \\mrldic
PASS: gcc.target/powerpc/20050603-3.c scan-assembler-not \\mrot[lr]
PASS: gcc.target/powerpc/20050603-3.c scan-assembler-not \\ms[lr][wd]
PASS: gcc.target/powerpc/20050603-3.c scan-assembler-times \\mrl[wd]imi 1

original -m64 assembly

        addis 10,2,.LANCHOR0@toc@ha
        rldicl 3,3,52,32
        lwz 9,.LANCHOR0@toc@l(10)
        rlwimi 9,3,0,3840
        stw 9,.LANCHOR0@toc@l(10)
        blr

patch -m64 assembly

        addis 10,2,.LANCHOR0@toc@ha
        rlwinm 3,3,20,20,23
        lwz 9,.LANCHOR0@toc@l(10)
        rlwimi 9,3,0,3840
        stw 9,.LANCHOR0@toc@l(10)
        blr

-m32 assembly (both original and patch)

        lis 10,b@ha
        lwz 9,b@l(10)
        rlwimi 9,3,20,20,23
        stw 9,b@l(10)
        blr

>
>> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> index bafa371db73..ffb5f9e450f 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
>> @@ -2,14 +2,14 @@
>>  /* { dg-options "-O2" } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
> No, it is not okay to generate worse code.  In what cases do you see
> more insns now, and why?

My patch doesn't generate more insns. The original test already fails with -m64 on "{(?n)^\s+[a-z]} 20217".

The unpatched compiler generates 21305 insns, while my patch generates 21279 insns.

original regression test
gcc.target/powerpc/rlwimi-2.c: (?n)^\\s+[a-z] found 21305 times
FAIL: gcc.target/powerpc/rlwimi-2.c scan-assembler-times (?n)^\\s+[a-z] 20217

>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
>> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
>> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
>>
>>  /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
> Are the new rlwimi's good to have, or can we do those with simpler or
> fewer insns?

The new rlwimi insns are a good thing: each one merges an "rlwinm" and an "or", just as we want.

The test doesn't scan for "rlwinm" and "or", so we don't see their counts decrease.

But the total number of insns is reduced.

original -m64

        rlwinm 4,4,0,0,0
        rldicl 3,3,63,32
        or 3,4,3
        rldicl 3,3,0,32

patch -m64

        rldicl 3,3,63,32
        rlwimi 3,4,0,-2147483648
        rldicl 3,3,0,32

>
>
> Segher
  

Patch

diff --git a/gcc/combine.c b/gcc/combine.c
index 03e9a780919..c83c0aceb57 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -11539,6 +11539,42 @@  change_zero_ext (rtx pat)
   return changed;
 }

+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+   ASHIFT/LSHIFTRT/AND, convert a psuedo to psuedo AND with a mask if its
+   nonzero_bits is less than its mode mask.  */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+  bool changed = false;
+
+  rtx src = SET_SRC (pat);
+  if ((GET_CODE (src) == IOR
+       || GET_CODE (src) == XOR
+       || GET_CODE (src) == PLUS)
+      && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+           || GET_CODE (XEXP (src, 0)) == LSHIFTRT
+           || GET_CODE (XEXP (src, 0)) == AND)
+          && REG_P (XEXP (src, 1)))
+         || ((GET_CODE (XEXP (src, 1)) == ASHIFT
+              || GET_CODE (XEXP (src, 1)) == LSHIFTRT
+              || GET_CODE (XEXP (src, 1)) == AND)
+             && REG_P (XEXP (src, 0)))))
+    {
+      rtx *reg = REG_P (XEXP (src, 0))
+                ? &XEXP (SET_SRC (pat), 0)
+                : &XEXP (SET_SRC (pat), 1);
+      machine_mode mode = GET_MODE (*reg);
+      unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
+      if (nonzero < GET_MODE_MASK (mode))
+       {
+         rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
+         SUBST (*reg, x);
+         changed = true;
+       }
+     }
+  return changed;
+}
+
 /* Like recog, but we receive the address of a pointer to a new pattern.
    We try to match the rtx that the pointer points to.
    If that fails, we may try to modify or replace the pattern,
@@ -11586,7 +11622,14 @@  recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
            }
        }
       else
-       changed = change_zero_ext (pat);
+       {
+         if (change_pseudo_and_mask (pat))
+           {
+             maybe_swap_commutative_operands (SET_SRC (pat));
+             changed = true;
+           }
+         changed |= change_zero_ext (pat);
+       }
     }
   else if (GET_CODE (pat) == PARALLEL)
     {
diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
index 4017d34f429..e628be11532 100644
--- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
@@ -12,7 +12,7 @@  void rotins (unsigned int x)
   b.y = (x<<12) | (x>>20);
 }

-/* { dg-final { scan-assembler-not {\mrlwinm} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
 /* { dg-final { scan-assembler-not {\mrldic} } } */
 /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
 /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
index bafa371db73..ffb5f9e450f 100644
--- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
@@ -2,14 +2,14 @@ 
 /* { dg-options "-O2" } */

 /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */

 /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */