Modify combine pattern by a pseudo AND with its nonzero bits [PR93453]
Commit Message
Hi,
This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails.
The helper converts a single pseudo to the pseudo AND with a mask if the outer operator is IOR/XOR/PLUS
and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps on shift + ior pattern.
Not sure if the helper should be a target hook or not. Please advice.
Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk?
Any recommendations? Thanks a lot.
ChangeLog
2021-12-07 Haochen Gui <guihaoc@linux.ibm.com>
gcc/
* combine.c (change_pseudo_and_mask): New.
(recog_for_combine): If recog fails, try again with the pattern
modified by change_pseudo_and_mask.
* config/rs6000/rs6000.md (plus_ior_xor): Removed.
(anonymous split pattern for plus_ior_xor): Likewise.
gcc/testsuite/
* gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
* gcc.target/powerpc/rlwimi-2.c: Likewise.
patch.diff
Comments
Hi,
Gentle ping this:
https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586304.html
Thanks
On 7/12/2021 下午 4:28, HAO CHEN GUI wrote:
> Hi,
> This patch modifies the combine pattern with a helper - change_pseudo_and_mask when recog fails.
> The helper converts a single pseudo to the pseudo AND with a mask if the outer operator is IOR/XOR/PLUS
> and the inner operator is ASHIFT/LSHIFTRT/AND. The conversion helps on shift + ior pattern.
>
> Not sure if the helper should be a target hook or not. Please advice.
>
> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Is this okay for trunk?
> Any recommendations? Thanks a lot.
>
> ChangeLog
> 2021-12-07 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
> * combine.c (change_pseudo_and_mask): New.
> (recog_for_combine): If recog fails, try again with the pattern
> modified by change_pseudo_and_mask.
> * config/rs6000/rs6000.md (plus_ior_xor): Removed.
> (anonymous split pattern for plus_ior_xor): Likewise.
>
> gcc/testsuite/
> * gcc.target/powerpc/20050603-3.c: Modify dump check conditions.
> * gcc.target/powerpc/rlwimi-2.c: Likewise.
>
> patch.diff
> diff --git a/gcc/combine.c b/gcc/combine.c
> index 03e9a780919..53500e8c6a0 100644
> --- a/gcc/combine.c
> +++ b/gcc/combine.c
> @@ -11539,6 +11539,36 @@ change_zero_ext (rtx pat)
> return changed;
> }
>
> +/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
> + ASHIFT/LSHIFTRT/AND, convert a pseudo to psuedo AND with a mask if its
> + nonzero_bits is less than its mode mask. The nonzero_bits in other pass
> + doesn't return the same value as it does in combine pass. */
> +static bool
> +change_pseudo_and_mask (rtx pat)
> +{
> + rtx src = SET_SRC (pat);
> + if ((GET_CODE (src) == IOR
> + || GET_CODE (src) == XOR
> + || GET_CODE (src) == PLUS)
> + && (((GET_CODE (XEXP (src, 0)) == ASHIFT
> + || GET_CODE (XEXP (src, 0)) == LSHIFTRT
> + || GET_CODE (XEXP (src, 0)) == AND)
> + && REG_P (XEXP (src, 1)))))
> + {
> + rtx *reg = &XEXP (SET_SRC (pat), 1);
> + machine_mode mode = GET_MODE (*reg);
> + unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
> + if (nonzero < GET_MODE_MASK (mode))
> + {
> + rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
> + SUBST (*reg, x);
> + maybe_swap_commutative_operands (SET_SRC (pat));
> + return true;
> + }
> + }
> + return false;
> +}
> +
> /* Like recog, but we receive the address of a pointer to a new pattern.
> We try to match the rtx that the pointer points to.
> If that fails, we may try to modify or replace the pattern,
> @@ -11586,7 +11616,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
> }
> }
> else
> - changed = change_zero_ext (pat);
> + {
> + changed = change_pseudo_and_mask (pat);
> + changed |= change_zero_ext (pat);
> + }
> }
> else if (GET_CODE (pat) == PARALLEL)
> {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..5adbe9e9d27 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -4149,24 +4149,6 @@ (define_insn "rotl<mode>3_insert_3"
> }
> [(set_attr "type" "insert")])
>
> -(define_code_iterator plus_ior_xor [plus ior xor])
> -
> -(define_split
> - [(set (match_operand:GPR 0 "gpc_reg_operand")
> - (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
> - (match_operand:SI 2 "const_int_operand"))
> - (match_operand:GPR 3 "gpc_reg_operand")))]
> - "nonzero_bits (operands[3], <MODE>mode)
> - < HOST_WIDE_INT_1U << INTVAL (operands[2])"
> - [(set (match_dup 0)
> - (ior:GPR (and:GPR (match_dup 3)
> - (match_dup 4))
> - (ashift:GPR (match_dup 1)
> - (match_dup 2))))]
> -{
> - operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
> -})
> -
> (define_insn "*rotl<mode>3_insert_4"
> [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
> (ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
> diff --git a/gcc/testsuite/gcc.target/powerpc/20050603-3.c b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> index 4017d34f429..e628be11532 100644
> --- a/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/20050603-3.c
> @@ -12,7 +12,7 @@ void rotins (unsigned int x)
> b.y = (x<<12) | (x>>20);
> }
>
> -/* { dg-final { scan-assembler-not {\mrlwinm} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
> /* { dg-final { scan-assembler-not {\mrldic} } } */
> /* { dg-final { scan-assembler-not {\mrot[lr]} } } */
> /* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> index bafa371db73..ffb5f9e450f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/rlwimi-2.c
> @@ -2,14 +2,14 @@
> /* { dg-options "-O2" } */
>
> /* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
> /* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
> /* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
> /* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
> /* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
>
> /* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
> -/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
> +/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
>
> /* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */
>
@@ -11539,6 +11539,36 @@ change_zero_ext (rtx pat)
return changed;
}
+/* When the outer code of set_src is IOR/XOR/PLUS and the inner code is
+ ASHIFT/LSHIFTRT/AND, convert a pseudo to psuedo AND with a mask if its
+ nonzero_bits is less than its mode mask. The nonzero_bits in other pass
+ doesn't return the same value as it does in combine pass. */
+static bool
+change_pseudo_and_mask (rtx pat)
+{
+ rtx src = SET_SRC (pat);
+ if ((GET_CODE (src) == IOR
+ || GET_CODE (src) == XOR
+ || GET_CODE (src) == PLUS)
+ && (((GET_CODE (XEXP (src, 0)) == ASHIFT
+ || GET_CODE (XEXP (src, 0)) == LSHIFTRT
+ || GET_CODE (XEXP (src, 0)) == AND)
+ && REG_P (XEXP (src, 1)))))
+ {
+ rtx *reg = &XEXP (SET_SRC (pat), 1);
+ machine_mode mode = GET_MODE (*reg);
+ unsigned HOST_WIDE_INT nonzero = nonzero_bits (*reg, mode);
+ if (nonzero < GET_MODE_MASK (mode))
+ {
+ rtx x = gen_rtx_AND (mode, *reg, GEN_INT (nonzero));
+ SUBST (*reg, x);
+ maybe_swap_commutative_operands (SET_SRC (pat));
+ return true;
+ }
+ }
+ return false;
+}
+
/* Like recog, but we receive the address of a pointer to a new pattern.
We try to match the rtx that the pointer points to.
If that fails, we may try to modify or replace the pattern,
@@ -11586,7 +11616,10 @@ recog_for_combine (rtx *pnewpat, rtx_insn *insn, rtx *pnotes)
}
}
else
- changed = change_zero_ext (pat);
+ {
+ changed = change_pseudo_and_mask (pat);
+ changed |= change_zero_ext (pat);
+ }
}
else if (GET_CODE (pat) == PARALLEL)
{
@@ -4149,24 +4149,6 @@ (define_insn "rotl<mode>3_insert_3"
}
[(set_attr "type" "insert")])
-(define_code_iterator plus_ior_xor [plus ior xor])
-
-(define_split
- [(set (match_operand:GPR 0 "gpc_reg_operand")
- (plus_ior_xor:GPR (ashift:GPR (match_operand:GPR 1 "gpc_reg_operand")
- (match_operand:SI 2 "const_int_operand"))
- (match_operand:GPR 3 "gpc_reg_operand")))]
- "nonzero_bits (operands[3], <MODE>mode)
- < HOST_WIDE_INT_1U << INTVAL (operands[2])"
- [(set (match_dup 0)
- (ior:GPR (and:GPR (match_dup 3)
- (match_dup 4))
- (ashift:GPR (match_dup 1)
- (match_dup 2))))]
-{
- operands[4] = GEN_INT ((HOST_WIDE_INT_1U << INTVAL (operands[2])) - 1);
-})
-
(define_insn "*rotl<mode>3_insert_4"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(ior:GPR (and:GPR (match_operand:GPR 3 "gpc_reg_operand" "0")
@@ -12,7 +12,7 @@ void rotins (unsigned int x)
b.y = (x<<12) | (x>>20);
}
-/* { dg-final { scan-assembler-not {\mrlwinm} } } */
+/* { dg-final { scan-assembler-not {\mrlwinm} { target ilp32 } } } */
/* { dg-final { scan-assembler-not {\mrldic} } } */
/* { dg-final { scan-assembler-not {\mrot[lr]} } } */
/* { dg-final { scan-assembler-not {\ms[lr][wd]} } } */
@@ -2,14 +2,14 @@
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 14121 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 20217 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 21279 { target lp64 } } } */
/* { dg-final { scan-assembler-times {(?n)^\s+blr} 6750 } } */
/* { dg-final { scan-assembler-times {(?n)^\s+mr} 643 { target ilp32 } } } */
/* { dg-final { scan-assembler-times {(?n)^\s+mr} 11 { target lp64 } } } */
/* { dg-final { scan-assembler-times {(?n)^\s+rldicl} 7790 { target lp64 } } } */
/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target ilp32 } } } */
-/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1666 { target lp64 } } } */
+/* { dg-final { scan-assembler-times {(?n)^\s+rlwimi} 1692 { target lp64 } } } */
/* { dg-final { scan-assembler-times {(?n)^\s+mulli} 5036 } } */