match.pd: simplify "shift + reg EQ|NE reg"

Message ID 20260106214612.3756648-1-daniel.barboza@oss.qualcomm.com
State New
Headers
Series match.pd: simplify "shift + reg EQ|NE reg" |

Commit Message

Daniel Henrique Barboza Jan. 6, 2026, 9:46 p.m. UTC
  Add a transformation for a nested lshift/rshift inside a plus that compares for
equality with the same operand of the plus. In other words:

((a OP b) + c EQ|NE c) ? x : y

becomes, for OP = (lshift, rshift) and in a scenario without overflows:

a !=/== 0 ? x : y

This optimizes the following code, seen in perlbench:

long
frob(long x, long y, long z)
{
  long ret = (y << 2) + z;
  long cond = ret != z;
  if (cond == 0)
    ret = 0;
  return ret;
}

gcc/ChangeLog:

        * match.pd (`((a OP b) + c eq|ne c) ? x : y`): New pattern.

gcc/testsuite/ChangeLog:

        * gcc.dg/torture/ifcond-plus-shift-czero.c: New test.

Signed-off-by: Daniel Barboza <daniel.barboza@oss.qualcomm.com>
---
 gcc/match.pd                                  | 17 +++++++++++++
 .../gcc.dg/torture/ifcond-plus-shift-czero.c  | 25 +++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
  

Comments

Andrew Pinski Jan. 6, 2026, 10:13 p.m. UTC | #1
On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
<daniel.barboza@oss.qualcomm.com> wrote:
>
> Add a transformation for a nested lshift/rshift inside a plus that compares for
> equality with the same operand of the plus. In other words:
>
> ((a OP b) + c EQ|NE c) ? x : y
>
> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
>
> a !=/== 0 ? x : y

I think we want the transformation even if it is used outside of a cond_expr.
Also the above is valid even for types that wrap; just not valid for
types that trap on overflow.

As we already have a pattern for `a + C == C`:
```
/* For equality, this is also true with wrapping overflow.  */
(for op (eq ne)
 (simplify
  (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
  (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
       && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
           || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
       && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use (@3)))
       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
   (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
```

The problem is the above does not work as there is an single_use check
on the plus. The single use check was there since the original match
pattern was added.
I am not sure if we should add a special case where in the above
pattern @0 is a shift.
Though changing that will have to wait for GCC 17 I think.

Jeff/Richard,
  What are your thoughts on this? I know Richard had thoughts on some
of the single_use in the match patterns before but I have not tracked
them that well.

Thanks,
Andrew


>
> This optimizes the following code, seen in perlbench:
>
> long
> frob(long x, long y, long z)
> {
>   long ret = (y << 2) + z;
>   long cond = ret != z;
>   if (cond == 0)
>     ret = 0;
>   return ret;
> }
>
> gcc/ChangeLog:
>
>         * match.pd (`((a OP b) + c eq|ne c) ? x : y`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.dg/torture/ifcond-plus-shift-czero.c: New test.
>
> Signed-off-by: Daniel Barboza <daniel.barboza@oss.qualcomm.com>
> ---
>  gcc/match.pd                                  | 17 +++++++++++++
>  .../gcc.dg/torture/ifcond-plus-shift-czero.c  | 25 +++++++++++++++++++
>  2 files changed, 42 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index ccdc1129e23..fdc18b1ef41 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -2824,6 +2824,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>         (le (minus (convert:etype @0) { lo; }) { hi; })
>         (gt (minus (convert:etype @0) { lo; }) { hi; })))))))))
>
> +/* Reduces ((A OP B) + C EQ|NE C) ? X : Y to
> +   A EQ|NE 0 ? x : y, for OP = (lshift, rshift).  */
> +(for cmp (eq ne)
> + (for op (lshift rshift)
> +  (simplify
> +   (cond (cmp:c (plus:c (op @1 @2) @0) @0) @3 @4)
> +    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +        && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)))
> +     (with
> +      {
> +       fold_overflow_warning ("assuming signed overflow does not occur "
> +                             "when changing (X << imm) + C1 cmp C1 to "
> +                             "X << imm cmp 0 and X << imm cmp 0 "
> +                             "to X cmp 0",
> +                             WARN_STRICT_OVERFLOW_COMPARISON);  }
> +     (cond (cmp @1 { build_zero_cst (type); }) @3 @4))))))
> +
>  /* X + Z < Y + Z is the same as X < Y when there is no overflow.  */
>  (for op (lt le ge gt)
>   (simplify
> diff --git a/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c b/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
> new file mode 100644
> index 00000000000..769412202c3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
> @@ -0,0 +1,25 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-fdump-tree-phiopt2" } */
> +/* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects"  } { "" } } */
> +
> +long
> +f1 (long x, long y, long z)
> +{
> +  long ret = (y << 2) + z;
> +  long cond = ret != z;
> +  if (cond == 0)
> +    ret = 0;
> +  return ret;
> +}
> +
> +long
> +f2 (long x, long y, long z)
> +{
> +  long ret = (y >> 2) + z;
> +  long cond = ret != z;
> +  if (cond == 0)
> +    ret = 0;
> +  return ret;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "== 0" 2 "phiopt2" } } */
> --
> 2.43.0
>
  
Jeffrey Law Jan. 7, 2026, 2:44 p.m. UTC | #2
On 1/6/2026 3:13 PM, Andrew Pinski wrote:
> On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
> <daniel.barboza@oss.qualcomm.com> wrote:
>> Add a transformation for a nested lshift/rshift inside a plus that compares for
>> equality with the same operand of the plus. In other words:
>>
>> ((a OP b) + c EQ|NE c) ? x : y
>>
>> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
>>
>> a !=/== 0 ? x : y
> I think we want the transformation even if it is used outside of a cond_expr.
> Also the above is valid even for types that wrap; just not valid for
> types that trap on overflow.
>
> As we already have a pattern for `a + C == C`:
> ```
> /* For equality, this is also true with wrapping overflow.  */
> (for op (eq ne)
>   (simplify
>    (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
>    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>         && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
>             || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>         && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use (@3)))
>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
>     (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
> ```
>
> The problem is the above does not work as there is an single_use check
> on the plus. The single use check was there since the original match
> pattern was added.
> I am not sure if we should add a special case where in the above
> pattern @0 is a shift.
> Though changing that will have to wait for GCC 17 I think.
>
> Jeff/Richard,
>    What are your thoughts on this? I know Richard had thoughts on some
> of the single_use in the match patterns before but I have not tracked
> them that well.

I had no idea we had that match.pd pattern; clearly what Daniel is doing 
is closely related.

The single_use in those patterns comes up here:
https://gcc.gnu.org/pipermail/gcc-patches/2017-October/484606.html

Looks like single use is in there because of interactions with another 
pattern.

jeff
  
Daniel Henrique Barboza Jan. 7, 2026, 9:47 p.m. UTC | #3
On 1/7/2026 11:44 AM, Jeffrey Law wrote:
> 
> 
> On 1/6/2026 3:13 PM, Andrew Pinski wrote:
>> On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
>> <daniel.barboza@oss.qualcomm.com> wrote:
>>> Add a transformation for a nested lshift/rshift inside a plus that 
>>> compares for
>>> equality with the same operand of the plus. In other words:
>>>
>>> ((a OP b) + c EQ|NE c) ? x : y
>>>
>>> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
>>>
>>> a !=/== 0 ? x : y
>> I think we want the transformation even if it is used outside of a 
>> cond_expr.
>> Also the above is valid even for types that wrap; just not valid for
>> types that trap on overflow.
>>
>> As we already have a pattern for `a + C == C`:
>> ```
>> /* For equality, this is also true with wrapping overflow.  */
>> (for op (eq ne)
>>   (simplify
>>    (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
>>    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>>         && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
>>             || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>>         && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use 
>> (@3)))
>>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
>>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
>>     (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
>> ```
>>
>> The problem is the above does not work as there is an single_use check
>> on the plus. The single use check was there since the original match
>> pattern was added.
>> I am not sure if we should add a special case where in the above
>> pattern @0 is a shift.
>> Though changing that will have to wait for GCC 17 I think.


I tried  variation of that pattern, taking into account the lshift and 
removing the single_use and constant_class_p conditionals:

/* Do the same but with lshift|rshift as @0.  */
(for op (eq ne)
  (simplify
   (op:c (nop_convert?@3 (plus:c@2 (lshift @0 @4) (convert1? @1))) 
(convert2? @1))
   (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
        && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
	   || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
        && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
        && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
    (op @0 { build_zero_cst (TREE_TYPE (@0)); }))))


And I got the following 'optimized' dump:


long int frob (long int x, long int y, long int z)
{
   long int ret;
   long int _1;

;;   basic block 2, loop depth 0, count 1073741824 (estimated locally, 
freq 1.0000), maybe hot
;;    prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)
;;    pred:       ENTRY [always]  count:1073741824 (estimated locally, 
freq 1.0000) (FALLTHRU,EXECUTABLE)
   if (y_3(D) == 0)
     goto <bb 4>; [50.00%]
   else
     goto <bb 3>; [50.00%]
;;    succ:       4 [50.0% (guessed)]  count:536870912 (estimated 
locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
;;                3 [50.0% (guessed)]  count:536870912 (estimated 
locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)

;;   basic block 3, loop depth 0, count 536870912 (estimated locally, 
freq 0.5000), maybe hot
;;    prev block 2, next block 4, flags: (NEW, REACHABLE, VISITED)
;;    pred:       2 [50.0% (guessed)]  count:536870912 (estimated 
locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
   _1 = y_3(D) << 2;
   ret_5 = _1 + z_4(D);
;;    succ:       4 [always]  count:536870912 (estimated locally, freq 
0.5000) (FALLTHRU,EXECUTABLE)

;;   basic block 4, loop depth 0, count 1073741824 (estimated locally, 
freq 1.0000), maybe hot
;;    prev block 3, next block 1, flags: (NEW, REACHABLE, VISITED)
;;    pred:       3 [always]  count:536870912 (estimated locally, freq 
0.5000) (FALLTHRU,EXECUTABLE)
;;                2 [50.0% (guessed)]  count:536870912 (estimated 
locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
   # ret_2 = PHI <ret_5(3), y_3(D)(2)>
   # VUSE <.MEM_6(D)>
   return ret_2;
;;    succ:       EXIT [always]  count:1073741824 (estimated locally, 
freq 1.0000) (EXECUTABLE) test.c:8:10

}


With the pattern I sent this is the 'optimized' dump I get:


long int frob (long int x, long int y, long int z)
{
   long int ret;
   long int _1;
   _Bool _7;
   long int _8;

;;   basic block 2, loop depth 0, count 1073741824 (estimated locally, 
freq 1.0000), maybe hot
;;    prev block 0, next block 1, flags: (NEW, REACHABLE, VISITED)
;;    pred:       ENTRY [always]  count:1073741824 (estimated locally, 
freq 1.0000) (FALLTHRU,EXECUTABLE)
   _1 = y_3(D) << 2;
   ret_5 = _1 + z_4(D);
   _7 = y_3(D) == 0;
   _8 = _7 ? 0 : ret_5;
   # VUSE <.MEM_6(D)>
   return _8;
;;    succ:       EXIT [always]  count:1073741824 (estimated locally, 
freq 1.0000) (EXECUTABLE) test.c:8:10


I'm not sure why we're having this difference given that the patterns 
are quite similar.

By the way my initial idea for upstreaming was to split the pattern in 
two. One like this:

X << N EQ|NE 0 -> X EQ|NE 0

And another one like the original I sent but returning the full lshift 
comparison instead of doing the reduction, i.e.:

(a OP b) + c EQ|NE c -> (a OP b) EQ|NE 0


But I got a lot of trouble with errors like:

Analyzing compilation unit
Performing interprocedural optimizations
  <*free_lang_data> {heap 1028k} <visibility> {heap 1028k} 
<build_ssa_passes> {heap 1028k} <targetclone> {heap 1440k} <opt_lo
cal_passes> {heap 1440k}during GIMPLE pass: ccp
dump file: test.c.038t.ccp1
test.c: In function ‘frob’:
test.c:9:1: internal compiler error: in decompose, at wide-int.h:1049
     9 | }
       | ^
0x2ecc34f internal_error(char const*, ...)
         ../../gcc/diagnostic-global-context.cc:787
0xf0790f fancy_abort(char const*, int, char const*)
         ../../gcc/diagnostics/context.cc:1805
0xa5b276 wi::int_traits<generic_wide_int<wide_int_ref_storage<false, 
false> > >::decompose(long*, unsigned int, generic_wide
_int<wide_int_ref_storage<false, false> > const&)
         ../../gcc/wide-int.h:1049
0xa5d419 wi::int_traits<generic_wide_int<wide_int_storage> 
 >::decompose(long*, unsigned int, generic_wide_int<wide_int_stora
ge> const&)
         ../../gcc/tree.h:3906
0xa5d419 wide_int_ref_storage<true, 
false>::wide_int_ref_storage<generic_wide_int<wide_int_storage> 
 >(generic_wide_int<wide_
int_storage> const&, unsigned int)
         ../../gcc/wide-int.h:1099
0xa5d419 generic_wide_int<wide_int_ref_storage<true, false> 
 >::generic_wide_int<generic_wide_int<wide_int_storage> >(generic
_wide_int<wide_int_storage> const&, unsigned int)
         ../../gcc/wide-int.h:855

My guess is that I was messing up the relation between the 'type' 
precision and the precision used in build_zero_cst(). I gave up and
sent the full pattern, but I wonder if splitting it would be better.


Thanks,

Daniel




>>
>> Jeff/Richard,
>>    What are your thoughts on this? I know Richard had thoughts on some
>> of the single_use in the match patterns before but I have not tracked
>> them that well.
> 
> I had no idea we had that match.pd pattern; clearly what Daniel is doing 
> is closely related.
> 
> The single_use in those patterns comes up here:
> https://gcc.gnu.org/pipermail/gcc-patches/2017-October/484606.html
> 
> Looks like single use is in there because of interactions with another 
> pattern.
> 
> jeff
> 
> 
> 
> 
>
  
Andrew Pinski Jan. 7, 2026, 10:06 p.m. UTC | #4
On Wed, Jan 7, 2026 at 1:47 PM Daniel Henrique Barboza
<daniel.barboza@oss.qualcomm.com> wrote:
>
>
>
> On 1/7/2026 11:44 AM, Jeffrey Law wrote:
> >
> >
> > On 1/6/2026 3:13 PM, Andrew Pinski wrote:
> >> On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
> >> <daniel.barboza@oss.qualcomm.com> wrote:
> >>> Add a transformation for a nested lshift/rshift inside a plus that
> >>> compares for
> >>> equality with the same operand of the plus. In other words:
> >>>
> >>> ((a OP b) + c EQ|NE c) ? x : y
> >>>
> >>> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
> >>>
> >>> a !=/== 0 ? x : y
> >> I think we want the transformation even if it is used outside of a
> >> cond_expr.
> >> Also the above is valid even for types that wrap; just not valid for
> >> types that trap on overflow.
> >>
> >> As we already have a pattern for `a + C == C`:
> >> ```
> >> /* For equality, this is also true with wrapping overflow.  */
> >> (for op (eq ne)
> >>   (simplify
> >>    (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
> >>    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >>         && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> >>             || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> >>         && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use
> >> (@3)))
> >>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
> >>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
> >>     (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
> >> ```
> >>
> >> The problem is the above does not work as there is an single_use check
> >> on the plus. The single use check was there since the original match
> >> pattern was added.
> >> I am not sure if we should add a special case where in the above
> >> pattern @0 is a shift.
> >> Though changing that will have to wait for GCC 17 I think.
>
>
> I tried  variation of that pattern, taking into account the lshift and
> removing the single_use and constant_class_p conditionals:
>
> /* Do the same but with lshift|rshift as @0.  */
> (for op (eq ne)
>   (simplify
>    (op:c (nop_convert?@3 (plus:c@2 (lshift @0 @4) (convert1? @1)))
> (convert2? @1))
>    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>         && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
>            || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
>         && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
>     (op @0 { build_zero_cst (TREE_TYPE (@0)); }))))
>
>
> And I got the following 'optimized' dump:
>
>
> long int frob (long int x, long int y, long int z)
> {
>    long int ret;
>    long int _1;
>
> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
> freq 1.0000), maybe hot
> ;;    prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)
> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
> freq 1.0000) (FALLTHRU,EXECUTABLE)
>    if (y_3(D) == 0)
>      goto <bb 4>; [50.00%]
>    else
>      goto <bb 3>; [50.00%]
> ;;    succ:       4 [50.0% (guessed)]  count:536870912 (estimated
> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
> ;;                3 [50.0% (guessed)]  count:536870912 (estimated
> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
>
> ;;   basic block 3, loop depth 0, count 536870912 (estimated locally,
> freq 0.5000), maybe hot
> ;;    prev block 2, next block 4, flags: (NEW, REACHABLE, VISITED)
> ;;    pred:       2 [50.0% (guessed)]  count:536870912 (estimated
> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
>    _1 = y_3(D) << 2;
>    ret_5 = _1 + z_4(D);
> ;;    succ:       4 [always]  count:536870912 (estimated locally, freq
> 0.5000) (FALLTHRU,EXECUTABLE)
>
> ;;   basic block 4, loop depth 0, count 1073741824 (estimated locally,
> freq 1.0000), maybe hot
> ;;    prev block 3, next block 1, flags: (NEW, REACHABLE, VISITED)
> ;;    pred:       3 [always]  count:536870912 (estimated locally, freq
> 0.5000) (FALLTHRU,EXECUTABLE)
> ;;                2 [50.0% (guessed)]  count:536870912 (estimated
> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
>    # ret_2 = PHI <ret_5(3), y_3(D)(2)>
>    # VUSE <.MEM_6(D)>
>    return ret_2;
> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
> freq 1.0000) (EXECUTABLE) test.c:8:10
>
> }

Yes afterwards, the sink happens. And pushed ` y_3(D) << 2`/`_1 +
z_4(D)` into the branch.
And then uncprop changes `0(2)` into `y_3(D)(2)` in the PHI (sometimes
that is better; sometimes that is worse; for riscv with zicond it is
usually worse because ifcvt does not always convert back into 0).
I wonder if keeping `(lshift @0 @4)` as being compared to 0 if that
would be better, it would at least keep one expression out of the
branch.


>
>
> With the pattern I sent this is the 'optimized' dump I get:
>
>
> long int frob (long int x, long int y, long int z)
> {
>    long int ret;
>    long int _1;
>    _Bool _7;
>    long int _8;
>
> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
> freq 1.0000), maybe hot
> ;;    prev block 0, next block 1, flags: (NEW, REACHABLE, VISITED)
> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
> freq 1.0000) (FALLTHRU,EXECUTABLE)
>    _1 = y_3(D) << 2;
>    ret_5 = _1 + z_4(D);
>    _7 = y_3(D) == 0;
>    _8 = _7 ? 0 : ret_5;
>    # VUSE <.MEM_6(D)>
>    return _8;
> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
> freq 1.0000) (EXECUTABLE) test.c:8:10
>
>
> I'm not sure why we're having this difference given that the patterns
> are quite similar.

Because when you have a cond_expr in the output of a match pattern, we
don't recreate an if/else branches.
In the pattern without the cond_expr, the sink pass sinks the other
expressions. While here we have an explicit COND_EXPR rather than
control flow.
The big question is, can ifcvt on the rtl level recover the first way
into the second way; I think currently it does not but I have not
fully looked; so we might to need help it on the gimple level (for GCC
17 though).

>
> By the way my initial idea for upstreaming was to split the pattern in
> two. One like this:
>
> X << N EQ|NE 0 -> X EQ|NE 0
>
> And another one like the original I sent but returning the full lshift
> comparison instead of doing the reduction, i.e.:
>
> (a OP b) + c EQ|NE c -> (a OP b) EQ|NE 0
>
>
> But I got a lot of trouble with errors like:
>
> Analyzing compilation unit
> Performing interprocedural optimizations
>   <*free_lang_data> {heap 1028k} <visibility> {heap 1028k}
> <build_ssa_passes> {heap 1028k} <targetclone> {heap 1440k} <opt_lo
> cal_passes> {heap 1440k}during GIMPLE pass: ccp
> dump file: test.c.038t.ccp1
> test.c: In function ‘frob’:
> test.c:9:1: internal compiler error: in decompose, at wide-int.h:1049
>      9 | }
>        | ^
> 0x2ecc34f internal_error(char const*, ...)
>          ../../gcc/diagnostic-global-context.cc:787
> 0xf0790f fancy_abort(char const*, int, char const*)
>          ../../gcc/diagnostics/context.cc:1805
> 0xa5b276 wi::int_traits<generic_wide_int<wide_int_ref_storage<false,
> false> > >::decompose(long*, unsigned int, generic_wide
> _int<wide_int_ref_storage<false, false> > const&)
>          ../../gcc/wide-int.h:1049
> 0xa5d419 wi::int_traits<generic_wide_int<wide_int_storage>
>  >::decompose(long*, unsigned int, generic_wide_int<wide_int_stora
> ge> const&)
>          ../../gcc/tree.h:3906
> 0xa5d419 wide_int_ref_storage<true,
> false>::wide_int_ref_storage<generic_wide_int<wide_int_storage>
>  >(generic_wide_int<wide_
> int_storage> const&, unsigned int)
>          ../../gcc/wide-int.h:1099
> 0xa5d419 generic_wide_int<wide_int_ref_storage<true, false>
>  >::generic_wide_int<generic_wide_int<wide_int_storage> >(generic
> _wide_int<wide_int_storage> const&, unsigned int)
>          ../../gcc/wide-int.h:855
>
> My guess is that I was messing up the relation between the 'type'
> precision and the precision used in build_zero_cst(). I gave up and
> sent the full pattern, but I wonder if splitting it would be better.

This usually does mean a type mismatch has happened or you are
comparing two `wi::to_wide` which come from different precision
INTEGER_CSTs.

Thanks,
Andrew

>
>
> Thanks,
>
> Daniel
>
>
>
>
> >>
> >> Jeff/Richard,
> >>    What are your thoughts on this? I know Richard had thoughts on some
> >> of the single_use in the match patterns before but I have not tracked
> >> them that well.
> >
> > I had no idea we had that match.pd pattern; clearly what Daniel is doing
> > is closely related.
> >
> > The single_use in those patterns comes up here:
> > https://gcc.gnu.org/pipermail/gcc-patches/2017-October/484606.html
> >
> > Looks like single use is in there because of interactions with another
> > pattern.
> >
> > jeff
> >
> >
> >
> >
> >
>
  
Jeffrey Law Jan. 8, 2026, 2:45 a.m. UTC | #5
On 1/7/2026 3:06 PM, Andrew Pinski wrote:
> Yes afterwards, the sink happens. And pushed ` y_3(D) << 2`/`_1 +
> z_4(D)` into the branch.
> And then uncprop changes `0(2)` into `y_3(D)(2)` in the PHI (sometimes
> that is better; sometimes that is worse; for riscv with zicond it is
> usually worse because ifcvt does not always convert back into 0).
In general uncprop should be profitable as it reduces the number of 
constant initializations created by conditional equivalences. 
Essentially it replaces a constant zero with an object known to have the 
value zero on the appropriate path.

RTL cse/gcse/cprop all know how to handle conditional equivalences and 
should push the 0 value back in.  It's also the cas that the zicond code 
knows how to handle cases where both arms are pseudos, but one of the 
arms is also used in the test and as a result we know its going to have 
the value zero in the context we care about.



> I wonder if keeping `(lshift @0 @4)` as being compared to 0 if that
> would be better, it would at least keep one expression out of the
> branch.
Dunno.  I'd probably need to see before/after RTL and assembly code.

>
>
>>
>> With the pattern I sent this is the 'optimized' dump I get:
>>
>>
>> long int frob (long int x, long int y, long int z)
>> {
>>     long int ret;
>>     long int _1;
>>     _Bool _7;
>>     long int _8;
>>
>> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
>> freq 1.0000), maybe hot
>> ;;    prev block 0, next block 1, flags: (NEW, REACHABLE, VISITED)
>> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (FALLTHRU,EXECUTABLE)
>>     _1 = y_3(D) << 2;
>>     ret_5 = _1 + z_4(D);
>>     _7 = y_3(D) == 0;
>>     _8 = _7 ? 0 : ret_5;
>>     # VUSE <.MEM_6(D)>
>>     return _8;
>> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (EXECUTABLE) test.c:8:10
>>
>>
>> I'm not sure why we're having this difference given that the patterns
>> are quite similar.
> Because when you have a cond_expr in the output of a match pattern, we
> don't recreate an if/else branches.
> In the pattern without the cond_expr, the sink pass sinks the other
> expressions. While here we have an explicit COND_EXPR rather than
> control flow.
> The big question is, can ifcvt on the rtl level recover the first way
> into the second way; I think currently it does not but I have not
> fully looked; so we might to need help it on the gimple level (for GCC
> 17 though).
I would generally expect the form above to eventually collapse down to 
something ifcvt can convert using zicond.


Jeff
  
Daniel Henrique Barboza Jan. 8, 2026, 9:26 p.m. UTC | #6
On 1/7/2026 7:06 PM, Andrew Pinski wrote:
> On Wed, Jan 7, 2026 at 1:47 PM Daniel Henrique Barboza
> <daniel.barboza@oss.qualcomm.com> wrote:
>>
>>
>>
>> On 1/7/2026 11:44 AM, Jeffrey Law wrote:
>>>
>>>
>>> On 1/6/2026 3:13 PM, Andrew Pinski wrote:
>>>> On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
>>>> <daniel.barboza@oss.qualcomm.com> wrote:
>>>>> Add a transformation for a nested lshift/rshift inside a plus that
>>>>> compares for
>>>>> equality with the same operand of the plus. In other words:
>>>>>
>>>>> ((a OP b) + c EQ|NE c) ? x : y
>>>>>
>>>>> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
>>>>>
>>>>> a !=/== 0 ? x : y
>>>> I think we want the transformation even if it is used outside of a
>>>> cond_expr.
>>>> Also the above is valid even for types that wrap; just not valid for
>>>> types that trap on overflow.
>>>>
>>>> As we already have a pattern for `a + C == C`:
>>>> ```
>>>> /* For equality, this is also true with wrapping overflow.  */
>>>> (for op (eq ne)
>>>>    (simplify
>>>>     (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
>>>>     (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>>>>          && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
>>>>              || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>>>>          && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use
>>>> (@3)))
>>>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
>>>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
>>>>      (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
>>>> ```
>>>>
>>>> The problem is the above does not work as there is an single_use check
>>>> on the plus. The single use check was there since the original match
>>>> pattern was added.
>>>> I am not sure if we should add a special case where in the above
>>>> pattern @0 is a shift.
>>>> Though changing that will have to wait for GCC 17 I think.
>>
>>
>> I tried  variation of that pattern, taking into account the lshift and
>> removing the single_use and constant_class_p conditionals:
>>
>> /* Do the same but with lshift|rshift as @0.  */
>> (for op (eq ne)
>>    (simplify
>>     (op:c (nop_convert?@3 (plus:c@2 (lshift @0 @4) (convert1? @1)))
>> (convert2? @1))
>>     (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
>>          && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
>>             || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
>>      (op @0 { build_zero_cst (TREE_TYPE (@0)); }))))
>>
>>
>> And I got the following 'optimized' dump:
>>
>>
>> long int frob (long int x, long int y, long int z)
>> {
>>     long int ret;
>>     long int _1;
>>
>> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
>> freq 1.0000), maybe hot
>> ;;    prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)
>> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (FALLTHRU,EXECUTABLE)
>>     if (y_3(D) == 0)
>>       goto <bb 4>; [50.00%]
>>     else
>>       goto <bb 3>; [50.00%]
>> ;;    succ:       4 [50.0% (guessed)]  count:536870912 (estimated
>> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
>> ;;                3 [50.0% (guessed)]  count:536870912 (estimated
>> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
>>
>> ;;   basic block 3, loop depth 0, count 536870912 (estimated locally,
>> freq 0.5000), maybe hot
>> ;;    prev block 2, next block 4, flags: (NEW, REACHABLE, VISITED)
>> ;;    pred:       2 [50.0% (guessed)]  count:536870912 (estimated
>> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
>>     _1 = y_3(D) << 2;
>>     ret_5 = _1 + z_4(D);
>> ;;    succ:       4 [always]  count:536870912 (estimated locally, freq
>> 0.5000) (FALLTHRU,EXECUTABLE)
>>
>> ;;   basic block 4, loop depth 0, count 1073741824 (estimated locally,
>> freq 1.0000), maybe hot
>> ;;    prev block 3, next block 1, flags: (NEW, REACHABLE, VISITED)
>> ;;    pred:       3 [always]  count:536870912 (estimated locally, freq
>> 0.5000) (FALLTHRU,EXECUTABLE)
>> ;;                2 [50.0% (guessed)]  count:536870912 (estimated
>> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
>>     # ret_2 = PHI <ret_5(3), y_3(D)(2)>
>>     # VUSE <.MEM_6(D)>
>>     return ret_2;
>> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (EXECUTABLE) test.c:8:10
>>
>> }
> 
> Yes afterwards, the sink happens. And pushed ` y_3(D) << 2`/`_1 +
> z_4(D)` into the branch.
> And then uncprop changes `0(2)` into `y_3(D)(2)` in the PHI (sometimes
> that is better; sometimes that is worse; for riscv with zicond it is
> usually worse because ifcvt does not always convert back into 0).
> I wonder if keeping `(lshift @0 @4)` as being compared to 0 if that
> would be better, it would at least keep one expression out of the
> branch.


I did some tests with the RISC-V target to see the final .s for each 
(-march=rv64gcv_zba_zbb_zbc_zbs_zfa_zicond). For the pattern I sent:

+(for cmp (eq ne)
+ (for op (lshift rshift)
+  (simplify
+   (cond (cmp:c (plus:c (op @1 @2) @0) @0) @3 @4)
+    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)))
+     (with
+      {
+       fold_overflow_warning ("assuming signed overflow does not occur "
+			      "when changing (X << imm) + C1 cmp C1 to "
+			      "X << imm cmp 0 and X << imm cmp 0 "
+			      "to X cmp 0",
+			      WARN_STRICT_OVERFLOW_COMPARISON);  }
+     (cond (cmp @1 { build_zero_cst (type); }) @3 @4))))))

frob:
.LFB0:
         .cfi_startproc
         sh2add  a2,a1,a2
         czero.eqz       a0,a2,a1
         ret
         .cfi_endproc
.LFE0:


Using a variation of the existing pattern Andrew mentioned, returning a 
lshift czero compare:


+/* Do the same but with lshift|rshift as @0.  */
+(for cmp (eq ne)
+ (for op (lshift rshift)
+  (simplify
+   (cmp:c (nop_convert?@3 (plus:c@2 (op @0 @4) (convert1? @1))) 
(convert2? @1))
+   (with { tree type0 = TREE_TYPE (@0);  }
+   (if (ANY_INTEGRAL_TYPE_P (type0)
+       && (TYPE_OVERFLOW_UNDEFINED (type0)
+           || TYPE_OVERFLOW_WRAPS (type0))
+       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
+       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
+    (cmp (convert:type0 (op @0 @4)) { build_zero_cst (type0); }))))))


frob:
.LFB0:
         .cfi_startproc
         slli    a1,a1,2
         czero.eqz       a0,a2,a1
         add     a0,a1,a0
         ret
         .cfi_endproc
.LFE0:


Same thing, but returning a @0 czero compare instead:

+(for cmp (eq ne)
+ (for op (lshift rshift)
+  (simplify
+   (cmp:c (nop_convert?@3 (plus:c@2 (op @0 @4) (convert1? @1))) 
(convert2? @1))
+   (with { tree type0 = TREE_TYPE (@0);  }
+   (if (ANY_INTEGRAL_TYPE_P (type0)
+       && (TYPE_OVERFLOW_UNDEFINED (type0)
+           || TYPE_OVERFLOW_WRAPS (type0))
+       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
+       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
+    (cmp @0 { build_zero_cst (type0); }))))))


frob:
.LFB0:
         .cfi_startproc
         sh2add  a2,a1,a2
         czero.eqz       a0,a2,a1
         ret
         .cfi_endproc
.LFE0:


So it seems to me that yes, ifcvt can convert from the existing pattern 
to the new pattern I sent if we return a @0 czero compare.


I prefer adding a variation of an existing pattern instead of a brand 
new one so I would go with this approach. But it only works without the 
single_use() checks. Given that this pattern is rather specific, as long 
as we're not regressing existing tests, maybe it's ok?


Thanks,

Daniel




> 
> 
>>
>>
>> With the pattern I sent this is the 'optimized' dump I get:
>>
>>
>> long int frob (long int x, long int y, long int z)
>> {
>>     long int ret;
>>     long int _1;
>>     _Bool _7;
>>     long int _8;
>>
>> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
>> freq 1.0000), maybe hot
>> ;;    prev block 0, next block 1, flags: (NEW, REACHABLE, VISITED)
>> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (FALLTHRU,EXECUTABLE)
>>     _1 = y_3(D) << 2;
>>     ret_5 = _1 + z_4(D);
>>     _7 = y_3(D) == 0;
>>     _8 = _7 ? 0 : ret_5;
>>     # VUSE <.MEM_6(D)>
>>     return _8;
>> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
>> freq 1.0000) (EXECUTABLE) test.c:8:10
>>
>>
>> I'm not sure why we're having this difference given that the patterns
>> are quite similar.
> 
> Because when you have a cond_expr in the output of a match pattern, we
> don't recreate an if/else branches.
> In the pattern without the cond_expr, the sink pass sinks the other
> expressions. While here we have an explicit COND_EXPR rather than
> control flow.
> The big question is, can ifcvt on the rtl level recover the first way
> into the second way; I think currently it does not but I have not
> fully looked; so we might to need help it on the gimple level (for GCC
> 17 though).
> 
>>
>> By the way my initial idea for upstreaming was to split the pattern in
>> two. One like this:
>>
>> X << N EQ|NE 0 -> X EQ|NE 0
>>
>> And another one like the original I sent but returning the full lshift
>> comparison instead of doing the reduction, i.e.:
>>
>> (a OP b) + c EQ|NE c -> (a OP b) EQ|NE 0
>>
>>
>> But I got a lot of trouble with errors like:
>>
>> Analyzing compilation unit
>> Performing interprocedural optimizations
>>    <*free_lang_data> {heap 1028k} <visibility> {heap 1028k}
>> <build_ssa_passes> {heap 1028k} <targetclone> {heap 1440k} <opt_lo
>> cal_passes> {heap 1440k}during GIMPLE pass: ccp
>> dump file: test.c.038t.ccp1
>> test.c: In function ‘frob’:
>> test.c:9:1: internal compiler error: in decompose, at wide-int.h:1049
>>       9 | }
>>         | ^
>> 0x2ecc34f internal_error(char const*, ...)
>>           ../../gcc/diagnostic-global-context.cc:787
>> 0xf0790f fancy_abort(char const*, int, char const*)
>>           ../../gcc/diagnostics/context.cc:1805
>> 0xa5b276 wi::int_traits<generic_wide_int<wide_int_ref_storage<false,
>> false> > >::decompose(long*, unsigned int, generic_wide
>> _int<wide_int_ref_storage<false, false> > const&)
>>           ../../gcc/wide-int.h:1049
>> 0xa5d419 wi::int_traits<generic_wide_int<wide_int_storage>
>>   >::decompose(long*, unsigned int, generic_wide_int<wide_int_stora
>> ge> const&)
>>           ../../gcc/tree.h:3906
>> 0xa5d419 wide_int_ref_storage<true,
>> false>::wide_int_ref_storage<generic_wide_int<wide_int_storage>
>>   >(generic_wide_int<wide_
>> int_storage> const&, unsigned int)
>>           ../../gcc/wide-int.h:1099
>> 0xa5d419 generic_wide_int<wide_int_ref_storage<true, false>
>>   >::generic_wide_int<generic_wide_int<wide_int_storage> >(generic
>> _wide_int<wide_int_storage> const&, unsigned int)
>>           ../../gcc/wide-int.h:855
>>
>> My guess is that I was messing up the relation between the 'type'
>> precision and the precision used in build_zero_cst(). I gave up and
>> sent the full pattern, but I wonder if splitting it would be better.
> 
> This usually does mean a type mismatch has happened or you are
> comparing two `wi::to_wide` which come from different precision
> INTEGER_CSTs.
> 
> Thanks,
> Andrew
> 
>>
>>
>> Thanks,
>>
>> Daniel
>>
>>
>>
>>
>>>>
>>>> Jeff/Richard,
>>>>     What are your thoughts on this? I know Richard had thoughts on some
>>>> of the single_use in the match patterns before but I have not tracked
>>>> them that well.
>>>
>>> I had no idea we had that match.pd pattern; clearly what Daniel is doing
>>> is closely related.
>>>
>>> The single_use in those patterns comes up here:
>>> https://gcc.gnu.org/pipermail/gcc-patches/2017-October/484606.html
>>>
>>> Looks like single use is in there because of interactions with another
>>> pattern.
>>>
>>> jeff
>>>
>>>
>>>
>>>
>>>
>>
  
Richard Biener Jan. 9, 2026, 7:24 a.m. UTC | #7
On Thu, Jan 8, 2026 at 10:26 PM Daniel Henrique Barboza
<daniel.barboza@oss.qualcomm.com> wrote:
>
>
>
> On 1/7/2026 7:06 PM, Andrew Pinski wrote:
> > On Wed, Jan 7, 2026 at 1:47 PM Daniel Henrique Barboza
> > <daniel.barboza@oss.qualcomm.com> wrote:
> >>
> >>
> >>
> >> On 1/7/2026 11:44 AM, Jeffrey Law wrote:
> >>>
> >>>
> >>> On 1/6/2026 3:13 PM, Andrew Pinski wrote:
> >>>> On Tue, Jan 6, 2026 at 1:46 PM Daniel Barboza
> >>>> <daniel.barboza@oss.qualcomm.com> wrote:
> >>>>> Add a transformation for a nested lshift/rshift inside a plus that
> >>>>> compares for
> >>>>> equality with the same operand of the plus. In other words:
> >>>>>
> >>>>> ((a OP b) + c EQ|NE c) ? x : y
> >>>>>
> >>>>> becomes, for OP = (lshift, rshift) and in a scenario without overflows:
> >>>>>
> >>>>> a !=/== 0 ? x : y
> >>>> I think we want the transformation even if it is used outside of a
> >>>> cond_expr.
> >>>> Also the above is valid even for types that wrap; just not valid for
> >>>> types that trap on overflow.
> >>>>
> >>>> As we already have a pattern for `a + C == C`:
> >>>> ```
> >>>> /* For equality, this is also true with wrapping overflow.  */
> >>>> (for op (eq ne)
> >>>>    (simplify
> >>>>     (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
> >>>>     (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >>>>          && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> >>>>              || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> >>>>          && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use
> >>>> (@3)))
> >>>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
> >>>>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
> >>>>      (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
> >>>> ```
> >>>>
> >>>> The problem is the above does not work as there is an single_use check
> >>>> on the plus. The single use check was there since the original match
> >>>> pattern was added.
> >>>> I am not sure if we should add a special case where in the above
> >>>> pattern @0 is a shift.
> >>>> Though changing that will have to wait for GCC 17 I think.
> >>
> >>
> >> I tried  variation of that pattern, taking into account the lshift and
> >> removing the single_use and constant_class_p conditionals:
> >>
> >> /* Do the same but with lshift|rshift as @0.  */
> >> (for op (eq ne)
> >>    (simplify
> >>     (op:c (nop_convert?@3 (plus:c@2 (lshift @0 @4) (convert1? @1)))
> >> (convert2? @1))
> >>     (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
> >>          && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
> >>             || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
> >>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
> >>          && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
> >>      (op @0 { build_zero_cst (TREE_TYPE (@0)); }))))
> >>
> >>
> >> And I got the following 'optimized' dump:
> >>
> >>
> >> long int frob (long int x, long int y, long int z)
> >> {
> >>     long int ret;
> >>     long int _1;
> >>
> >> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
> >> freq 1.0000), maybe hot
> >> ;;    prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)
> >> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
> >> freq 1.0000) (FALLTHRU,EXECUTABLE)
> >>     if (y_3(D) == 0)
> >>       goto <bb 4>; [50.00%]
> >>     else
> >>       goto <bb 3>; [50.00%]
> >> ;;    succ:       4 [50.0% (guessed)]  count:536870912 (estimated
> >> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
> >> ;;                3 [50.0% (guessed)]  count:536870912 (estimated
> >> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
> >>
> >> ;;   basic block 3, loop depth 0, count 536870912 (estimated locally,
> >> freq 0.5000), maybe hot
> >> ;;    prev block 2, next block 4, flags: (NEW, REACHABLE, VISITED)
> >> ;;    pred:       2 [50.0% (guessed)]  count:536870912 (estimated
> >> locally, freq 0.5000) (FALSE_VALUE,EXECUTABLE)
> >>     _1 = y_3(D) << 2;
> >>     ret_5 = _1 + z_4(D);
> >> ;;    succ:       4 [always]  count:536870912 (estimated locally, freq
> >> 0.5000) (FALLTHRU,EXECUTABLE)
> >>
> >> ;;   basic block 4, loop depth 0, count 1073741824 (estimated locally,
> >> freq 1.0000), maybe hot
> >> ;;    prev block 3, next block 1, flags: (NEW, REACHABLE, VISITED)
> >> ;;    pred:       3 [always]  count:536870912 (estimated locally, freq
> >> 0.5000) (FALLTHRU,EXECUTABLE)
> >> ;;                2 [50.0% (guessed)]  count:536870912 (estimated
> >> locally, freq 0.5000) (TRUE_VALUE,EXECUTABLE)
> >>     # ret_2 = PHI <ret_5(3), y_3(D)(2)>
> >>     # VUSE <.MEM_6(D)>
> >>     return ret_2;
> >> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
> >> freq 1.0000) (EXECUTABLE) test.c:8:10
> >>
> >> }
> >
> > Yes afterwards, the sink happens. And pushed ` y_3(D) << 2`/`_1 +
> > z_4(D)` into the branch.
> > And then uncprop changes `0(2)` into `y_3(D)(2)` in the PHI (sometimes
> > that is better; sometimes that is worse; for riscv with zicond it is
> > usually worse because ifcvt does not always convert back into 0).
> > I wonder if keeping `(lshift @0 @4)` as being compared to 0 if that
> > would be better, it would at least keep one expression out of the
> > branch.
>
>
> I did some tests with the RISC-V target to see the final .s for each
> (-march=rv64gcv_zba_zbb_zbc_zbs_zfa_zicond). For the pattern I sent:
>
> +(for cmp (eq ne)
> + (for op (lshift rshift)
> +  (simplify
> +   (cond (cmp:c (plus:c (op @1 @2) @0) @0) @3 @4)
> +    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
> +        && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)))
> +     (with
> +      {
> +       fold_overflow_warning ("assuming signed overflow does not occur "
> +                             "when changing (X << imm) + C1 cmp C1 to "
> +                             "X << imm cmp 0 and X << imm cmp 0 "
> +                             "to X cmp 0",
> +                             WARN_STRICT_OVERFLOW_COMPARISON);  }

Please don't any new fold_overflow_warning things, those are borderly
useless (IMO all of them should be wiped).

No comments on the rest, I didn't review the discussion.

Richard.

> +     (cond (cmp @1 { build_zero_cst (type); }) @3 @4))))))
>
> frob:
> .LFB0:
>          .cfi_startproc
>          sh2add  a2,a1,a2
>          czero.eqz       a0,a2,a1
>          ret
>          .cfi_endproc
> .LFE0:
>
>
> Using a variation of the existing pattern Andrew mentioned, returning a
> lshift czero compare:
>
>
> +/* Do the same but with lshift|rshift as @0.  */
> +(for cmp (eq ne)
> + (for op (lshift rshift)
> +  (simplify
> +   (cmp:c (nop_convert?@3 (plus:c@2 (op @0 @4) (convert1? @1)))
> (convert2? @1))
> +   (with { tree type0 = TREE_TYPE (@0);  }
> +   (if (ANY_INTEGRAL_TYPE_P (type0)
> +       && (TYPE_OVERFLOW_UNDEFINED (type0)
> +           || TYPE_OVERFLOW_WRAPS (type0))
> +       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
> +       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
> +    (cmp (convert:type0 (op @0 @4)) { build_zero_cst (type0); }))))))
>
>
> frob:
> .LFB0:
>          .cfi_startproc
>          slli    a1,a1,2
>          czero.eqz       a0,a2,a1
>          add     a0,a1,a0
>          ret
>          .cfi_endproc
> .LFE0:
>
>
> Same thing, but returning a @0 czero compare instead:
>
> +(for cmp (eq ne)
> + (for op (lshift rshift)
> +  (simplify
> +   (cmp:c (nop_convert?@3 (plus:c@2 (op @0 @4) (convert1? @1)))
> (convert2? @1))
> +   (with { tree type0 = TREE_TYPE (@0);  }
> +   (if (ANY_INTEGRAL_TYPE_P (type0)
> +       && (TYPE_OVERFLOW_UNDEFINED (type0)
> +           || TYPE_OVERFLOW_WRAPS (type0))
> +       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
> +       && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
> +    (cmp @0 { build_zero_cst (type0); }))))))
>
>
> frob:
> .LFB0:
>          .cfi_startproc
>          sh2add  a2,a1,a2
>          czero.eqz       a0,a2,a1
>          ret
>          .cfi_endproc
> .LFE0:
>
>
> So it seems to me that yes, ifcvt can convert from the existing pattern
> to the new pattern I sent if we return a @0 czero compare.
>
>
> I prefer adding a variation of an existing pattern instead of a brand
> new one so I would go with this approach. But it only works without the
> single_use() checks. Given that this pattern is rather specific, as long
> as we're not regressing existing tests, maybe it's ok?
>
>
> Thanks,
>
> Daniel
>
>
>
>
> >
> >
> >>
> >>
> >> With the pattern I sent this is the 'optimized' dump I get:
> >>
> >>
> >> long int frob (long int x, long int y, long int z)
> >> {
> >>     long int ret;
> >>     long int _1;
> >>     _Bool _7;
> >>     long int _8;
> >>
> >> ;;   basic block 2, loop depth 0, count 1073741824 (estimated locally,
> >> freq 1.0000), maybe hot
> >> ;;    prev block 0, next block 1, flags: (NEW, REACHABLE, VISITED)
> >> ;;    pred:       ENTRY [always]  count:1073741824 (estimated locally,
> >> freq 1.0000) (FALLTHRU,EXECUTABLE)
> >>     _1 = y_3(D) << 2;
> >>     ret_5 = _1 + z_4(D);
> >>     _7 = y_3(D) == 0;
> >>     _8 = _7 ? 0 : ret_5;
> >>     # VUSE <.MEM_6(D)>
> >>     return _8;
> >> ;;    succ:       EXIT [always]  count:1073741824 (estimated locally,
> >> freq 1.0000) (EXECUTABLE) test.c:8:10
> >>
> >>
> >> I'm not sure why we're having this difference given that the patterns
> >> are quite similar.
> >
> > Because when you have a cond_expr in the output of a match pattern, we
> > don't recreate an if/else branches.
> > In the pattern without the cond_expr, the sink pass sinks the other
> > expressions. While here we have an explicit COND_EXPR rather than
> > control flow.
> > The big question is, can ifcvt on the rtl level recover the first way
> > into the second way; I think currently it does not but I have not
> > fully looked; so we might to need help it on the gimple level (for GCC
> > 17 though).
> >
> >>
> >> By the way my initial idea for upstreaming was to split the pattern in
> >> two. One like this:
> >>
> >> X << N EQ|NE 0 -> X EQ|NE 0
> >>
> >> And another one like the original I sent but returning the full lshift
> >> comparison instead of doing the reduction, i.e.:
> >>
> >> (a OP b) + c EQ|NE c -> (a OP b) EQ|NE 0
> >>
> >>
> >> But I got a lot of trouble with errors like:
> >>
> >> Analyzing compilation unit
> >> Performing interprocedural optimizations
> >>    <*free_lang_data> {heap 1028k} <visibility> {heap 1028k}
> >> <build_ssa_passes> {heap 1028k} <targetclone> {heap 1440k} <opt_lo
> >> cal_passes> {heap 1440k}during GIMPLE pass: ccp
> >> dump file: test.c.038t.ccp1
> >> test.c: In function ‘frob’:
> >> test.c:9:1: internal compiler error: in decompose, at wide-int.h:1049
> >>       9 | }
> >>         | ^
> >> 0x2ecc34f internal_error(char const*, ...)
> >>           ../../gcc/diagnostic-global-context.cc:787
> >> 0xf0790f fancy_abort(char const*, int, char const*)
> >>           ../../gcc/diagnostics/context.cc:1805
> >> 0xa5b276 wi::int_traits<generic_wide_int<wide_int_ref_storage<false,
> >> false> > >::decompose(long*, unsigned int, generic_wide
> >> _int<wide_int_ref_storage<false, false> > const&)
> >>           ../../gcc/wide-int.h:1049
> >> 0xa5d419 wi::int_traits<generic_wide_int<wide_int_storage>
> >>   >::decompose(long*, unsigned int, generic_wide_int<wide_int_stora
> >> ge> const&)
> >>           ../../gcc/tree.h:3906
> >> 0xa5d419 wide_int_ref_storage<true,
> >> false>::wide_int_ref_storage<generic_wide_int<wide_int_storage>
> >>   >(generic_wide_int<wide_
> >> int_storage> const&, unsigned int)
> >>           ../../gcc/wide-int.h:1099
> >> 0xa5d419 generic_wide_int<wide_int_ref_storage<true, false>
> >>   >::generic_wide_int<generic_wide_int<wide_int_storage> >(generic
> >> _wide_int<wide_int_storage> const&, unsigned int)
> >>           ../../gcc/wide-int.h:855
> >>
> >> My guess is that I was messing up the relation between the 'type'
> >> precision and the precision used in build_zero_cst(). I gave up and
> >> sent the full pattern, but I wonder if splitting it would be better.
> >
> > This usually does mean a type mismatch has happened or you are
> > comparing two `wi::to_wide` which come from different precision
> > INTEGER_CSTs.
> >
> > Thanks,
> > Andrew
> >
> >>
> >>
> >> Thanks,
> >>
> >> Daniel
> >>
> >>
> >>
> >>
> >>>>
> >>>> Jeff/Richard,
> >>>>     What are your thoughts on this? I know Richard had thoughts on some
> >>>> of the single_use in the match patterns before but I have not tracked
> >>>> them that well.
> >>>
> >>> I had no idea we had that match.pd pattern; clearly what Daniel is doing
> >>> is closely related.
> >>>
> >>> The single_use in those patterns comes up here:
> >>> https://gcc.gnu.org/pipermail/gcc-patches/2017-October/484606.html
> >>>
> >>> Looks like single use is in there because of interactions with another
> >>> pattern.
> >>>
> >>> jeff
> >>>
> >>>
> >>>
> >>>
> >>>
> >>
>
  
Jeffrey Law Jan. 9, 2026, 1:44 p.m. UTC | #8
On 1/9/2026 12:24 AM, Richard Biener wrote:
> Please don't any new fold_overflow_warning things, those are borderly
> useless (IMO all of them should be wiped).
>
> No comments on the rest, I didn't review the discussion.
Blame me for that.  I saw a note somewhere in our docs that made me 
believe it was a requirement for these kinds of cases.  It's certainly 
simpler if we don't have to do that!

jeff
  
Daniel Henrique Barboza Jan. 9, 2026, 2:45 p.m. UTC | #9
On 1/9/2026 10:44 AM, Jeffrey Law wrote:
> 
> 
> On 1/9/2026 12:24 AM, Richard Biener wrote:
>> Please don't any new fold_overflow_warning things, those are borderly
>> useless (IMO all of them should be wiped).
>>
>> No comments on the rest, I didn't review the discussion.
> Blame me for that.  I saw a note somewhere in our docs that made me 
> believe it was a requirement for these kinds of cases.  It's certainly 
> simpler if we don't have to do that!

The note is on the docs of the macro in gcc/tree.h:

/* True if overflow is undefined for the given integral or pointer type.
    We may optimize on the assumption that values in the type never 
overflow.

    IMPORTANT NOTE: Any optimization based on TYPE_OVERFLOW_UNDEFINED
    must issue a warning based on warn_strict_overflow.  In some cases
    it will be appropriate to issue the warning immediately, and in
    other cases it will be appropriate to simply set a flag and let the
    caller decide whether a warning is appropriate or not.  */
#define TYPE_OVERFLOW_UNDEFINED(TYPE)				\


Daniel


> 
> jeff
  
Richard Biener Jan. 10, 2026, 11:36 a.m. UTC | #10
On Fri, Jan 9, 2026 at 3:45 PM Daniel Henrique Barboza
<daniel.barboza@oss.qualcomm.com> wrote:
>
>
>
> On 1/9/2026 10:44 AM, Jeffrey Law wrote:
> >
> >
> > On 1/9/2026 12:24 AM, Richard Biener wrote:
> >> Please don't any new fold_overflow_warning things, those are borderly
> >> useless (IMO all of them should be wiped).
> >>
> >> No comments on the rest, I didn't review the discussion.
> > Blame me for that.  I saw a note somewhere in our docs that made me
> > believe it was a requirement for these kinds of cases.  It's certainly
> > simpler if we don't have to do that!
>
> The note is on the docs of the macro in gcc/tree.h:
>
> /* True if overflow is undefined for the given integral or pointer type.
>     We may optimize on the assumption that values in the type never
> overflow.
>
>     IMPORTANT NOTE: Any optimization based on TYPE_OVERFLOW_UNDEFINED
>     must issue a warning based on warn_strict_overflow.  In some cases
>     it will be appropriate to issue the warning immediately, and in
>     other cases it will be appropriate to simply set a flag and let the
>     caller decide whether a warning is appropriate or not.  */
> #define TYPE_OVERFLOW_UNDEFINED(TYPE)                           \

Ah, we should remove that note.  -Wstrict-overflow proved useless IMO, it's way
too noisy as it diagnoses when the compiler relies on overflow not happening,
not diagnosing when it possibly happens.  That's not a very useful diagnostic
to have - it does not point to a possible problem in the code (we could as well
diagnose _all_ signed arithmetic operations for the same argument that we
might eventually rely on overflow not happening).

The documentation about -Wstrict-overflow actually says this.

UBSAN is a _much_ more useful tool given it diagnoses signed overflow
that actually happens!

Richard.

>
> Daniel
>
>
> >
> > jeff
>
  

Patch

diff --git a/gcc/match.pd b/gcc/match.pd
index ccdc1129e23..fdc18b1ef41 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2824,6 +2824,23 @@  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 	(le (minus (convert:etype @0) { lo; }) { hi; })
 	(gt (minus (convert:etype @0) { lo; }) { hi; })))))))))
 
+/* Reduces ((A OP B) + C EQ|NE C) ? X : Y to
+   A EQ|NE 0 ? x : y, for OP = (lshift, rshift).  */
+(for cmp (eq ne)
+ (for op (lshift rshift)
+  (simplify
+   (cond (cmp:c (plus:c (op @1 @2) @0) @0) @3 @4)
+    (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@1))
+	 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@1)))
+     (with
+      {
+       fold_overflow_warning ("assuming signed overflow does not occur "
+			      "when changing (X << imm) + C1 cmp C1 to "
+			      "X << imm cmp 0 and X << imm cmp 0 "
+			      "to X cmp 0",
+			      WARN_STRICT_OVERFLOW_COMPARISON);  }
+     (cond (cmp @1 { build_zero_cst (type); }) @3 @4))))))
+
 /* X + Z < Y + Z is the same as X < Y when there is no overflow.  */
 (for op (lt le ge gt)
  (simplify
diff --git a/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c b/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
new file mode 100644
index 00000000000..769412202c3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/ifcond-plus-shift-czero.c
@@ -0,0 +1,25 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-phiopt2" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects"  } { "" } } */
+
+long
+f1 (long x, long y, long z)
+{
+  long ret = (y << 2) + z;
+  long cond = ret != z;
+  if (cond == 0) 
+    ret = 0;
+  return ret;
+}
+
+long
+f2 (long x, long y, long z)
+{
+  long ret = (y >> 2) + z;
+  long cond = ret != z;
+  if (cond == 0) 
+    ret = 0;
+  return ret;
+}
+
+/* { dg-final { scan-tree-dump-times "== 0" 2 "phiopt2" } } */