ifcvt: Improve `cmp?a&b:a` to try with -1 [PR123312]
Commit Message
After the current improvements to ifcvt, on some targets for
cmp?a&b:a it is better to produce `(cmp?b:-1) & a` rather than
`(!cmp?a:0)|(a & b)`. So this extends noce_try_cond_zero_arith (with
a rename to noce_try_cond_arith) to see if `cmp ? a : -1` is cheaper than
`!cmp?a:0`.
Bootstrapped and tested on x86_64-linux-gnu.
gcc/ChangeLog:
* ifcvt.cc (noce_try_cond_zero_arith): Rename to ...
(noce_try_cond_arith): This. For AND try `cmp ? a : -1`
also to see which one cost less.
(noce_process_if_block): Handle the rename.
Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
---
gcc/ifcvt.cc | 70 +++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 58 insertions(+), 12 deletions(-)
Comments
On 1/10/2026 6:44 AM, Andrew Pinski wrote:
> After the current improvements to ifcvt, on some targets for
> cmp?a&b:a it is better to produce `(cmp?b:-1) & a` rather than
> `(!cmp?a:0)|(a & b)`. So this extends noce_try_cond_zero_arith (with
> a rename to noce_try_cond_arith) to see if `cmp ? a : -1` is cheaper than
> `!cmp?a:0`.
>
> Bootstrapped and tested on x86_64-linux-gnu.
>
> gcc/ChangeLog:
>
> * ifcvt.cc (noce_try_cond_zero_arith): Rename to ...
> (noce_try_cond_arith): This. For AND try `cmp ? a : -1`
> also to see which one cost less.
> (noce_process_if_block): Handle the rename.
>
> Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
So on a testing note, I threw this into my tester overnight, so it's
been bootstrapped and regression tested on riscv and tested on most of
our embedded platforms without any regressions. I was briefly concerned
about the store-merging failures, but those were from a different patch
that went onto the trunk and were expected to flip behavior.
Some of the sequence juggling is annoying, but likely unavoidable to a
large extent. But I'm happy with the overall approach of just costing
the two approaches and selecting one.
OK for the trunk.
Thanks again for picking this up -- I had expected to cover it myself.
Jeff
On Tue, Jan 13, 2026 at 7:29 AM Jeffrey Law
<jeffrey.law@oss.qualcomm.com> wrote:
>
>
>
> On 1/10/2026 6:44 AM, Andrew Pinski wrote:
> > After the current improvements to ifcvt, on some targets for
> > cmp?a&b:a it is better to produce `(cmp?b:-1) & a` rather than
> > `(!cmp?a:0)|(a & b)`. So this extends noce_try_cond_zero_arith (with
> > a rename to noce_try_cond_arith) to see if `cmp ? a : -1` is cheaper than
> > `!cmp?a:0`.
> >
> > Bootstrapped and tested on x86_64-linux-gnu.
> >
> > gcc/ChangeLog:
> >
> > * ifcvt.cc (noce_try_cond_zero_arith): Rename to ...
> > (noce_try_cond_arith): This. For AND try `cmp ? a : -1`
> > also to see which one cost less.
> > (noce_process_if_block): Handle the rename.
> >
> > Signed-off-by: Andrew Pinski <andrew.pinski@oss.qualcomm.com>
> So on a testing note, I threw this into my tester overnight, so it's
> been bootstrapped and regression tested on riscv and tested on most of
> our embedded platforms without any regressions. I was briefly concerned
> about the store-merging failures, but those were from a different patch
> that went onto the trunk and were expected to flip behavior.
>
> Some of the sequence juggling is annoying, but likely unavoidable to a
> large extent. But I'm happy with the overall approach of just costing
> the two approaches and selecting one.
>
> OK for the trunk.
>
> Thanks again for picking this up -- I had expected to cover it myself.
I picked it up as I needed a break from looking into solving a gimple issue.
Anyways pushed as r16-6747-ge0a8b636253b21aae0.
Thanks,
Andrew Pinski
>
> Jeff
@@ -790,7 +790,6 @@ static rtx noce_get_alt_condition (struct noce_if_info *, rtx, rtx_insn **);
static bool noce_try_minmax (struct noce_if_info *);
static bool noce_try_abs (struct noce_if_info *);
static bool noce_try_sign_mask (struct noce_if_info *);
-static int noce_try_cond_zero_arith (struct noce_if_info *);
/* Return the comparison code for reversed condition for IF_INFO,
or UNKNOWN if reversing the condition is not possible. */
@@ -3135,10 +3134,15 @@ get_base_reg (rtx exp)
tmp = !cond ? y : 0
x = (y & z) | tmp
+ Also for AND try:
+ tmp = cond ? z : -1
+ x = y op tmp
+ To see if it is cheaper to produce `!cond ? y : 0`
+ or `cond ? z : -1`.
*/
static int
-noce_try_cond_zero_arith (struct noce_if_info *if_info)
+noce_try_cond_arith (struct noce_if_info *if_info)
{
rtx target, a, b, a_op0, a_op1;
rtx cond = if_info->cond;
@@ -3230,27 +3234,69 @@ noce_try_cond_zero_arith (struct noce_if_info *if_info)
target = rtl_hooks.gen_lowpart_no_emit (GET_MODE (XEXP (a, op != AND)), target);
gcc_assert (target);
}
- if (!target)
- goto end_seq_n_fail;
+ /* For AND, try `cond ? z : -1` to see if that is cheaper or the same cost.
+ In some cases it will be cheaper to produce the -1 rather than the 0 case. */
if (op == AND)
{
+ rtx_insn *seq0 = end_sequence ();
+ unsigned cost0 = seq_cost (seq0, if_info->speed_p);
+ if (!target)
+ cost0 = -1u;
+
+ /* Produce `cond ? z : -1`. */
+ rtx targetm1;
+ start_sequence ();
+ targetm1 = gen_reg_rtx (GET_MODE (XEXP (a, 1)));
+ targetm1 = noce_emit_cmove (if_info, targetm1, code,
+ XEXP (cond, 0), XEXP (cond, 1),
+ XEXP (a, 1), constm1_rtx);
+ rtx_insn *seqm1 = end_sequence ();
+ unsigned costm1 = seq_cost (seqm1, if_info->speed_p);
+ if (!targetm1)
+ costm1 = -1u;
+ /* If both fails, then there is no costing to be done. */
+ if (!targetm1 && !target)
+ return false;
+
+ /* If -1 is cheaper or the same cost to producing 0, then use that. */
+ if (costm1 <= cost0)
+ {
+ push_to_sequence (seqm1);
+ targetm1 = expand_simple_binop (mode, op, a_op0, targetm1,
+ if_info->x, 0, OPTAB_WIDEN);
+ if (targetm1)
+ {
+ target = targetm1;
+ goto success;
+ }
+ end_sequence ();
+ }
+ if (!target)
+ return false;
+ /* For 0 the produce sequence is:
+ tmp = !cond ? y : 0
+ x = (y & z) | tmp */
+ push_to_sequence (seq0);
rtx a_bin = gen_reg_rtx (mode);
noce_emit_move_insn (a_bin, a);
target = expand_simple_binop (mode, IOR, a_bin, target, if_info->x, 0,
OPTAB_WIDEN);
-
- }
- else
- {
- target = expand_simple_binop (mode, op, a_op0, target, if_info->x, 0,
- OPTAB_WIDEN);
+ if (!target)
+ goto end_seq_n_fail;
+ goto success;
}
+ if (!target)
+ goto end_seq_n_fail;
+
+ target = expand_simple_binop (mode, op, a_op0, target, if_info->x, 0,
+ OPTAB_WIDEN);
if (!target)
goto end_seq_n_fail;
+success:
if (target != if_info->x)
noce_emit_move_insn (if_info->x, target);
@@ -3259,7 +3305,7 @@ noce_try_cond_zero_arith (struct noce_if_info *if_info)
goto fail;
emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a));
- if_info->transform_name = "noce_try_cond_zero_arith";
+ if_info->transform_name = "noce_try_cond_arith";
return true;
end_seq_n_fail:
@@ -4431,7 +4477,7 @@ noce_process_if_block (struct noce_if_info *if_info)
if (noce_try_store_flag_mask (if_info))
goto success;
if (HAVE_conditional_move
- && noce_try_cond_zero_arith (if_info))
+ && noce_try_cond_arith (if_info))
goto success;
if (HAVE_conditional_move
&& noce_try_cmove_arith (if_info))