ifcombine: For short circuit case, allow 2 defining statements [PR85605]
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
fail
|
Test failed
|
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
Commit Message
r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
into using either AND or OR. But it only allowed the inner condition
basic block having the conditional only. This changes to allow up to 2 defining
statements as long as they are just nop conversions for either the lhs or rhs
of the conditional.
This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than before.
Boootstrapped and tested on x86_64-linux-gnu.
PR tree-optimization/85605
gcc/ChangeLog:
* tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New function.
(ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead of checking
if iterator is one before the last statement.
gcc/testsuite/ChangeLog:
* g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
* gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
.../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++
.../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++
.../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++
gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++-
4 files changed, 101 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
Comments
On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
>
> r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> into using either AND or OR. But it only allowed the inner condition
> basic block having the conditional only. This changes to allow up to 2 defining
> statements as long as they are just nop conversions for either the lhs or rhs
> of the conditional.
>
> This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than before.
>
> Boootstrapped and tested on x86_64-linux-gnu.
>
> PR tree-optimization/85605
>
> gcc/ChangeLog:
>
> * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New function.
> (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead of checking
> if iterator is one before the last statement.
>
> gcc/testsuite/ChangeLog:
>
> * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
>
> Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> ---
> .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++
> .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++
> .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++
> gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++-
> 4 files changed, 101 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
>
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> new file mode 100644
> index 00000000000..282cec8c628
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +#include <stdint.h>
> +
> +template<class T,class T2>
> +inline bool cmp(T a, T2 b) {
> + return a<0 ? true : T2(a) < b;
> +}
> +
> +template<class T,class T2>
> +inline bool cmp2(T a, T2 b) {
> + return (a<0) | (T2(a) < b);
> +}
> +
> +bool f(int a, int b) {
> + return cmp(int64_t(a), unsigned(b));
> +}
> +
> +bool f2(int a, int b) {
> + return cmp2(int64_t(a), unsigned(b));
> +}
> +
> +
> +/* Both of these functions should be optimized to the same, and have an | in them. */
> +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> new file mode 100644
> index 00000000000..1bdbb9358b4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in the
> + inner bb which should be able to move too. */
> +
> +int t (int a, unsigned b)
> +{
> + if (a > 0)
> + {
> + signed t = b;
> + if (t > 0)
> + return 0;
> + }
> + return 1;
> +}
> +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> new file mode 100644
> index 00000000000..8d74b4932c5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> +
> +/* PR tree-optimization/85605 */
> +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in the
> + inner bb which should be able to move too. */
> +
> +int t (int a, unsigned b)
> +{
> + if (a > 0)
> + goto L1;
> + signed t = b;
> + if (t > 0)
> + goto L1;
> + return 0;
> +L1:
> + return 1;
> +}
> +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> index 39702929fc0..3acecda31cc 100644
> --- a/gcc/tree-ssa-ifcombine.cc
> +++ b/gcc/tree-ssa-ifcombine.cc
> @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
> outer2->probability = profile_probability::never ();
> }
>
> +/* Returns true if inner_cond_bb contains just the condition or 1/2 statements
> + that define lhs or rhs with a nop conversion. */
> +
> +static bool
> +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, tree rhs)
> +{
> + gimple_stmt_iterator gsi;
> + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> + /* If only the condition, this should be allowed. */
> + if (gsi_one_before_end_p (gsi))
> + return true;
> + /* Can have up to 2 statements defining each of lhs/rhs. */
> + for (int i = 0; i < 2; i++)
> + {
> + gimple *stmt = gsi_stmt (gsi);
> + if (!gimple_assign_cast_p (stmt))
> + return false;
> + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> + TREE_TYPE (gimple_assign_rhs1 (stmt))))
I would say non-nop conversions should be fine as well - not sure why
gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
would be a better check?
> + return false;
> + /* The defining statement needs to match either the lhs or rhs of
> + the condition. */
> + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> + && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
I think you can use == here.
OK with that change but note this might conflict with the series from Alexandre.
Richard.
> + return false;
> + gsi_next_nondebug (&gsi);
> + if (gsi_one_before_end_p (gsi))
> + return true;
> + }
> + return false;
> +}
> +
> /* If-convert on a and pattern with a common else block. The inner
> if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> inner_inv, outer_inv and result_inv indicate whether the conditions
> @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> = param_logical_op_non_short_circuit;
> if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> return false;
> - /* Only do this optimization if the inner bb contains only the conditional. */
> - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
> + /* Only do this optimization if the inner bb contains only the conditional
> + or there is one or 2 statements which are nop conversion for the comparison. */
> + if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> + gimple_cond_lhs (inner_cond),
> + gimple_cond_rhs (inner_cond)))
> return false;
> t1 = fold_build2_loc (gimple_location (inner_cond),
> inner_cond_code,
> --
> 2.43.0
>
On Tue, Oct 29, 2024 at 5:59 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
> >
> > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> > into using either AND or OR. But it only allowed the inner condition
> > basic block having the conditional only. This changes to allow up to 2 defining
> > statements as long as they are just nop conversions for either the lhs or rhs
> > of the conditional.
> >
> > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than before.
> >
> > Boootstrapped and tested on x86_64-linux-gnu.
> >
> > PR tree-optimization/85605
> >
> > gcc/ChangeLog:
> >
> > * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New function.
> > (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead of checking
> > if iterator is one before the last statement.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
> >
> > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > ---
> > .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++
> > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++
> > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++
> > gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++-
> > 4 files changed, 101 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> >
> > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > new file mode 100644
> > index 00000000000..282cec8c628
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > @@ -0,0 +1,27 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > +
> > +/* PR tree-optimization/85605 */
> > +#include <stdint.h>
> > +
> > +template<class T,class T2>
> > +inline bool cmp(T a, T2 b) {
> > + return a<0 ? true : T2(a) < b;
> > +}
> > +
> > +template<class T,class T2>
> > +inline bool cmp2(T a, T2 b) {
> > + return (a<0) | (T2(a) < b);
> > +}
> > +
> > +bool f(int a, int b) {
> > + return cmp(int64_t(a), unsigned(b));
> > +}
> > +
> > +bool f2(int a, int b) {
> > + return cmp2(int64_t(a), unsigned(b));
> > +}
> > +
> > +
> > +/* Both of these functions should be optimized to the same, and have an | in them. */
> > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > new file mode 100644
> > index 00000000000..1bdbb9358b4
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > @@ -0,0 +1,18 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > +
> > +/* PR tree-optimization/85605 */
> > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in the
> > + inner bb which should be able to move too. */
> > +
> > +int t (int a, unsigned b)
> > +{
> > + if (a > 0)
> > + {
> > + signed t = b;
> > + if (t > 0)
> > + return 0;
> > + }
> > + return 1;
> > +}
> > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > new file mode 100644
> > index 00000000000..8d74b4932c5
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > @@ -0,0 +1,19 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > +
> > +/* PR tree-optimization/85605 */
> > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in the
> > + inner bb which should be able to move too. */
> > +
> > +int t (int a, unsigned b)
> > +{
> > + if (a > 0)
> > + goto L1;
> > + signed t = b;
> > + if (t > 0)
> > + goto L1;
> > + return 0;
> > +L1:
> > + return 1;
> > +}
> > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> > index 39702929fc0..3acecda31cc 100644
> > --- a/gcc/tree-ssa-ifcombine.cc
> > +++ b/gcc/tree-ssa-ifcombine.cc
> > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
> > outer2->probability = profile_probability::never ();
> > }
> >
> > +/* Returns true if inner_cond_bb contains just the condition or 1/2 statements
> > + that define lhs or rhs with a nop conversion. */
> > +
> > +static bool
> > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, tree rhs)
> > +{
> > + gimple_stmt_iterator gsi;
> > + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> > + /* If only the condition, this should be allowed. */
> > + if (gsi_one_before_end_p (gsi))
> > + return true;
> > + /* Can have up to 2 statements defining each of lhs/rhs. */
> > + for (int i = 0; i < 2; i++)
> > + {
> > + gimple *stmt = gsi_stmt (gsi);
> > + if (!gimple_assign_cast_p (stmt))
> > + return false;
> > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> > + TREE_TYPE (gimple_assign_rhs1 (stmt))))
>
> I would say non-nop conversions should be fine as well - not sure why
> gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
> FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
> would be a better check?
I will check on that in a little bit but I think so.
>
> > + return false;
> > + /* The defining statement needs to match either the lhs or rhs of
> > + the condition. */
> > + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> > + && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
>
> I think you can use == here.
Yes I was thinking using `!=` here was fine but I was not 100% sure at
the time I wrote it.
>
> OK with that change but note this might conflict with the series from Alexandre.
I looked and I think it won't conflict, Alexandre's changes did not
touch the logical_op_non_short_circuit case as far as I could see.
Note I am going to wait on applying this until I get the patch for PR
117346 approved (I should be posting it later today) as this patch
exposed a missed optimization with ccmp on aarch64.
Thanks,
Andrew
>
> Richard.
>
> > + return false;
> > + gsi_next_nondebug (&gsi);
> > + if (gsi_one_before_end_p (gsi))
> > + return true;
> > + }
> > + return false;
> > +}
> > +
> > /* If-convert on a and pattern with a common else block. The inner
> > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> > inner_inv, outer_inv and result_inv indicate whether the conditions
> > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> > = param_logical_op_non_short_circuit;
> > if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> > return false;
> > - /* Only do this optimization if the inner bb contains only the conditional. */
> > - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
> > + /* Only do this optimization if the inner bb contains only the conditional
> > + or there is one or 2 statements which are nop conversion for the comparison. */
> > + if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> > + gimple_cond_lhs (inner_cond),
> > + gimple_cond_rhs (inner_cond)))
> > return false;
> > t1 = fold_build2_loc (gimple_location (inner_cond),
> > inner_cond_code,
> > --
> > 2.43.0
> >
On Tue, Oct 29, 2024 at 10:10 AM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 5:59 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
> > >
> > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> > > into using either AND or OR. But it only allowed the inner condition
> > > basic block having the conditional only. This changes to allow up to 2 defining
> > > statements as long as they are just nop conversions for either the lhs or rhs
> > > of the conditional.
> > >
> > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than before.
> > >
> > > Boootstrapped and tested on x86_64-linux-gnu.
> > >
> > > PR tree-optimization/85605
> > >
> > > gcc/ChangeLog:
> > >
> > > * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New function.
> > > (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead of checking
> > > if iterator is one before the last statement.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
> > >
> > > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > > ---
> > > .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++
> > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++
> > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++
> > > gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++-
> > > 4 files changed, 101 insertions(+), 2 deletions(-)
> > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > >
> > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > new file mode 100644
> > > index 00000000000..282cec8c628
> > > --- /dev/null
> > > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > @@ -0,0 +1,27 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +#include <stdint.h>
> > > +
> > > +template<class T,class T2>
> > > +inline bool cmp(T a, T2 b) {
> > > + return a<0 ? true : T2(a) < b;
> > > +}
> > > +
> > > +template<class T,class T2>
> > > +inline bool cmp2(T a, T2 b) {
> > > + return (a<0) | (T2(a) < b);
> > > +}
> > > +
> > > +bool f(int a, int b) {
> > > + return cmp(int64_t(a), unsigned(b));
> > > +}
> > > +
> > > +bool f2(int a, int b) {
> > > + return cmp2(int64_t(a), unsigned(b));
> > > +}
> > > +
> > > +
> > > +/* Both of these functions should be optimized to the same, and have an | in them. */
> > > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > new file mode 100644
> > > index 00000000000..1bdbb9358b4
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > @@ -0,0 +1,18 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in the
> > > + inner bb which should be able to move too. */
> > > +
> > > +int t (int a, unsigned b)
> > > +{
> > > + if (a > 0)
> > > + {
> > > + signed t = b;
> > > + if (t > 0)
> > > + return 0;
> > > + }
> > > + return 1;
> > > +}
> > > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > new file mode 100644
> > > index 00000000000..8d74b4932c5
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > @@ -0,0 +1,19 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > +
> > > +/* PR tree-optimization/85605 */
> > > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in the
> > > + inner bb which should be able to move too. */
> > > +
> > > +int t (int a, unsigned b)
> > > +{
> > > + if (a > 0)
> > > + goto L1;
> > > + signed t = b;
> > > + if (t > 0)
> > > + goto L1;
> > > + return 0;
> > > +L1:
> > > + return 1;
> > > +}
> > > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> > > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> > > index 39702929fc0..3acecda31cc 100644
> > > --- a/gcc/tree-ssa-ifcombine.cc
> > > +++ b/gcc/tree-ssa-ifcombine.cc
> > > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
> > > outer2->probability = profile_probability::never ();
> > > }
> > >
> > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 statements
> > > + that define lhs or rhs with a nop conversion. */
> > > +
> > > +static bool
> > > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, tree rhs)
> > > +{
> > > + gimple_stmt_iterator gsi;
> > > + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> > > + /* If only the condition, this should be allowed. */
> > > + if (gsi_one_before_end_p (gsi))
> > > + return true;
> > > + /* Can have up to 2 statements defining each of lhs/rhs. */
> > > + for (int i = 0; i < 2; i++)
> > > + {
> > > + gimple *stmt = gsi_stmt (gsi);
> > > + if (!gimple_assign_cast_p (stmt))
> > > + return false;
> > > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> > > + TREE_TYPE (gimple_assign_rhs1 (stmt))))
> >
> > I would say non-nop conversions should be fine as well - not sure why
> > gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
> > FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
> > would be a better check?
>
> I will check on that in a little bit but I think so.
Just a quick update on this. Without checking for a nop_conversion,
gcc.dg/tree-ssa/pr111456-1.c fails.
I am looking into a way of fixing that still. I think there is a
missed optimization still, plus the way ifcombine uses fold_build2_loc
to build the full tree and then does force_gimple_operand_gsi_1 also
seems like it is not helping (though I could be wrong).
Thanks,
Andrew
>
> >
> > > + return false;
> > > + /* The defining statement needs to match either the lhs or rhs of
> > > + the condition. */
> > > + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> > > + && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
> >
> > I think you can use == here.
>
> Yes I was thinking using `!=` here was fine but I was not 100% sure at
> the time I wrote it.
>
> >
> > OK with that change but note this might conflict with the series from Alexandre.
>
> I looked and I think it won't conflict, Alexandre's changes did not
> touch the logical_op_non_short_circuit case as far as I could see.
>
> Note I am going to wait on applying this until I get the patch for PR
> 117346 approved (I should be posting it later today) as this patch
> exposed a missed optimization with ccmp on aarch64.
>
> Thanks,
> Andrew
>
> >
> > Richard.
> >
> > > + return false;
> > > + gsi_next_nondebug (&gsi);
> > > + if (gsi_one_before_end_p (gsi))
> > > + return true;
> > > + }
> > > + return false;
> > > +}
> > > +
> > > /* If-convert on a and pattern with a common else block. The inner
> > > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> > > inner_inv, outer_inv and result_inv indicate whether the conditions
> > > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> > > = param_logical_op_non_short_circuit;
> > > if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> > > return false;
> > > - /* Only do this optimization if the inner bb contains only the conditional. */
> > > - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
> > > + /* Only do this optimization if the inner bb contains only the conditional
> > > + or there is one or 2 statements which are nop conversion for the comparison. */
> > > + if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> > > + gimple_cond_lhs (inner_cond),
> > > + gimple_cond_rhs (inner_cond)))
> > > return false;
> > > t1 = fold_build2_loc (gimple_location (inner_cond),
> > > inner_cond_code,
> > > --
> > > 2.43.0
> > >
On Fri, Nov 1, 2024 at 4:06 PM Andrew Pinski <pinskia@gmail.com> wrote:
>
> On Tue, Oct 29, 2024 at 10:10 AM Andrew Pinski <pinskia@gmail.com> wrote:
> >
> > On Tue, Oct 29, 2024 at 5:59 AM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Tue, Oct 29, 2024 at 4:29 AM Andrew Pinski <quic_apinski@quicinc.com> wrote:
> > > >
> > > > r0-126134-g5d2a9da9a7f7c1 added support for circuiting and combing the ifs
> > > > into using either AND or OR. But it only allowed the inner condition
> > > > basic block having the conditional only. This changes to allow up to 2 defining
> > > > statements as long as they are just nop conversions for either the lhs or rhs
> > > > of the conditional.
> > > >
> > > > This should allow to use ccmp on aarch64 and x86_64 (APX) slightly more than before.
> > > >
> > > > Boootstrapped and tested on x86_64-linux-gnu.
> > > >
> > > > PR tree-optimization/85605
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > > * tree-ssa-ifcombine.cc (can_combine_bbs_with_short_circuit): New function.
> > > > (ifcombine_ifandif): Use can_combine_bbs_with_short_circuit instead of checking
> > > > if iterator is one before the last statement.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > > * g++.dg/tree-ssa/ifcombine-ccmp-1.C: New test.
> > > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c: New test.
> > > > * gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c: New test.
> > > >
> > > > Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
> > > > ---
> > > > .../g++.dg/tree-ssa/ifcombine-ccmp-1.C | 27 +++++++++++++
> > > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c | 18 +++++++++
> > > > .../gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c | 19 +++++++++
> > > > gcc/tree-ssa-ifcombine.cc | 39 ++++++++++++++++++-
> > > > 4 files changed, 101 insertions(+), 2 deletions(-)
> > > > create mode 100644 gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > >
> > > > diff --git a/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > > new file mode 100644
> > > > index 00000000000..282cec8c628
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/g++.dg/tree-ssa/ifcombine-ccmp-1.C
> > > > @@ -0,0 +1,27 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +#include <stdint.h>
> > > > +
> > > > +template<class T,class T2>
> > > > +inline bool cmp(T a, T2 b) {
> > > > + return a<0 ? true : T2(a) < b;
> > > > +}
> > > > +
> > > > +template<class T,class T2>
> > > > +inline bool cmp2(T a, T2 b) {
> > > > + return (a<0) | (T2(a) < b);
> > > > +}
> > > > +
> > > > +bool f(int a, int b) {
> > > > + return cmp(int64_t(a), unsigned(b));
> > > > +}
> > > > +
> > > > +bool f2(int a, int b) {
> > > > + return cmp2(int64_t(a), unsigned(b));
> > > > +}
> > > > +
> > > > +
> > > > +/* Both of these functions should be optimized to the same, and have an | in them. */
> > > > +/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > > new file mode 100644
> > > > index 00000000000..1bdbb9358b4
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-7.c
> > > > @@ -0,0 +1,18 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in the
> > > > + inner bb which should be able to move too. */
> > > > +
> > > > +int t (int a, unsigned b)
> > > > +{
> > > > + if (a > 0)
> > > > + {
> > > > + signed t = b;
> > > > + if (t > 0)
> > > > + return 0;
> > > > + }
> > > > + return 1;
> > > > +}
> > > > +/* { dg-final { scan-tree-dump "\&" "optimized" } } */
> > > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > > new file mode 100644
> > > > index 00000000000..8d74b4932c5
> > > > --- /dev/null
> > > > +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ifcombine-ccmp-8.c
> > > > @@ -0,0 +1,19 @@
> > > > +/* { dg-do compile } */
> > > > +/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
> > > > +
> > > > +/* PR tree-optimization/85605 */
> > > > +/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in the
> > > > + inner bb which should be able to move too. */
> > > > +
> > > > +int t (int a, unsigned b)
> > > > +{
> > > > + if (a > 0)
> > > > + goto L1;
> > > > + signed t = b;
> > > > + if (t > 0)
> > > > + goto L1;
> > > > + return 0;
> > > > +L1:
> > > > + return 1;
> > > > +}
> > > > +/* { dg-final { scan-tree-dump "\|" "optimized" } } */
> > > > diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc
> > > > index 39702929fc0..3acecda31cc 100644
> > > > --- a/gcc/tree-ssa-ifcombine.cc
> > > > +++ b/gcc/tree-ssa-ifcombine.cc
> > > > @@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
> > > > outer2->probability = profile_probability::never ();
> > > > }
> > > >
> > > > +/* Returns true if inner_cond_bb contains just the condition or 1/2 statements
> > > > + that define lhs or rhs with a nop conversion. */
> > > > +
> > > > +static bool
> > > > +can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, tree rhs)
> > > > +{
> > > > + gimple_stmt_iterator gsi;
> > > > + gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
> > > > + /* If only the condition, this should be allowed. */
> > > > + if (gsi_one_before_end_p (gsi))
> > > > + return true;
> > > > + /* Can have up to 2 statements defining each of lhs/rhs. */
> > > > + for (int i = 0; i < 2; i++)
> > > > + {
> > > > + gimple *stmt = gsi_stmt (gsi);
> > > > + if (!gimple_assign_cast_p (stmt))
> > > > + return false;
> > > > + if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
> > > > + TREE_TYPE (gimple_assign_rhs1 (stmt))))
> > >
> > > I would say non-nop conversions should be fine as well - not sure why
> > > gimple_assign_cast_p covers FIX_TRUNC_EXPR but for example not
> > > FLOAT_EXPR - so possibly is_gimple_assign && CONVERT_EXPR_CODE_P (..)
> > > would be a better check?
> >
> > I will check on that in a little bit but I think so.
>
> Just a quick update on this. Without checking for a nop_conversion,
> gcc.dg/tree-ssa/pr111456-1.c fails.
> I am looking into a way of fixing that still. I think there is a
> missed optimization still, plus the way ifcombine uses fold_build2_loc
> to build the full tree and then does force_gimple_operand_gsi_1 also
> seems like it is not helping (though I could be wrong).
This is what I committed in the end now all of the known regressions
are fixed. It just checks CONVERT_EXPR_CODE_P rather than a
nop_conversion.
Also uses `!=` rather than `!operand_equal_p`.
Thanks,
Andrew Pinski
>
> Thanks,
> Andrew
>
> >
> > >
> > > > + return false;
> > > > + /* The defining statement needs to match either the lhs or rhs of
> > > > + the condition. */
> > > > + if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
> > > > + && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
> > >
> > > I think you can use == here.
> >
> > Yes I was thinking using `!=` here was fine but I was not 100% sure at
> > the time I wrote it.
> >
> > >
> > > OK with that change but note this might conflict with the series from Alexandre.
> >
> > I looked and I think it won't conflict, Alexandre's changes did not
> > touch the logical_op_non_short_circuit case as far as I could see.
> >
> > Note I am going to wait on applying this until I get the patch for PR
> > 117346 approved (I should be posting it later today) as this patch
> > exposed a missed optimization with ccmp on aarch64.
> >
> > Thanks,
> > Andrew
> >
> > >
> > > Richard.
> > >
> > > > + return false;
> > > > + gsi_next_nondebug (&gsi);
> > > > + if (gsi_one_before_end_p (gsi))
> > > > + return true;
> > > > + }
> > > > + return false;
> > > > +}
> > > > +
> > > > /* If-convert on a and pattern with a common else block. The inner
> > > > if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
> > > > inner_inv, outer_inv and result_inv indicate whether the conditions
> > > > @@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
> > > > = param_logical_op_non_short_circuit;
> > > > if (!logical_op_non_short_circuit || sanitize_coverage_p ())
> > > > return false;
> > > > - /* Only do this optimization if the inner bb contains only the conditional. */
> > > > - if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
> > > > + /* Only do this optimization if the inner bb contains only the conditional
> > > > + or there is one or 2 statements which are nop conversion for the comparison. */
> > > > + if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
> > > > + gimple_cond_lhs (inner_cond),
> > > > + gimple_cond_rhs (inner_cond)))
> > > > return false;
> > > > t1 = fold_build2_loc (gimple_location (inner_cond),
> > > > inner_cond_code,
> > > > --
> > > > 2.43.0
> > > >
new file mode 100644
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+#include <stdint.h>
+
+template<class T,class T2>
+inline bool cmp(T a, T2 b) {
+ return a<0 ? true : T2(a) < b;
+}
+
+template<class T,class T2>
+inline bool cmp2(T a, T2 b) {
+ return (a<0) | (T2(a) < b);
+}
+
+bool f(int a, int b) {
+ return cmp(int64_t(a), unsigned(b));
+}
+
+bool f2(int a, int b) {
+ return cmp2(int64_t(a), unsigned(b));
+}
+
+
+/* Both of these functions should be optimized to the same, and have an | in them. */
+/* { dg-final { scan-tree-dump-times " \\\| " 2 "optimized" } } */
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+/* Like ssa-ifcombine-ccmp-1.c but with conversion from unsigned to signed in the
+ inner bb which should be able to move too. */
+
+int t (int a, unsigned b)
+{
+ if (a > 0)
+ {
+ signed t = b;
+ if (t > 0)
+ return 0;
+ }
+ return 1;
+}
+/* { dg-final { scan-tree-dump "\&" "optimized" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -fdump-tree-optimized --param logical-op-non-short-circuit=1" } */
+
+/* PR tree-optimization/85605 */
+/* Like ssa-ifcombine-ccmp-2.c but with conversion from unsigned to signed in the
+ inner bb which should be able to move too. */
+
+int t (int a, unsigned b)
+{
+ if (a > 0)
+ goto L1;
+ signed t = b;
+ if (t > 0)
+ goto L1;
+ return 0;
+L1:
+ return 1;
+}
+/* { dg-final { scan-tree-dump "\|" "optimized" } } */
@@ -400,6 +400,38 @@ update_profile_after_ifcombine (basic_block inner_cond_bb,
outer2->probability = profile_probability::never ();
}
+/* Returns true if inner_cond_bb contains just the condition or 1/2 statements
+ that define lhs or rhs with a nop conversion. */
+
+static bool
+can_combine_bbs_with_short_circuit (basic_block inner_cond_bb, tree lhs, tree rhs)
+{
+ gimple_stmt_iterator gsi;
+ gsi = gsi_start_nondebug_after_labels_bb (inner_cond_bb);
+ /* If only the condition, this should be allowed. */
+ if (gsi_one_before_end_p (gsi))
+ return true;
+ /* Can have up to 2 statements defining each of lhs/rhs. */
+ for (int i = 0; i < 2; i++)
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (!gimple_assign_cast_p (stmt))
+ return false;
+ if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (stmt)),
+ TREE_TYPE (gimple_assign_rhs1 (stmt))))
+ return false;
+ /* The defining statement needs to match either the lhs or rhs of
+ the condition. */
+ if (!operand_equal_p (lhs, gimple_assign_lhs (stmt))
+ && !operand_equal_p (rhs, gimple_assign_lhs (stmt)))
+ return false;
+ gsi_next_nondebug (&gsi);
+ if (gsi_one_before_end_p (gsi))
+ return true;
+ }
+ return false;
+}
+
/* If-convert on a and pattern with a common else block. The inner
if is specified by its INNER_COND_BB, the outer by OUTER_COND_BB.
inner_inv, outer_inv and result_inv indicate whether the conditions
@@ -610,8 +642,11 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv,
= param_logical_op_non_short_circuit;
if (!logical_op_non_short_circuit || sanitize_coverage_p ())
return false;
- /* Only do this optimization if the inner bb contains only the conditional. */
- if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb)))
+ /* Only do this optimization if the inner bb contains only the conditional
+ or there is one or 2 statements which are nop conversion for the comparison. */
+ if (!can_combine_bbs_with_short_circuit (inner_cond_bb,
+ gimple_cond_lhs (inner_cond),
+ gimple_cond_rhs (inner_cond)))
return false;
t1 = fold_build2_loc (gimple_location (inner_cond),
inner_cond_code,