diff mbox series

[v2,1/4] Fix loop split incorrect count and probability

Message ID	20211027063448.1844771-2-luoxhu@linux.ibm.com
State	New
Headers	DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org A84FF3858429 To: gcc-patches@gcc.gnu.org Subject: [PATCH v2 1/4] Fix loop split incorrect count and probability Date: Wed, 27 Oct 2021 01:34:45 -0500 Message-Id: <20211027063448.1844771-2-luoxhu@linux.ibm.com> In-Reply-To: <20211027063448.1844771-1-luoxhu@linux.ibm.com> References: <20211027063448.1844771-1-luoxhu@linux.ibm.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: list From: Xionghu Luo via Gcc-patches <gcc-patches@gcc.gnu.org> Reply-To: Xionghu Luo <luoxhu@linux.ibm.com> Cc: rguenther@suse.de, segher@kernel.crashing.org, Xionghu Luo <luoxhu@linux.ibm.com>, hubicka@kam.mff.cuni.cz, wschmidt@linux.ibm.com, linkw@gcc.gnu.org, dje.gcc@gmail.com Errors-To: gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org Sender: "Gcc-patches" <gcc-patches-bounces+patchwork=sourceware.org@gcc.gnu.org>
Series	loop split fix and functions renaming \| [v2,0/4] loop split fix and functions renaming [v2,1/4] Fix loop split incorrect count and probability [v2,2/4] Refactor loop_version [v2,3/4] Rename loop_version to clone_loop_to_header_edge. [v2,4/4] Rename duplicate_loop_to_header_edge to duplicate_loop_body_to_header_edge

Commit Message

Xionghu Luo Oct. 27, 2021, 6:34 a.m. UTC

  loop split condition is moved between loop1 and loop2, the split bb's
count and probability should also be duplicated instead of (100% vs INV),
secondly, the original loop1 and loop2 count need be propotional from the
original loop.

Regression tested pass, OK for master?

Comments

Jan Hubicka Oct. 27, 2021, 7:07 a.m. UTC | #1

> 
> gcc/ChangeLog:
> 
> 	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
> 	(do_split_loop_on_cond): Likewise.
> ---
>  gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
>  1 file changed, 16 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 3f6ad046623..d30782888f3 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -575,7 +575,11 @@ split_loop (class loop *loop1)
>  					    stmts2);
>  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
>  	if (!initial_true)
> -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
> +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> +
> +	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
> +			   ? EDGE_SUCC (bbs[i], 0)
> +			   : EDGE_SUCC (bbs[i], 1);
>  
>  	/* Now version the loop, placing loop2 after loop1 connecting
>  	   them, and fix up SSA form for that.  */
> @@ -583,10 +587,10 @@ split_loop (class loop *loop1)
>  	basic_block cond_bb;
>  
>  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> +					   true_edge->probability,
> +					   true_edge->probability.invert (),
> +					   true_edge->probability,
> +					   true_edge->probability.invert (),
>  					   true);

As discussed yesterday, for loop of form

for (...)
  if (cond)
    cond = something();
  else
    something2

Split as

loop1:
for (...)
  if (true)
    cond = something();
    if (!cond)
      break
  else
    something2 ();

loop2:
for (...)
  if (false)
    cond = something();
  else
    something2 ();

If "if (cond)" has probability p, you want to scale loop1 by p
and loop2 by 1-p as your patch does, but you need to exclude the basic
blocks guarded by the condition.

One way is to break out loop_version and implement it inline, other
option (perhaps leading to less code duplication) is to add argument listing
basic blocks that should not be scaled, which would be set to both arms
of the if.

Are there other profile patches of your I should look at?
Honza
>  	gcc_assert (loop2);
>  
> @@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
>    initialize_original_copy_tables ();
>  
>    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> -				     profile_probability::always (),
> -				     profile_probability::never (),
> -				     profile_probability::always (),
> -				     profile_probability::always (),
> +				     invar_branch->probability.invert (),
> +				     invar_branch->probability,
> +				     invar_branch->probability.invert (),
> +				     invar_branch->probability,
>  				     true);
>    if (!loop2)
>      {
> @@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
>    to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
>    to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
>  
> +  to_loop1->probability = invar_branch->probability.invert ();
> +  to_loop2->probability = invar_branch->probability;
> +
>    /* Due to introduction of a control flow edge from loop1 latch to loop2
>       pre-header, we should update PHIs in loop2 to reflect this connection
>       between loop1 and loop2.  */
> -- 
> 2.27.0.90.geebb51ba8c
>

Jan Hubicka Oct. 27, 2021, 7:23 a.m. UTC | #2

> As discussed yesterday, for loop of form
> 
> for (...)
>   if (cond)
>     cond = something();
>   else
>     something2
> 
> Split as
> 

Say "if (cond)" has probability p, then individual statements scale as
follows:

  loop1:
p    for (...)
p      if (true)
1        cond = something();
1        if (!cond)
x          break
0      else
0        something2 ();
     
     loop2:
1-p  for (...)
1-p    if (false)
0        cond = something();
1      else
1        something2 ();

Block with x has same count as preheader of the loop.

Your patch does
  loop1:
p    for (...)
p      if (true)
p        cond = something();
p        if (!cond)
x          break
p      else
p        something2 ();
     
     loop2:
1-p  for (...)
1-p    if (false)
1-p      cond = something();
1-p    else
1-p      something2 ();

One does not need set 0 correctly (blocks will be removed), but it would
be nice to avoid dropping 1s down. Looking at this, things will go well
with your change if we are guaranteed that something() and something2()
is always 1 bb becuase block merging happening after turning condiitonal
to constant will remove the misupdated count. Your profile scales after merging
is:
  loop1:
p    for (...)
1        cond = something();
1        if (!cond)
x          break
     
     loop2:
1-p  for (...)
1       something2 ();

assuming that profile was sane and frequency of something() is
p*frequency of the conditional and similarly for something2().
This is why you see final profile correct.  So if we split only loops
with one BB then/else arms, the patch is OK with comment explaining
this.

Also I wonder, do we correctly duplicate&update known bounds on
iteration counts attached to the loop struccture?

Honza
> 
> If "if (cond)" has probability p, you want to scale loop1 by p
> and loop2 by 1-p as your patch does, but you need to exclude the basic
> blocks guarded by the condition.
> 
> One way is to break out loop_version and implement it inline, other
> option (perhaps leading to less code duplication) is to add argument listing
> basic blocks that should not be scaled, which would be set to both arms
> of the if.
> 
> Are there other profile patches of your I should look at?
> Honza
> >  	gcc_assert (loop2);
> >  
> > @@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> >    initialize_original_copy_tables ();
> >  
> >    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> > -				     profile_probability::always (),
> > -				     profile_probability::never (),
> > -				     profile_probability::always (),
> > -				     profile_probability::always (),
> > +				     invar_branch->probability.invert (),
> > +				     invar_branch->probability,
> > +				     invar_branch->probability.invert (),
> > +				     invar_branch->probability,
> >  				     true);
> >    if (!loop2)
> >      {
> > @@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> >    to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> >    to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> >  
> > +  to_loop1->probability = invar_branch->probability.invert ();
> > +  to_loop2->probability = invar_branch->probability;
> > +
> >    /* Due to introduction of a control flow edge from loop1 latch to loop2
> >       pre-header, we should update PHIs in loop2 to reflect this connection
> >       between loop1 and loop2.  */
> > -- 
> > 2.27.0.90.geebb51ba8c
> >

Richard Biener Oct. 27, 2021, 7:29 a.m. UTC | #3

On Wed, 27 Oct 2021, Jan Hubicka wrote:

> > 
> > gcc/ChangeLog:
> > 
> > 	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
> > 	(do_split_loop_on_cond): Likewise.
> > ---
> >  gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
> >  1 file changed, 16 insertions(+), 9 deletions(-)
> > 
> > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> > index 3f6ad046623..d30782888f3 100644
> > --- a/gcc/tree-ssa-loop-split.c
> > +++ b/gcc/tree-ssa-loop-split.c
> > @@ -575,7 +575,11 @@ split_loop (class loop *loop1)
> >  					    stmts2);
> >  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
> >  	if (!initial_true)
> > -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
> > +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> > +
> > +	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
> > +			   ? EDGE_SUCC (bbs[i], 0)
> > +			   : EDGE_SUCC (bbs[i], 1);
> >  
> >  	/* Now version the loop, placing loop2 after loop1 connecting
> >  	   them, and fix up SSA form for that.  */
> > @@ -583,10 +587,10 @@ split_loop (class loop *loop1)
> >  	basic_block cond_bb;
> >  
> >  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
> > -					   profile_probability::always (),
> > -					   profile_probability::always (),
> > -					   profile_probability::always (),
> > -					   profile_probability::always (),
> > +					   true_edge->probability,
> > +					   true_edge->probability.invert (),
> > +					   true_edge->probability,
> > +					   true_edge->probability.invert (),
> >  					   true);
> 
> As discussed yesterday, for loop of form
> 
> for (...)
>   if (cond)
>     cond = something();
>   else
>     something2
> 
> Split as

Note that you are missing to conditionalize loop1 execution
on 'cond' (not sure if that makes a difference).

> loop1:
if (cond)
> for (...)
>   if (true)
>     cond = something();
>     if (!cond)
>       break
>   else
>     something2 ();
> 
> loop2:
> for (...)
>   if (false)
>     cond = something();
>   else
>     something2 ();
> 
> If "if (cond)" has probability p, you want to scale loop1 by p
> and loop2 by 1-p as your patch does, but you need to exclude the basic
> blocks guarded by the condition.
> 
> One way is to break out loop_version and implement it inline, other
> option (perhaps leading to less code duplication) is to add argument listing
> basic blocks that should not be scaled, which would be set to both arms
> of the if.
> 
> Are there other profile patches of your I should look at?
> Honza
> >  	gcc_assert (loop2);
> >  
> > @@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> >    initialize_original_copy_tables ();
> >  
> >    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> > -				     profile_probability::always (),
> > -				     profile_probability::never (),
> > -				     profile_probability::always (),
> > -				     profile_probability::always (),
> > +				     invar_branch->probability.invert (),
> > +				     invar_branch->probability,
> > +				     invar_branch->probability.invert (),
> > +				     invar_branch->probability,
> >  				     true);
> >    if (!loop2)
> >      {
> > @@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> >    to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> >    to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> >  
> > +  to_loop1->probability = invar_branch->probability.invert ();
> > +  to_loop2->probability = invar_branch->probability;
> > +
> >    /* Due to introduction of a control flow edge from loop1 latch to loop2
> >       pre-header, we should update PHIs in loop2 to reflect this connection
> >       between loop1 and loop2.  */
> > -- 
> > 2.27.0.90.geebb51ba8c
> > 
>

Jan Hubicka Oct. 27, 2021, 7:44 a.m. UTC | #4

> On Wed, 27 Oct 2021, Jan Hubicka wrote:
> 
> > > 
> > > gcc/ChangeLog:
> > > 
> > > 	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
> > > 	(do_split_loop_on_cond): Likewise.
> > > ---
> > >  gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
> > >  1 file changed, 16 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> > > index 3f6ad046623..d30782888f3 100644
> > > --- a/gcc/tree-ssa-loop-split.c
> > > +++ b/gcc/tree-ssa-loop-split.c
> > > @@ -575,7 +575,11 @@ split_loop (class loop *loop1)
> > >  					    stmts2);
> > >  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
> > >  	if (!initial_true)
> > > -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
> > > +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> > > +
> > > +	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
> > > +			   ? EDGE_SUCC (bbs[i], 0)
> > > +			   : EDGE_SUCC (bbs[i], 1);
> > >  
> > >  	/* Now version the loop, placing loop2 after loop1 connecting
> > >  	   them, and fix up SSA form for that.  */
> > > @@ -583,10 +587,10 @@ split_loop (class loop *loop1)
> > >  	basic_block cond_bb;
> > >  
> > >  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
> > > -					   profile_probability::always (),
> > > -					   profile_probability::always (),
> > > -					   profile_probability::always (),
> > > -					   profile_probability::always (),
> > > +					   true_edge->probability,
> > > +					   true_edge->probability.invert (),
> > > +					   true_edge->probability,
> > > +					   true_edge->probability.invert (),
> > >  					   true);
> > 
> > As discussed yesterday, for loop of form
> > 
> > for (...)
> >   if (cond)
> >     cond = something();
> >   else
> >     something2
> > 
> > Split as
> 
> Note that you are missing to conditionalize loop1 execution
> on 'cond' (not sure if that makes a difference).
You are right - forgot to mention that.

Entry conditional makes no difference on scaling stmts inside loop but
affects its header and expected trip count. We however need to set up
probability of this conditional (and preheader count if it exists)
There is no general way to read the probability of this initial
conditional from cfg profile.  So I guess we are stuck with guessing
some arbitrary value. I guess common case is that cond is true first
iteration tough and often we can easily see that fromo PHI node
initializing the test variable.

Other thing that changes is expected number of iterations of the split
loops, so we may want to update the exit conditinal probability
accordingly...

Honza
> 
> > loop1:
> if (cond)
> > for (...)
> >   if (true)
> >     cond = something();
> >     if (!cond)
> >       break
> >   else
> >     something2 ();
> > 
> > loop2:
> > for (...)
> >   if (false)
> >     cond = something();
> >   else
> >     something2 ();
> > 
> > If "if (cond)" has probability p, you want to scale loop1 by p
> > and loop2 by 1-p as your patch does, but you need to exclude the basic
> > blocks guarded by the condition.
> > 
> > One way is to break out loop_version and implement it inline, other
> > option (perhaps leading to less code duplication) is to add argument listing
> > basic blocks that should not be scaled, which would be set to both arms
> > of the if.
> > 
> > Are there other profile patches of your I should look at?
> > Honza
> > >  	gcc_assert (loop2);
> > >  
> > > @@ -1486,10 +1490,10 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> > >    initialize_original_copy_tables ();
> > >  
> > >    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> > > -				     profile_probability::always (),
> > > -				     profile_probability::never (),
> > > -				     profile_probability::always (),
> > > -				     profile_probability::always (),
> > > +				     invar_branch->probability.invert (),
> > > +				     invar_branch->probability,
> > > +				     invar_branch->probability.invert (),
> > > +				     invar_branch->probability,
> > >  				     true);
> > >    if (!loop2)
> > >      {
> > > @@ -1530,6 +1534,9 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
> > >    to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
> > >    to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
> > >  
> > > +  to_loop1->probability = invar_branch->probability.invert ();
> > > +  to_loop2->probability = invar_branch->probability;
> > > +
> > >    /* Due to introduction of a control flow edge from loop1 latch to loop2
> > >       pre-header, we should update PHIs in loop2 to reflect this connection
> > >       between loop1 and loop2.  */
> > > -- 
> > > 2.27.0.90.geebb51ba8c
> > > 
> > 
> 
> -- 
> Richard Biener <rguenther@suse.de>
> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
> Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Xionghu Luo Nov. 8, 2021, 6:09 a.m. UTC | #5

On 2021/10/27 15:44, Jan Hubicka wrote:
>> On Wed, 27 Oct 2021, Jan Hubicka wrote:
>>
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> 	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
>>>> 	(do_split_loop_on_cond): Likewise.
>>>> ---
>>>>  gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
>>>>  1 file changed, 16 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>>>> index 3f6ad046623..d30782888f3 100644
>>>> --- a/gcc/tree-ssa-loop-split.c
>>>> +++ b/gcc/tree-ssa-loop-split.c
>>>> @@ -575,7 +575,11 @@ split_loop (class loop *loop1)
>>>>  					    stmts2);
>>>>  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
>>>>  	if (!initial_true)
>>>> -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
>>>> +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>>>> +
>>>> +	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
>>>> +			   ? EDGE_SUCC (bbs[i], 0)
>>>> +			   : EDGE_SUCC (bbs[i], 1);
>>>>  
>>>>  	/* Now version the loop, placing loop2 after loop1 connecting
>>>>  	   them, and fix up SSA form for that.  */
>>>> @@ -583,10 +587,10 @@ split_loop (class loop *loop1)
>>>>  	basic_block cond_bb;
>>>>  
>>>>  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
>>>> -					   profile_probability::always (),
>>>> -					   profile_probability::always (),
>>>> -					   profile_probability::always (),
>>>> -					   profile_probability::always (),
>>>> +					   true_edge->probability,
>>>> +					   true_edge->probability.invert (),
>>>> +					   true_edge->probability,
>>>> +					   true_edge->probability.invert (),
>>>>  					   true);
>>>
>>> As discussed yesterday, for loop of form
>>>
>>> for (...)
>>>   if (cond)
>>>     cond = something();
>>>   else
>>>     something2
>>>
>>> Split as
>>
>> Note that you are missing to conditionalize loop1 execution
>> on 'cond' (not sure if that makes a difference).
> You are right - forgot to mention that.
> 
> Entry conditional makes no difference on scaling stmts inside loop but
> affects its header and expected trip count. We however need to set up
> probability of this conditional (and preheader count if it exists)
> There is no general way to read the probability of this initial
> conditional from cfg profile.  So I guess we are stuck with guessing
> some arbitrary value. I guess common case is that cond is true first
> iteration tough and often we can easily see that fromo PHI node
> initializing the test variable.
> 
> Other thing that changes is expected number of iterations of the split
> loops, so we may want to update the exit conditinal probability
> accordingly...
> 
Sorry for the late reply.  The below updated patch mainly solves the issues
you pointed out:
  - profile count proportion for both original loop and copied loop
without dropping down the true branch's count;
  - probability update in the two loops and between the two loops;
  - number of iterations update/check for split_loop.


[PATCH v3] Fix loop split incorrect count and probability

In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
kind of split. split_loop only works for single loop and insert edge at
exit when split, while split_loop_on_cond is not limited to single loop
and insert edge at latch when split.  Both split behavior should consider
loop count and probability update.  For split_loop, loop split condition
is moved in front of loop1 and loop2; But split_loop_on_cond moves the
condition between loop1 and loop2, this patch does:
1) profile count proportion for both original loop and copied loop
without dropping down the true branch's count;
2) probability update in and between the two loops;
3) number of iterations update for split_loop.

Regression tested pass, OK for master?

Changes diff for split_loop and split_loop_on_cond cases:

1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
...
   <bb 2> [local count: 118111600]:
   if (beg_5(D) < end_8(D))
     goto <bb 14>; [89.00%]
   else
     goto <bb 6>; [11.00%]

   <bb 14> [local count: 105119324]:
   if (beg2_6(D) < c_9(D))
-    goto <bb 15>; [100.00%]
+    goto <bb 15>; [33.00%]
   else
-    goto <bb 16>; [100.00%]
+    goto <bb 16>; [67.00%]

-  <bb 15> [local count: 105119324]:
+  <bb 15> [local count: 34689377]:
   _25 = beg_5(D) + 1;
   _26 = end_8(D) - beg_5(D);
   _27 = beg2_6(D) + _26;
   _28 = MIN_EXPR <c_9(D), _27>;

-  <bb 3> [local count: 955630225]:
+  <bb 3> [local count: 315357973]:
   # i_16 = PHI <i_11(8), beg_5(D)(15)>
   # j_17 = PHI <j_12(8), beg2_6(D)(15)>
   printf ("a: %d %d\n", i_16, j_17);
   i_11 = i_16 + 1;
   j_12 = j_17 + 1;
   if (j_12 < _28)
-    goto <bb 8>; [89.00%]
+    goto <bb 8>; [29.37%]
   else
-    goto <bb 17>; [11.00%]
+    goto <bb 17>; [70.63%]

-  <bb 8> [local count: 850510901]:
+  <bb 8> [local count: 280668596]:
   goto <bb 3>; [100.00%]

-  <bb 16> [local count: 105119324]:
+  <bb 16> [local count: 70429947]:
   # i_22 = PHI <beg_5(D)(14), i_29(17)>
   # j_23 = PHI <beg2_6(D)(14), j_30(17)>

   <bb 10> [local count: 955630225]:
   # i_2 = PHI <i_22(16), i_20(13)>
   # j_1 = PHI <j_23(16), j_21(13)>
   i_20 = i_2 + 1;
   j_21 = j_1 + 1;
   if (end_8(D) > i_20)
-    goto <bb 13>; [89.00%]
+    goto <bb 13>; [59.63%]
   else
-    goto <bb 9>; [11.00%]
+    goto <bb 9>; [40.37%]

-  <bb 13> [local count: 850510901]:
+  <bb 13> [local count: 569842305]:
   goto <bb 10>; [100.00%]

   <bb 17> [local count: 105119324]:
   # i_29 = PHI <i_11(3)>
   # j_30 = PHI <j_12(3)>
   if (end_8(D) > i_29)
     goto <bb 16>; [80.00%]
   else
     goto <bb 9>; [20.00%]

   <bb 9> [local count: 105119324]:

   <bb 6> [local count: 118111600]:
   return 0;

 }

2) diff base/loop-cond-split-1.c.151t.lsplit  patched/loop-cond-split-1.c.151t.lsplit:
...
   <bb 2> [local count: 118111600]:
   if (n_7(D) > 0)
     goto <bb 4>; [89.00%]
   else
     goto <bb 3>; [11.00%]

   <bb 3> [local count: 118111600]:
   return;

   <bb 4> [local count: 105119324]:
   pretmp_3 = ga;

-  <bb 5> [local count: 955630225]:
+  <bb 5> [local count: 315357973]:
   # i_13 = PHI <i_10(20), 0(4)>
   # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
   if (prephitmp_12 != 0)
     goto <bb 6>; [33.00%]
   else
     goto <bb 7>; [67.00%]

   <bb 6> [local count: 315357972]:
   _2 = do_something ();
   ga = _2;

-  <bb 7> [local count: 955630225]:
+  <bb 7> [local count: 315357973]:
   # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
   i_10 = inc (i_13);
   if (n_7(D) > i_10)
     goto <bb 21>; [89.00%]
   else
     goto <bb 11>; [11.00%]

   <bb 11> [local count: 105119324]:
   goto <bb 3>; [100.00%]

-  <bb 21> [local count: 850510901]:
+  <bb 21> [local count: 280668596]:
   if (prephitmp_12 != 0)
-    goto <bb 20>; [100.00%]
+    goto <bb 20>; [33.00%]
   else
-    goto <bb 19>; [INV]
+    goto <bb 19>; [67.00%]

-  <bb 20> [local count: 850510901]:
+  <bb 20> [local count: 280668596]:
   goto <bb 5>; [100.00%]

-  <bb 19> [count: 0]:
+  <bb 19> [local count: 70429947]:
   # i_23 = PHI <i_10(21)>
   # prephitmp_25 = PHI <prephitmp_5(21)>

-  <bb 12> [local count: 955630225]:
+  <bb 12> [local count: 640272252]:
   # i_15 = PHI <i_23(19), i_22(16)>
   # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
   i_22 = inc (i_15);
   if (n_7(D) > i_22)
-    goto <bb 16>; [89.00%]
+    goto <bb 16>; [59.63%]
   else
-    goto <bb 11>; [11.00%]
+    goto <bb 11>; [40.37%]

-  <bb 16> [local count: 850510901]:
+  <bb 16> [local count: 569842305]:
   goto <bb 12>; [100.00%]

 }

gcc/ChangeLog:

	* tree-ssa-loop-split.c (split_loop): Fix incorrect
	profile_count and probability.
	(do_split_loop_on_cond): Likewise.
---
 gcc/tree-ssa-loop-split.c | 110 +++++++++++++++++++++++++++++++++++---
 1 file changed, 102 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..102766241fb 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -575,7 +575,10 @@ split_loop (class loop *loop1)
 					    stmts2);
 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
 	if (!initial_true)
-	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+	edge true_edge, false_edge;
+	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
 
 	/* Now version the loop, placing loop2 after loop1 connecting
 	   them, and fix up SSA form for that.  */
@@ -583,11 +586,11 @@ split_loop (class loop *loop1)
 	basic_block cond_bb;
 
 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   true);
+					  true_edge->probability,
+					  true_edge->probability.invert (),
+					  profile_probability::always (),
+					  profile_probability::always (),
+					  true);
 	gcc_assert (loop2);
 
 	edge new_e = connect_loops (loop1, loop2);
@@ -607,6 +610,53 @@ split_loop (class loop *loop1)
 	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
 	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
 
+	/* Check first loop's number of iterations.  */
+	update_ssa (TODO_update_ssa);
+	gcc_assert (number_of_iterations_exit (loop1, single_exit (loop1),
+					       &niter, false, true));
+
+	/* Proportion first loop's bb counts except those dominated by true
+	   branch to avoid drop 1s down.  */
+	basic_block *bbs1, *bbs2;
+	bbs1 = get_loop_body (loop1);
+	unsigned j;
+	for (j = 0; j < loop1->num_nodes; j++)
+	  if (bbs1[j] == loop1->latch
+	      || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
+	    bbs1[j]->count
+	      = bbs1[j]->count.apply_probability (true_edge->probability);
+	free (bbs1);
+
+	/* Fix first loop's exit probability after scaling.  */
+	edge exit_to_latch1 = single_pred_edge (loop1->latch);
+	exit_to_latch1->probability = exit_to_latch1->probability.apply_scale (
+	  true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
+	single_exit (loop1)->probability
+	  = exit_to_latch1->probability.invert ();
+
+	/* Check second loop's number of iterations.  */
+	class tree_niter_desc niter2;
+	gcc_assert (number_of_iterations_exit (loop2, single_exit (loop2),
+					       &niter2, false, true));
+
+	/* Proportion second loop's bb counts except those dominated by false
+	   branch to avoid drop 1s down.  */
+	basic_block bbi_copy = get_bb_copy (false_edge->dest);
+	bbs2 = get_loop_body (loop2);
+	for (j = 0; j < loop2->num_nodes; j++)
+	  if (bbs2[j] == loop2->latch
+	      || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
+	    bbs2[j]->count = bbs2[j]->count.apply_probability (
+	      true_edge->probability.invert ());
+	free (bbs2);
+
+	/* Fix second loop's exit probability after scaling.  */
+	edge exit_to_latch2 = single_pred_edge (loop2->latch);
+	exit_to_latch2->probability = exit_to_latch2->probability.apply_scale (
+	  false_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
+	single_exit (loop2)->probability
+	  = exit_to_latch2->probability.invert ();
+
 	/* Finally patch out the two copies of the condition to be always
 	   true/false (or opposite).  */
 	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
@@ -1486,8 +1536,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-				     profile_probability::always (),
-				     profile_probability::never (),
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
 				     profile_probability::always (),
 				     profile_probability::always (),
 				     true);
@@ -1535,6 +1585,50 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
      between loop1 and loop2.  */
   connect_loop_phis (loop1, loop2, to_loop2);
 
+  update_ssa (TODO_update_ssa);
+
+  edge true_edge, false_edge, skip_edge1, skip_edge2;
+  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+
+  /* Proportion first loop's bb counts except those dominated by true
+     branch to avoid drop 1s down.  */
+  skip_edge1 = true_invar ? false_edge : true_edge;
+  skip_edge2 = true_invar ? true_edge : false_edge;
+  basic_block *bbs1, *bbs2;
+  bbs1 = get_loop_body (loop1);
+  unsigned j;
+  for (j = 0; j < loop1->num_nodes; j++)
+    if (bbs1[j] == loop1->latch
+	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], skip_edge1->dest))
+      bbs1[j]->count
+	= bbs1[j]->count.apply_probability (skip_edge1->probability);
+  free (bbs1);
+
+  /* Fix first loop's exit probability after scaling.  */
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
+  /* Proportion second loop's bb counts except those dominated by false
+     branch to avoid drop 1s down.  */
+  basic_block bbi_copy = get_bb_copy (skip_edge2->dest);
+  bbs2 = get_loop_body (loop2);
+  for (j = 0; j < loop2->num_nodes; j++)
+    if (bbs2[j] == loop2->latch
+	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
+      bbs2[j]->count
+	= bbs2[j]->count.apply_probability (skip_edge2->probability);
+  free (bbs2);
+
+  /* Fix second loop's exit probability after scaling.  */
+  edge loop2_latch_exit;
+  edge exit_to_latch2 = single_pred_edge (loop2->latch);
+  exit_to_latch2->probability = exit_to_latch2->probability.apply_scale (
+    skip_edge2->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
+  loop2_latch_exit = EDGE_SUCC (exit_to_latch2->src, 0) == exit_to_latch2
+		       ? EDGE_SUCC (exit_to_latch2->src, 1)
+		       : EDGE_SUCC (exit_to_latch2->src, 0);
+  loop2_latch_exit->probability = exit_to_latch2->probability.invert ();
+
   free_original_copy_tables ();
 
   return true;

Xionghu Luo Nov. 24, 2021, 5:11 a.m. UTC | #6

Gentle ping, thanks.

[PATCH v3] Fix loop split incorrect count and probability

https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583626.html


On 2021/11/8 14:09, Xionghu Luo via Gcc-patches wrote:
> 
> 
> On 2021/10/27 15:44, Jan Hubicka wrote:
>>> On Wed, 27 Oct 2021, Jan Hubicka wrote:
>>>
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>> 	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
>>>>> 	(do_split_loop_on_cond): Likewise.
>>>>> ---
>>>>>  gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
>>>>>  1 file changed, 16 insertions(+), 9 deletions(-)
>>>>>
>>>>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>>>>> index 3f6ad046623..d30782888f3 100644
>>>>> --- a/gcc/tree-ssa-loop-split.c
>>>>> +++ b/gcc/tree-ssa-loop-split.c
>>>>> @@ -575,7 +575,11 @@ split_loop (class loop *loop1)
>>>>>  					    stmts2);
>>>>>  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
>>>>>  	if (!initial_true)
>>>>> -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
>>>>> +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
>>>>> +
>>>>> +	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
>>>>> +			   ? EDGE_SUCC (bbs[i], 0)
>>>>> +			   : EDGE_SUCC (bbs[i], 1);
>>>>>  
>>>>>  	/* Now version the loop, placing loop2 after loop1 connecting
>>>>>  	   them, and fix up SSA form for that.  */
>>>>> @@ -583,10 +587,10 @@ split_loop (class loop *loop1)
>>>>>  	basic_block cond_bb;
>>>>>  
>>>>>  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
>>>>> -					   profile_probability::always (),
>>>>> -					   profile_probability::always (),
>>>>> -					   profile_probability::always (),
>>>>> -					   profile_probability::always (),
>>>>> +					   true_edge->probability,
>>>>> +					   true_edge->probability.invert (),
>>>>> +					   true_edge->probability,
>>>>> +					   true_edge->probability.invert (),
>>>>>  					   true);
>>>>
>>>> As discussed yesterday, for loop of form
>>>>
>>>> for (...)
>>>>   if (cond)
>>>>     cond = something();
>>>>   else
>>>>     something2
>>>>
>>>> Split as
>>>
>>> Note that you are missing to conditionalize loop1 execution
>>> on 'cond' (not sure if that makes a difference).
>> You are right - forgot to mention that.
>>
>> Entry conditional makes no difference on scaling stmts inside loop but
>> affects its header and expected trip count. We however need to set up
>> probability of this conditional (and preheader count if it exists)
>> There is no general way to read the probability of this initial
>> conditional from cfg profile.  So I guess we are stuck with guessing
>> some arbitrary value. I guess common case is that cond is true first
>> iteration tough and often we can easily see that fromo PHI node
>> initializing the test variable.
>>
>> Other thing that changes is expected number of iterations of the split
>> loops, so we may want to update the exit conditinal probability
>> accordingly...
>>
> Sorry for the late reply.  The below updated patch mainly solves the issues
> you pointed out:
>   - profile count proportion for both original loop and copied loop
> without dropping down the true branch's count;
>   - probability update in the two loops and between the two loops;
>   - number of iterations update/check for split_loop.
> 
> 
> [PATCH v3] Fix loop split incorrect count and probability
> 
> In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
> kind of split. split_loop only works for single loop and insert edge at
> exit when split, while split_loop_on_cond is not limited to single loop
> and insert edge at latch when split.  Both split behavior should consider
> loop count and probability update.  For split_loop, loop split condition
> is moved in front of loop1 and loop2; But split_loop_on_cond moves the
> condition between loop1 and loop2, this patch does:
> 1) profile count proportion for both original loop and copied loop
> without dropping down the true branch's count;
> 2) probability update in and between the two loops;
> 3) number of iterations update for split_loop.
> 
> Regression tested pass, OK for master?
> 
> Changes diff for split_loop and split_loop_on_cond cases:
> 
> 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
> ...
>    <bb 2> [local count: 118111600]:
>    if (beg_5(D) < end_8(D))
>      goto <bb 14>; [89.00%]
>    else
>      goto <bb 6>; [11.00%]
> 
>    <bb 14> [local count: 105119324]:
>    if (beg2_6(D) < c_9(D))
> -    goto <bb 15>; [100.00%]
> +    goto <bb 15>; [33.00%]
>    else
> -    goto <bb 16>; [100.00%]
> +    goto <bb 16>; [67.00%]
> 
> -  <bb 15> [local count: 105119324]:
> +  <bb 15> [local count: 34689377]:
>    _25 = beg_5(D) + 1;
>    _26 = end_8(D) - beg_5(D);
>    _27 = beg2_6(D) + _26;
>    _28 = MIN_EXPR <c_9(D), _27>;
> 
> -  <bb 3> [local count: 955630225]:
> +  <bb 3> [local count: 315357973]:
>    # i_16 = PHI <i_11(8), beg_5(D)(15)>
>    # j_17 = PHI <j_12(8), beg2_6(D)(15)>
>    printf ("a: %d %d\n", i_16, j_17);
>    i_11 = i_16 + 1;
>    j_12 = j_17 + 1;
>    if (j_12 < _28)
> -    goto <bb 8>; [89.00%]
> +    goto <bb 8>; [29.37%]
>    else
> -    goto <bb 17>; [11.00%]
> +    goto <bb 17>; [70.63%]
> 
> -  <bb 8> [local count: 850510901]:
> +  <bb 8> [local count: 280668596]:
>    goto <bb 3>; [100.00%]
> 
> -  <bb 16> [local count: 105119324]:
> +  <bb 16> [local count: 70429947]:
>    # i_22 = PHI <beg_5(D)(14), i_29(17)>
>    # j_23 = PHI <beg2_6(D)(14), j_30(17)>
> 
>    <bb 10> [local count: 955630225]:
>    # i_2 = PHI <i_22(16), i_20(13)>
>    # j_1 = PHI <j_23(16), j_21(13)>
>    i_20 = i_2 + 1;
>    j_21 = j_1 + 1;
>    if (end_8(D) > i_20)
> -    goto <bb 13>; [89.00%]
> +    goto <bb 13>; [59.63%]
>    else
> -    goto <bb 9>; [11.00%]
> +    goto <bb 9>; [40.37%]
> 
> -  <bb 13> [local count: 850510901]:
> +  <bb 13> [local count: 569842305]:
>    goto <bb 10>; [100.00%]
> 
>    <bb 17> [local count: 105119324]:
>    # i_29 = PHI <i_11(3)>
>    # j_30 = PHI <j_12(3)>
>    if (end_8(D) > i_29)
>      goto <bb 16>; [80.00%]
>    else
>      goto <bb 9>; [20.00%]
> 
>    <bb 9> [local count: 105119324]:
> 
>    <bb 6> [local count: 118111600]:
>    return 0;
> 
>  }
> 
> 2) diff base/loop-cond-split-1.c.151t.lsplit  patched/loop-cond-split-1.c.151t.lsplit:
> ...
>    <bb 2> [local count: 118111600]:
>    if (n_7(D) > 0)
>      goto <bb 4>; [89.00%]
>    else
>      goto <bb 3>; [11.00%]
> 
>    <bb 3> [local count: 118111600]:
>    return;
> 
>    <bb 4> [local count: 105119324]:
>    pretmp_3 = ga;
> 
> -  <bb 5> [local count: 955630225]:
> +  <bb 5> [local count: 315357973]:
>    # i_13 = PHI <i_10(20), 0(4)>
>    # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
>    if (prephitmp_12 != 0)
>      goto <bb 6>; [33.00%]
>    else
>      goto <bb 7>; [67.00%]
> 
>    <bb 6> [local count: 315357972]:
>    _2 = do_something ();
>    ga = _2;
> 
> -  <bb 7> [local count: 955630225]:
> +  <bb 7> [local count: 315357973]:
>    # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
>    i_10 = inc (i_13);
>    if (n_7(D) > i_10)
>      goto <bb 21>; [89.00%]
>    else
>      goto <bb 11>; [11.00%]
> 
>    <bb 11> [local count: 105119324]:
>    goto <bb 3>; [100.00%]
> 
> -  <bb 21> [local count: 850510901]:
> +  <bb 21> [local count: 280668596]:
>    if (prephitmp_12 != 0)
> -    goto <bb 20>; [100.00%]
> +    goto <bb 20>; [33.00%]
>    else
> -    goto <bb 19>; [INV]
> +    goto <bb 19>; [67.00%]
> 
> -  <bb 20> [local count: 850510901]:
> +  <bb 20> [local count: 280668596]:
>    goto <bb 5>; [100.00%]
> 
> -  <bb 19> [count: 0]:
> +  <bb 19> [local count: 70429947]:
>    # i_23 = PHI <i_10(21)>
>    # prephitmp_25 = PHI <prephitmp_5(21)>
> 
> -  <bb 12> [local count: 955630225]:
> +  <bb 12> [local count: 640272252]:
>    # i_15 = PHI <i_23(19), i_22(16)>
>    # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
>    i_22 = inc (i_15);
>    if (n_7(D) > i_22)
> -    goto <bb 16>; [89.00%]
> +    goto <bb 16>; [59.63%]
>    else
> -    goto <bb 11>; [11.00%]
> +    goto <bb 11>; [40.37%]
> 
> -  <bb 16> [local count: 850510901]:
> +  <bb 16> [local count: 569842305]:
>    goto <bb 12>; [100.00%]
> 
>  }
> 
> gcc/ChangeLog:
> 
> 	* tree-ssa-loop-split.c (split_loop): Fix incorrect
> 	profile_count and probability.
> 	(do_split_loop_on_cond): Likewise.
> ---
>  gcc/tree-ssa-loop-split.c | 110 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 102 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 3f6ad046623..102766241fb 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -575,7 +575,10 @@ split_loop (class loop *loop1)
>  					    stmts2);
>  	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
>  	if (!initial_true)
> -	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
> +	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> +
> +	edge true_edge, false_edge;
> +	extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
> 
>  	/* Now version the loop, placing loop2 after loop1 connecting
>  	   them, and fix up SSA form for that.  */
> @@ -583,11 +586,11 @@ split_loop (class loop *loop1)
>  	basic_block cond_bb;
> 
>  	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> -					   profile_probability::always (),
> -					   true);
> +					  true_edge->probability,
> +					  true_edge->probability.invert (),
> +					  profile_probability::always (),
> +					  profile_probability::always (),
> +					  true);
>  	gcc_assert (loop2);
> 
>  	edge new_e = connect_loops (loop1, loop2);
> @@ -607,6 +610,53 @@ split_loop (class loop *loop1)
>  	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
>  	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
> 
> +	/* Check first loop's number of iterations.  */
> +	update_ssa (TODO_update_ssa);
> +	gcc_assert (number_of_iterations_exit (loop1, single_exit (loop1),
> +					       &niter, false, true));
> +
> +	/* Proportion first loop's bb counts except those dominated by true
> +	   branch to avoid drop 1s down.  */
> +	basic_block *bbs1, *bbs2;
> +	bbs1 = get_loop_body (loop1);
> +	unsigned j;
> +	for (j = 0; j < loop1->num_nodes; j++)
> +	  if (bbs1[j] == loop1->latch
> +	      || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
> +	    bbs1[j]->count
> +	      = bbs1[j]->count.apply_probability (true_edge->probability);
> +	free (bbs1);
> +
> +	/* Fix first loop's exit probability after scaling.  */
> +	edge exit_to_latch1 = single_pred_edge (loop1->latch);
> +	exit_to_latch1->probability = exit_to_latch1->probability.apply_scale (
> +	  true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
> +	single_exit (loop1)->probability
> +	  = exit_to_latch1->probability.invert ();
> +
> +	/* Check second loop's number of iterations.  */
> +	class tree_niter_desc niter2;
> +	gcc_assert (number_of_iterations_exit (loop2, single_exit (loop2),
> +					       &niter2, false, true));
> +
> +	/* Proportion second loop's bb counts except those dominated by false
> +	   branch to avoid drop 1s down.  */
> +	basic_block bbi_copy = get_bb_copy (false_edge->dest);
> +	bbs2 = get_loop_body (loop2);
> +	for (j = 0; j < loop2->num_nodes; j++)
> +	  if (bbs2[j] == loop2->latch
> +	      || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
> +	    bbs2[j]->count = bbs2[j]->count.apply_probability (
> +	      true_edge->probability.invert ());
> +	free (bbs2);
> +
> +	/* Fix second loop's exit probability after scaling.  */
> +	edge exit_to_latch2 = single_pred_edge (loop2->latch);
> +	exit_to_latch2->probability = exit_to_latch2->probability.apply_scale (
> +	  false_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
> +	single_exit (loop2)->probability
> +	  = exit_to_latch2->probability.invert ();
> +
>  	/* Finally patch out the two copies of the condition to be always
>  	   true/false (or opposite).  */
>  	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
> @@ -1486,8 +1536,8 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
>    initialize_original_copy_tables ();
> 
>    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> -				     profile_probability::always (),
> -				     profile_probability::never (),
> +				     invar_branch->probability.invert (),
> +				     invar_branch->probability,
>  				     profile_probability::always (),
>  				     profile_probability::always (),
>  				     true);
> @@ -1535,6 +1585,50 @@ do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
>       between loop1 and loop2.  */
>    connect_loop_phis (loop1, loop2, to_loop2);
> 
> +  update_ssa (TODO_update_ssa);
> +
> +  edge true_edge, false_edge, skip_edge1, skip_edge2;
> +  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
> +
> +  /* Proportion first loop's bb counts except those dominated by true
> +     branch to avoid drop 1s down.  */
> +  skip_edge1 = true_invar ? false_edge : true_edge;
> +  skip_edge2 = true_invar ? true_edge : false_edge;
> +  basic_block *bbs1, *bbs2;
> +  bbs1 = get_loop_body (loop1);
> +  unsigned j;
> +  for (j = 0; j < loop1->num_nodes; j++)
> +    if (bbs1[j] == loop1->latch
> +	|| !dominated_by_p (CDI_DOMINATORS, bbs1[j], skip_edge1->dest))
> +      bbs1[j]->count
> +	= bbs1[j]->count.apply_probability (skip_edge1->probability);
> +  free (bbs1);
> +
> +  /* Fix first loop's exit probability after scaling.  */
> +  to_loop1->probability = invar_branch->probability.invert ();
> +  to_loop2->probability = invar_branch->probability;
> +
> +  /* Proportion second loop's bb counts except those dominated by false
> +     branch to avoid drop 1s down.  */
> +  basic_block bbi_copy = get_bb_copy (skip_edge2->dest);
> +  bbs2 = get_loop_body (loop2);
> +  for (j = 0; j < loop2->num_nodes; j++)
> +    if (bbs2[j] == loop2->latch
> +	|| !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
> +      bbs2[j]->count
> +	= bbs2[j]->count.apply_probability (skip_edge2->probability);
> +  free (bbs2);
> +
> +  /* Fix second loop's exit probability after scaling.  */
> +  edge loop2_latch_exit;
> +  edge exit_to_latch2 = single_pred_edge (loop2->latch);
> +  exit_to_latch2->probability = exit_to_latch2->probability.apply_scale (
> +    skip_edge2->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
> +  loop2_latch_exit = EDGE_SUCC (exit_to_latch2->src, 0) == exit_to_latch2
> +		       ? EDGE_SUCC (exit_to_latch2->src, 1)
> +		       : EDGE_SUCC (exit_to_latch2->src, 0);
> +  loop2_latch_exit->probability = exit_to_latch2->probability.invert ();
> +
>    free_original_copy_tables ();
> 
>    return true;
>

diff mbox series

Patch

diff base/loop-cond-split-1.c.151t.lsplit  patched/loop-cond-split-1.c.151t.lsplit:
...
   int prephitmp_16;
   int prephitmp_25;

   <bb 2> [local count: 118111600]:
   if (n_7(D) > 0)
     goto <bb 4>; [89.00%]
   else
     goto <bb 3>; [11.00%]

   <bb 3> [local count: 118111600]:
   return;

   <bb 4> [local count: 105119324]:
   pretmp_3 = ga;

-  <bb 5> [local count: 955630225]:
+  <bb 5> [local count: 315357973]:
   # i_13 = PHI <i_10(20), 0(4)>
   # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
   if (prephitmp_12 != 0)
     goto <bb 6>; [33.00%]
   else
     goto <bb 7>; [67.00%]

-  <bb 6> [local count: 315357972]:
+  <bb 6> [local count: 104068130]:
   _2 = do_something ();
   ga = _2;

-  <bb 7> [local count: 955630225]:
+  <bb 7> [local count: 315357973]:
   # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
   i_10 = inc (i_13);
   if (n_7(D) > i_10)
     goto <bb 21>; [89.00%]
   else
     goto <bb 11>; [11.00%]

   <bb 11> [local count: 105119324]:
   goto <bb 3>; [100.00%]

-  <bb 21> [local count: 850510901]:
+  <bb 21> [local count: 280668596]:
   if (prephitmp_12 != 0)
-    goto <bb 20>; [100.00%]
+    goto <bb 20>; [33.00%]
   else
-    goto <bb 19>; [INV]
+    goto <bb 19>; [67.00%]

-  <bb 20> [local count: 850510901]:
+  <bb 20> [local count: 280668596]:
   goto <bb 5>; [100.00%]

-  <bb 19> [count: 0]:
+  <bb 19> [local count: 70429947]:
   # i_23 = PHI <i_10(21)>
   # prephitmp_25 = PHI <prephitmp_5(21)>

-  <bb 12> [local count: 955630225]:
+  <bb 12> [local count: 640272252]:
   # i_15 = PHI <i_23(19), i_22(16)>
   # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
   i_22 = inc (i_15);
   if (n_7(D) > i_22)
     goto <bb 16>; [89.00%]
   else
     goto <bb 11>; [11.00%]

-  <bb 16> [local count: 850510901]:
+  <bb 16> [local count: 569842305]:
   goto <bb 12>; [100.00%]

 }

gcc/ChangeLog:

	* tree-ssa-loop-split.c (split_loop): Fix incorrect probability.
	(do_split_loop_on_cond): Likewise.
---
 gcc/tree-ssa-loop-split.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 3f6ad046623..d30782888f3 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -575,7 +575,11 @@  split_loop (class loop *loop1)
 					    stmts2);
 	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
 	if (!initial_true)
-	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
+
+	edge true_edge = EDGE_SUCC (bbs[i], 0)->flags & EDGE_TRUE_VALUE
+			   ? EDGE_SUCC (bbs[i], 0)
+			   : EDGE_SUCC (bbs[i], 1);
 
 	/* Now version the loop, placing loop2 after loop1 connecting
 	   them, and fix up SSA form for that.  */
@@ -583,10 +587,10 @@  split_loop (class loop *loop1)
 	basic_block cond_bb;
 
 	class loop *loop2 = loop_version (loop1, cond, &cond_bb,
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
-					   profile_probability::always (),
+					   true_edge->probability,
+					   true_edge->probability.invert (),
+					   true_edge->probability,
+					   true_edge->probability.invert (),
 					   true);
 	gcc_assert (loop2);
 
@@ -1486,10 +1490,10 @@  do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   initialize_original_copy_tables ();
 
   struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
-				     profile_probability::always (),
-				     profile_probability::never (),
-				     profile_probability::always (),
-				     profile_probability::always (),
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
+				     invar_branch->probability.invert (),
+				     invar_branch->probability,
 				     true);
   if (!loop2)
     {
@@ -1530,6 +1534,9 @@  do_split_loop_on_cond (struct loop *loop1, edge invar_branch)
   to_loop1->flags |= true_invar ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE;
   to_loop2->flags |= true_invar ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
 
+  to_loop1->probability = invar_branch->probability.invert ();
+  to_loop2->probability = invar_branch->probability;
+
   /* Due to introduction of a control flow edge from loop1 latch to loop2
      pre-header, we should update PHIs in loop2 to reflect this connection
      between loop1 and loop2.  */