disable aggressive_loop_optimizations until niter ready

Message ID 20211222022617.33192-1-guojiufu@linux.ibm.com
State New
Headers
Series disable aggressive_loop_optimizations until niter ready |

Commit Message

Jiufu Guo Dec. 22, 2021, 2:26 a.m. UTC
  Hi,

Normaly, estimate_numbers_of_iterations get/caculate niter first,
and then invokes infer_loop_bounds_from_undefined. While in some case,
after a few call stacks, estimate_numbers_of_iterations is invoked before
niter is ready (e.g. before number_of_latch_executions returns).

e.g. number_of_latch_executions->...follow_ssa_edge_expr-->
  --> estimate_numbers_of_iterations --> infer_loop_bounds_from_undefined.

Since niter is still not computed, call to infer_loop_bounds_from_undefined
may not get final result.
To avoid infer_loop_bounds_from_undefined to be called with interim state
and avoid infer_loop_bounds_from_undefined generates interim data, during
niter's computing, we could disable flag_aggressive_loop_optimizations.

Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?

BR,
Jiufu

gcc/ChangeLog:

	* tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
	Disable/restore flag_aggressive_loop_optimizations.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/scev-16.c: New test.

---
 gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
  

Comments

Richard Biener Jan. 3, 2022, 2:30 p.m. UTC | #1
On Wed, 22 Dec 2021, Jiufu Guo wrote:

> Hi,
> 
> Normaly, estimate_numbers_of_iterations get/caculate niter first,
> and then invokes infer_loop_bounds_from_undefined. While in some case,
> after a few call stacks, estimate_numbers_of_iterations is invoked before
> niter is ready (e.g. before number_of_latch_executions returns).
> 
> e.g. number_of_latch_executions->...follow_ssa_edge_expr-->
>   --> estimate_numbers_of_iterations --> infer_loop_bounds_from_undefined.
> 
> Since niter is still not computed, call to infer_loop_bounds_from_undefined
> may not get final result.
> To avoid infer_loop_bounds_from_undefined to be called with interim state
> and avoid infer_loop_bounds_from_undefined generates interim data, during
> niter's computing, we could disable flag_aggressive_loop_optimizations.
> 
> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?

So this is a optimality fix, not a correctness one?  I suppose the
estimates are computed/used from scev_probably_wraps_p via
loop_exits_before_overflow and ultimatively chrec_convert.

We have a call cycle here,

estimate_numbers_of_iterations -> number_of_latch_executions ->
... -> estimate_numbers_of_iterations

where the first estimate_numbers_of_iterations will make sure
the later call will immediately return.

I'm not sure what your patch tries to do - it seems to tackle
the case where we enter the cycle via number_of_latch_executions?
Why do we get "non-final" values?  idx_infer_loop_bounds resorts
to SCEV and thus may recurse again - to me it would be more
logical to try avoid recursing in number_of_latch_executions by
setting ->nb_iterations to something early, maybe chrec_dont_know,
to signal we're using something we're just trying to compute.

Richard.

> BR,
> Jiufu
> 
> gcc/ChangeLog:
> 
> 	* tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
> 	Disable/restore flag_aggressive_loop_optimizations.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.dg/tree-ssa/scev-16.c: New test.
> 
> ---
>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
>  2 files changed, 39 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> 
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index 06954e437f5..51bb501019e 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
>        && !POINTER_TYPE_P (type))
>      return false;
>  
> +  /* Before niter is calculated, avoid to analyze interim state. */
> +  int old_aggressive_loop_optimizations = flag_aggressive_loop_optimizations;
> +  flag_aggressive_loop_optimizations = 0;
> +
>    tree iv0_niters = NULL_TREE;
>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>  			      op0, &iv0, safe ? &iv0_niters : NULL, false))
> -    return number_of_iterations_popcount (loop, exit, code, niter);
> +    {
> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
> +      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> +      return res;
> +    }
>    tree iv1_niters = NULL_TREE;
>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>  			      op1, &iv1, safe ? &iv1_niters : NULL, false))
> -    return false;
> +    {
> +      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> +      return false;
> +    }
>    /* Give up on complicated case.  */
>    if (iv0_niters && iv1_niters)
> -    return false;
> -
> +    {
> +      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> +      return false;
> +    }
>    /* We don't want to see undefined signed overflow warnings while
>       computing the number of iterations.  */
>    fold_defer_overflow_warnings ();
> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
>  				  only_exit_p, safe))
>      {
>        fold_undefer_and_ignore_overflow_warnings ();
> +      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
>        return false;
>      }
>  
> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop *loop, edge exit,
>  					       niter->may_be_zero);
>  
>    fold_undefer_and_ignore_overflow_warnings ();
> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
>  
>    /* If NITER has simplified into a constant, update MAX.  */
>    if (TREE_CODE (niter->niter) == INTEGER_CST)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> new file mode 100644
> index 00000000000..708ffab88ca
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
> +
> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
> +
> +int arr[1000];
> +
> +void
> +s2i (short start, short end)
> +{
> +  int res = 0;
> +  for (short i = start; i < end; i++)
> +    {
> +      int lv = i;
> +      arr[lv] += lv;
> +    }
> +}
> +
> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\) start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
>
  
Jiufu Guo Jan. 13, 2022, 11:27 a.m. UTC | #2
On 2022-01-03 22:30, Richard Biener wrote:
> On Wed, 22 Dec 2021, Jiufu Guo wrote:
> 
>> Hi,
>> 
>> Normaly, estimate_numbers_of_iterations get/caculate niter first,
>> and then invokes infer_loop_bounds_from_undefined. While in some case,
>> after a few call stacks, estimate_numbers_of_iterations is invoked 
>> before
>> niter is ready (e.g. before number_of_latch_executions returns).
>> 
>> e.g. number_of_latch_executions->...follow_ssa_edge_expr-->
>>   --> estimate_numbers_of_iterations --> 
>> infer_loop_bounds_from_undefined.
>> 
>> Since niter is still not computed, call to 
>> infer_loop_bounds_from_undefined
>> may not get final result.
>> To avoid infer_loop_bounds_from_undefined to be called with interim 
>> state
>> and avoid infer_loop_bounds_from_undefined generates interim data, 
>> during
>> niter's computing, we could disable 
>> flag_aggressive_loop_optimizations.
>> 
>> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for 
>> trunk?
> 
> So this is a optimality fix, not a correctness one?  I suppose the
> estimates are computed/used from scev_probably_wraps_p via
> loop_exits_before_overflow and ultimatively chrec_convert.
> 
> We have a call cycle here,
> 
> estimate_numbers_of_iterations -> number_of_latch_executions ->
> ... -> estimate_numbers_of_iterations
> 
> where the first estimate_numbers_of_iterations will make sure
> the later call will immediately return.

Hi Richard,
Thanks for your comments! And sorry for the late reply.

In estimate_numbers_of_iterations, there is a guard to make sure
the second call to estimate_numbers_of_iterations returns
immediately.

Exactly as you said, it relates to scev_probably_wraps_p calls
loop_exits_before_overflow.

The issue is: the first calling to estimate_numbers_of_iterations
maybe inside number_of_latch_executions.

> 
> I'm not sure what your patch tries to do - it seems to tackle
> the case where we enter the cycle via number_of_latch_executions?
> Why do we get "non-final" values?  idx_infer_loop_bounds resorts

Right, when the call cycle starts from number_of_latch_execution,
the issue may occur:

number_of_latch_executions(*1st call)->..->
analyze_scalar_evolution(IVs 1st) ->..follow_ssa_edge_expr..->
loop_exits_before_overflow->
estimate_numbers_of_iterations (*1st call)->
number_of_latch_executions(*2nd call)->..->
analyze_scalar_evolution(IVs 2nd)->..loop_exits_before_overflow-> 
estimate_numbers_of_iterations(*2nd call)

The second calling to estimate_numbers_of_iterations returns quickly.
And then, in the first calling to estimate_numbers_of_iterations,
infer_loop_bounds_from_undefined is invoked.

And, function "infer_loop_bounds_from_undefined" instantiate/analyze
SCEV for each SSA in the loop.
*Here the issue occur*, these SCEVs are based on the interim IV's
SCEV which come from "analyze_scalar_evolution(IVs 2nd)",
and those IV's SCEV will be overridden by up level
"analyze_scalar_evolution(IVs 1st)".

To handle this issue, disabling flag_aggressive_loop_optimizations
inside number_of_latch_executions is one method.
To avoid the issue in other cases, e.g. the call cycle starts from
number_of_iterations_exit or number_of_iterations_exit_assumptions,
this patch disable flag_aggressive_loop_optimizations inside
number_of_iterations_exit_assumptions.

Thanks again.

BR,
Jiufu

> to SCEV and thus may recurse again - to me it would be more
> logical to try avoid recursing in number_of_latch_executions by
> setting ->nb_iterations to something early, maybe chrec_dont_know,
> to signal we're using something we're just trying to compute.
> 
> Richard.
> 
>> BR,
>> Jiufu
>> 
>> gcc/ChangeLog:
>> 
>> 	* tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
>> 	Disable/restore flag_aggressive_loop_optimizations.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>> 	* gcc.dg/tree-ssa/scev-16.c: New test.
>> 
>> ---
>>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
>>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
>>  2 files changed, 39 insertions(+), 4 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> 
>> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> index 06954e437f5..51bb501019e 100644
>> --- a/gcc/tree-ssa-loop-niter.c
>> +++ b/gcc/tree-ssa-loop-niter.c
>> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class 
>> loop *loop, edge exit,
>>        && !POINTER_TYPE_P (type))
>>      return false;
>> 
>> +  /* Before niter is calculated, avoid to analyze interim state. */
>> +  int old_aggressive_loop_optimizations = 
>> flag_aggressive_loop_optimizations;
>> +  flag_aggressive_loop_optimizations = 0;
>> +
>>    tree iv0_niters = NULL_TREE;
>>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>>  			      op0, &iv0, safe ? &iv0_niters : NULL, false))
>> -    return number_of_iterations_popcount (loop, exit, code, niter);
>> +    {
>> +      bool res = number_of_iterations_popcount (loop, exit, code, 
>> niter);
>> +      flag_aggressive_loop_optimizations = 
>> old_aggressive_loop_optimizations;
>> +      return res;
>> +    }
>>    tree iv1_niters = NULL_TREE;
>>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>>  			      op1, &iv1, safe ? &iv1_niters : NULL, false))
>> -    return false;
>> +    {
>> +      flag_aggressive_loop_optimizations = 
>> old_aggressive_loop_optimizations;
>> +      return false;
>> +    }
>>    /* Give up on complicated case.  */
>>    if (iv0_niters && iv1_niters)
>> -    return false;
>> -
>> +    {
>> +      flag_aggressive_loop_optimizations = 
>> old_aggressive_loop_optimizations;
>> +      return false;
>> +    }
>>    /* We don't want to see undefined signed overflow warnings while
>>       computing the number of iterations.  */
>>    fold_defer_overflow_warnings ();
>> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class 
>> loop *loop, edge exit,
>>  				  only_exit_p, safe))
>>      {
>>        fold_undefer_and_ignore_overflow_warnings ();
>> +      flag_aggressive_loop_optimizations = 
>> old_aggressive_loop_optimizations;
>>        return false;
>>      }
>> 
>> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class 
>> loop *loop, edge exit,
>>  					       niter->may_be_zero);
>> 
>>    fold_undefer_and_ignore_overflow_warnings ();
>> +  flag_aggressive_loop_optimizations = 
>> old_aggressive_loop_optimizations;
>> 
>>    /* If NITER has simplified into a constant, update MAX.  */
>>    if (TREE_CODE (niter->niter) == INTEGER_CST)
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> new file mode 100644
>> index 00000000000..708ffab88ca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
>> +
>> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
>> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
>> +
>> +int arr[1000];
>> +
>> +void
>> +s2i (short start, short end)
>> +{
>> +  int res = 0;
>> +  for (short i = start; i < end; i++)
>> +    {
>> +      int lv = i;
>> +      arr[lv] += lv;
>> +    }
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\) 
>> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
>>
  
Richard Biener Jan. 13, 2022, 1:39 p.m. UTC | #3
On Thu, 13 Jan 2022, guojiufu wrote:

> On 2022-01-03 22:30, Richard Biener wrote:
> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
> > 
> >> Hi,
> >> 
> >> Normaly, estimate_numbers_of_iterations get/caculate niter first,
> >> and then invokes infer_loop_bounds_from_undefined. While in some case,
> >> after a few call stacks, estimate_numbers_of_iterations is invoked before
> >> niter is ready (e.g. before number_of_latch_executions returns).
> >> 
> >> e.g. number_of_latch_executions->...follow_ssa_edge_expr-->
> >>   --> estimate_numbers_of_iterations --> 
> >> infer_loop_bounds_from_undefined.
> >> 
> >> Since niter is still not computed, call to infer_loop_bounds_from_undefined
> >> may not get final result.
> >> To avoid infer_loop_bounds_from_undefined to be called with interim state
> >> and avoid infer_loop_bounds_from_undefined generates interim data, during
> >> niter's computing, we could disable flag_aggressive_loop_optimizations.
> >> 
> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
> > 
> > So this is a optimality fix, not a correctness one?  I suppose the
> > estimates are computed/used from scev_probably_wraps_p via
> > loop_exits_before_overflow and ultimatively chrec_convert.
> > 
> > We have a call cycle here,
> > 
> > estimate_numbers_of_iterations -> number_of_latch_executions ->
> > ... -> estimate_numbers_of_iterations
> > 
> > where the first estimate_numbers_of_iterations will make sure
> > the later call will immediately return.
> 
> Hi Richard,
> Thanks for your comments! And sorry for the late reply.
> 
> In estimate_numbers_of_iterations, there is a guard to make sure
> the second call to estimate_numbers_of_iterations returns
> immediately.
> 
> Exactly as you said, it relates to scev_probably_wraps_p calls
> loop_exits_before_overflow.
> 
> The issue is: the first calling to estimate_numbers_of_iterations
> maybe inside number_of_latch_executions.
> 
> > 
> > I'm not sure what your patch tries to do - it seems to tackle
> > the case where we enter the cycle via number_of_latch_executions?
> > Why do we get "non-final" values?  idx_infer_loop_bounds resorts
> 
> Right, when the call cycle starts from number_of_latch_execution,
> the issue may occur:
> 
> number_of_latch_executions(*1st call)->..->
> analyze_scalar_evolution(IVs 1st) ->..follow_ssa_edge_expr..->
> loop_exits_before_overflow->
> estimate_numbers_of_iterations (*1st call)->
> number_of_latch_executions(*2nd call)->..->
> analyze_scalar_evolution(IVs 2nd)->..loop_exits_before_overflow->
> estimate_numbers_of_iterations(*2nd call)
> 
> The second calling to estimate_numbers_of_iterations returns quickly.
> And then, in the first calling to estimate_numbers_of_iterations,
> infer_loop_bounds_from_undefined is invoked.
> 
> And, function "infer_loop_bounds_from_undefined" instantiate/analyze
> SCEV for each SSA in the loop.
> *Here the issue occur*, these SCEVs are based on the interim IV's
> SCEV which come from "analyze_scalar_evolution(IVs 2nd)",
> and those IV's SCEV will be overridden by up level
> "analyze_scalar_evolution(IVs 1st)".

OK, so indeed analyze_scalar_evolution is not protected against
recursive invocation on the same SSA name (though it definitely
doesn't expect to do that).  We could fix that by pre-seeding
the cache conservatively in analyze_scalar_evolution or by
not overwriting the cached result of the recursive invocation.

But to re-iterate an unanswered question, is this a correctness issue
or an optimization issue?

> To handle this issue, disabling flag_aggressive_loop_optimizations
> inside number_of_latch_executions is one method.
> To avoid the issue in other cases, e.g. the call cycle starts from
> number_of_iterations_exit or number_of_iterations_exit_assumptions,
> this patch disable flag_aggressive_loop_optimizations inside
> number_of_iterations_exit_assumptions.

But disabling flag_aggressive_loop_optimizations is a very
non-intuitive way of avoiding recursive calls.  I'd rather
avoid those in a similar way estimate_numbers_of_iterations does,
for example with

diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 61d72c278a1..cc1e510b6c2 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -2807,7 +2807,7 @@ number_of_latch_executions (class loop *loop)
   if (dump_file && (dump_flags & TDF_SCEV))
     fprintf (dump_file, "(number_of_iterations_in_loop = \n");
 
-  res = chrec_dont_know;
+  loop->nb_iterations = res = chrec_dont_know;
   exit = single_exit (loop);
 
   if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))

though this doesn't seem to improve the SCEV analysis with your
testcase.  Alternatively one could more conciously compute an
"estimated" estimate like with

diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 61d72c278a1..8529c44d574 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -2802,6 +2802,19 @@ number_of_latch_executions (class loop *loop)
   if (res)
     return res;
 
+  /* When estimates for this loop are not computed yet compute them
+     without resorting to niter analysis results which means w/o
+     flag_aggressive_loop_optimizations.  */
+  if (loop->estimate_state == EST_NOT_COMPUTED)
+    {
+      bool saved_flag_aggressive_loop_optimizations
+       = flag_aggressive_loop_optimizations;
+      flag_aggressive_loop_optimizations = false;
+      estimate_numbers_of_iterations (loop);
+      flag_aggressive_loop_optimizations
+       = saved_flag_aggressive_loop_optimizations;
+    }
+
   may_be_zero = NULL_TREE;
 
but that also doesn't change your testcase for me.  Applying your
patch does the trick but then I really wonder why.

(get_scalar_evolution
  (scalar = lv_10)
  (scalar_evolution = {(int) start_7(D), +, 1}_1))

from

  <bb 3> [local count: 955630225]:
  # i_15 = PHI <i_12(6), start_7(D)(5)>
  lv_10 = (int) i_15;
  i.1_3 = (unsigned short) i_15;
  _4 = i.1_3 + 1;
  i_12 = (short int) _4;

where the 'i' IV can wrap when start >= end but that's ruled out
by the entry test.  The scalar evolution for lv_10 would be
wrong if we didn't conclude that so I guess this optimization
is provided by the estimate somehow.

I also tried

diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 935d2d4d8f3..d1787ba39b6 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -8093,6 +8093,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
class loop *loop,
       fprintf (dump_file, "\n");
     }
 
+  estimate_numbers_of_iterations (loop);
+
   body = get_loop_body (loop);
   data->body_includes_call = loop_body_includes_call (body, 
loop->num_nodes);
   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

to get into the cycles from the "correct" entry but that does
not help either.  Nor does

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b767056aeb0..f058be04836 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2534,6 +2534,14 @@ number_of_iterations_exit_assumptions (class loop 
*loop, edge exit,
       && !POINTER_TYPE_P (type))
     return false;
 
+  if (loop->estimate_state == EST_NOT_COMPUTED)
+    {
+      bool saved = flag_aggressive_loop_optimizations;
+      flag_aggressive_loop_optimizations = false;
+      estimate_numbers_of_iterations (loop);
+      flag_aggressive_loop_optimizations = saved;
+    }
+
   tree iv0_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
                              op0, &iv0, safe ? &iv0_niters : NULL, 
false))

or

diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index 61d72c278a1..d1af89eb459 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -518,7 +518,8 @@ set_scalar_evolution (basic_block instantiated_below, 
tree scalar, tree chrec)
        nb_set_scev++;
     }
 
-  *scalar_info = chrec;
+  if (*scalar_info == chrec_not_analyzed_yet)
+    *scalar_info = chrec;
 }
 
 /* Retrieve the chrec associated to SCALAR instantiated below

That said, having the cycles is bad, we should definitively break
them (if only for compile-time reasons).  But I don't really
understand how your patch helps and my attempts do not which
means I am missing a critical piece of understanding ... :/

Thanks,
Richard.

> Thanks again.
> 
> BR,
> Jiufu
> 
> > to SCEV and thus may recurse again - to me it would be more
> > logical to try avoid recursing in number_of_latch_executions by
> > setting ->nb_iterations to something early, maybe chrec_dont_know,
> > to signal we're using something we're just trying to compute.
> > 
> > Richard.
> > 
> >> BR,
> >> Jiufu
> >> 
> >> gcc/ChangeLog:
> >> 
> >>  * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
> >>  Disable/restore flag_aggressive_loop_optimizations.
> >> 
> >> gcc/testsuite/ChangeLog:
> >> 
> >>  * gcc.dg/tree-ssa/scev-16.c: New test.
> >> 
> >> ---
> >>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
> >>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
> >>  2 files changed, 39 insertions(+), 4 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> 
> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> index 06954e437f5..51bb501019e 100644
> >> --- a/gcc/tree-ssa-loop-niter.c
> >> +++ b/gcc/tree-ssa-loop-niter.c
> >> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop
> >> *loop, edge exit,
> >>        && !POINTER_TYPE_P (type))
> >>      return false;
> >> 
> >> +  /* Before niter is calculated, avoid to analyze interim state. */
> >> +  int old_aggressive_loop_optimizations =
> >> flag_aggressive_loop_optimizations;
> >> +  flag_aggressive_loop_optimizations = 0;
> >> +
> >>    tree iv0_niters = NULL_TREE;
> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
> >> -    return number_of_iterations_popcount (loop, exit, code, niter);
> >> +    {
> >> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
> >> +      flag_aggressive_loop_optimizations =
> >> old_aggressive_loop_optimizations;
> >> +      return res;
> >> +    }
> >>    tree iv1_niters = NULL_TREE;
> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
> >> -    return false;
> >> +    {
> >> +      flag_aggressive_loop_optimizations =
> >> old_aggressive_loop_optimizations;
> >> +      return false;
> >> +    }
> >>    /* Give up on complicated case.  */
> >>    if (iv0_niters && iv1_niters)
> >> -    return false;
> >> -
> >> +    {
> >> +      flag_aggressive_loop_optimizations =
> >> old_aggressive_loop_optimizations;
> >> +      return false;
> >> +    }
> >>    /* We don't want to see undefined signed overflow warnings while
> >>       computing the number of iterations.  */
> >>    fold_defer_overflow_warnings ();
> >> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop
> >> *loop, edge exit,
> >>      				  only_exit_p, safe))
> >>      {
> >>        fold_undefer_and_ignore_overflow_warnings ();
> >> +      flag_aggressive_loop_optimizations =
> >> old_aggressive_loop_optimizations;
> >>        return false;
> >>      }
> >> 
> >> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop
> >> *loop, edge exit,
> >>              niter->may_be_zero);
> >> 
> >>    fold_undefer_and_ignore_overflow_warnings ();
> >> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> >> 
> >>    /* If NITER has simplified into a constant, update MAX.  */
> >>    if (TREE_CODE (niter->niter) == INTEGER_CST)
> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> new file mode 100644
> >> index 00000000000..708ffab88ca
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> @@ -0,0 +1,20 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
> >> +
> >> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
> >> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
> >> +
> >> +int arr[1000];
> >> +
> >> +void
> >> +s2i (short start, short end)
> >> +{
> >> +  int res = 0;
> >> +  for (short i = start; i < end; i++)
> >> +    {
> >> +      int lv = i;
> >> +      arr[lv] += lv;
> >> +    }
> >> +}
> >> +
> >> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\)
> >> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
> >> 
> 
>
  
Jiufu Guo Jan. 14, 2022, 5:38 a.m. UTC | #4
Richard Biener <rguenther@suse.de> writes:

> On Thu, 13 Jan 2022, guojiufu wrote:
>
>> On 2022-01-03 22:30, Richard Biener wrote:
>> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
>> > 
>> >> Hi,
>> >> ...
>> >> 
>> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
>> > 
>> > So this is a optimality fix, not a correctness one?  I suppose the
>> > estimates are computed/used from scev_probably_wraps_p via
>> > loop_exits_before_overflow and ultimatively chrec_convert.
>> > 
>> > We have a call cycle here,
>> > 
>> > estimate_numbers_of_iterations -> number_of_latch_executions ->
>> > ... -> estimate_numbers_of_iterations
>> > 
>> > where the first estimate_numbers_of_iterations will make sure
>> > the later call will immediately return.
>> 
>> Hi Richard,
>> Thanks for your comments! And sorry for the late reply.
>> 
>> In estimate_numbers_of_iterations, there is a guard to make sure
>> the second call to estimate_numbers_of_iterations returns
>> immediately.
>> 
>> Exactly as you said, it relates to scev_probably_wraps_p calls
>> loop_exits_before_overflow.
>> 
>> The issue is: the first calling to estimate_numbers_of_iterations
>> maybe inside number_of_latch_executions.
>> 
>> > 
>> > I'm not sure what your patch tries to do - it seems to tackle
>> > the case where we enter the cycle via number_of_latch_executions?
>> > Why do we get "non-final" values?  idx_infer_loop_bounds resorts
>> 
>> Right, when the call cycle starts from number_of_latch_execution,
>> the issue may occur:
>> 
>> number_of_latch_executions(*1st call)->..->
>> analyze_scalar_evolution(IVs 1st) ->..follow_ssa_edge_expr..->
>> loop_exits_before_overflow->
>> estimate_numbers_of_iterations (*1st call)->
>> number_of_latch_executions(*2nd call)->..->
>> analyze_scalar_evolution(IVs 2nd)->..loop_exits_before_overflow->
>> estimate_numbers_of_iterations(*2nd call)
>> 
>> The second calling to estimate_numbers_of_iterations returns quickly.
>> And then, in the first calling to estimate_numbers_of_iterations,
>> infer_loop_bounds_from_undefined is invoked.
>> 
>> And, function "infer_loop_bounds_from_undefined" instantiate/analyze
>> SCEV for each SSA in the loop.
>> *Here the issue occur*, these SCEVs are based on the interim IV's
>> SCEV which come from "analyze_scalar_evolution(IVs 2nd)",
>> and those IV's SCEV will be overridden by up level
>> "analyze_scalar_evolution(IVs 1st)".
>
> OK, so indeed analyze_scalar_evolution is not protected against
> recursive invocation on the same SSA name (though it definitely
> doesn't expect to do that).  We could fix that by pre-seeding
> the cache conservatively in analyze_scalar_evolution or by
> not overwriting the cached result of the recursive invocation.
>
> But to re-iterate an unanswered question, is this a correctness issue
> or an optimization issue?

Hi Richard,

Thanks for your time and patience on review this!

I would say it is an optimization issue for the current code,
it does not fix known error.

The patch could help compiling-time.  Another benefit, this patch
would be able to improve some scev(s) if the scev is cached in
infer_loop_bounds_from_undefined under the call stack where IV's
SCEV is under analyzing.

Yes, in analyze_scalar_evolution call chain, it may recursive on
same SSA name.
While outer level analyze_scalar_evolution 'may' get better
chrec(POLYNOMIAL_CHREC), inner one may get other scev (e.g.
conversion).  I'm even wondering this recursive is intended :).
It may help to handle the chick-egg issue(wrap vs. niter).

>
>> To handle this issue, disabling flag_aggressive_loop_optimizations
>> inside number_of_latch_executions is one method.
>> To avoid the issue in other cases, e.g. the call cycle starts from
>> number_of_iterations_exit or number_of_iterations_exit_assumptions,
>> this patch disable flag_aggressive_loop_optimizations inside
>> number_of_iterations_exit_assumptions.
>
> But disabling flag_aggressive_loop_optimizations is a very
> non-intuitive way of avoiding recursive calls.  I'd rather
> avoid those in a similar way estimate_numbers_of_iterations does,
> for example with
>
> diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> index 61d72c278a1..cc1e510b6c2 100644
> --- a/gcc/tree-scalar-evolution.c
> +++ b/gcc/tree-scalar-evolution.c
> @@ -2807,7 +2807,7 @@ number_of_latch_executions (class loop *loop)
>    if (dump_file && (dump_flags & TDF_SCEV))
>      fprintf (dump_file, "(number_of_iterations_in_loop = \n");
>  
> -  res = chrec_dont_know;
> +  loop->nb_iterations = res = chrec_dont_know;
>    exit = single_exit (loop);
>  
>    if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
>
> though this doesn't seem to improve the SCEV analysis with your
> testcase.  Alternatively one could more conciously compute an
> "estimated" estimate like with
>
> diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> index 61d72c278a1..8529c44d574 100644
> --- a/gcc/tree-scalar-evolution.c
> +++ b/gcc/tree-scalar-evolution.c
> @@ -2802,6 +2802,19 @@ number_of_latch_executions (class loop *loop)
>    if (res)
>      return res;
>  
> +  /* When estimates for this loop are not computed yet compute them
> +     without resorting to niter analysis results which means w/o
> +     flag_aggressive_loop_optimizations.  */
> +  if (loop->estimate_state == EST_NOT_COMPUTED)
> +    {
> +      bool saved_flag_aggressive_loop_optimizations
> +       = flag_aggressive_loop_optimizations;
> +      flag_aggressive_loop_optimizations = false;
> +      estimate_numbers_of_iterations (loop);
> +      flag_aggressive_loop_optimizations
> +       = saved_flag_aggressive_loop_optimizations;
> +    }
> +
>    may_be_zero = NULL_TREE;
>  
> but that also doesn't change your testcase for me.  Applying your
> patch does the trick but then I really wonder why.
>
> (get_scalar_evolution
>   (scalar = lv_10)
>   (scalar_evolution = {(int) start_7(D), +, 1}_1))
>
> from
>
>   <bb 3> [local count: 955630225]:
>   # i_15 = PHI <i_12(6), start_7(D)(5)>
>   lv_10 = (int) i_15;
>   i.1_3 = (unsigned short) i_15;
>   _4 = i.1_3 + 1;
>   i_12 = (short int) _4;
>
> where the 'i' IV can wrap when start >= end but that's ruled out
> by the entry test.  The scalar evolution for lv_10 would be
> wrong if we didn't conclude that so I guess this optimization
> is provided by the estimate somehow.

In the recursive chain: loop_distribution->number_of_latch_executions

analyze_scalar_evolution(i_15,1)->follow_ssa_edge_expr->chrec_convert
->loop_exits_before_overflow(1st)->estimate_numbers_of_iterations(1st)
->analyze_scalar_evolution(i_15,2)->chrec_convert->
loop_exits_before_overflow(2nd)->estimate_numbers_of_iterations(2nd)

1. Because estimate_numbers_of_iterations(2nd) returns quickly, then
loop_exits_before_overflow(2nd) returns false; and then
the inner analyze_scalar_evolution(i_15,2) gets
"(short int) {(unsigned short) start_7(D), +, 1}_1" for i_15.

2. When call stack back to loop_exits_before_overflow(1st), there is
some loop info (e.g. control_ivs) ready to check. It confirms no
overflows on the subject.  And when back to
analyze_scalar_evolution(i_15,1), it gets "{()start_7(D), +, 1}_1"
for i_15.

3. If flag_aggressive_loop_optimizations is on, lv_10's scev is
computed in 'estimate_numbers_of_iterations(1st)' through function
'infer_loop_bounds_from_undefined'.  At that time, i_15's scev
is still '(short int) {(unsigned short) start_7(D), +, 1}_1'.
And then "(int) (short int) {(unsigned short) start_7(D), +, 1}_1"
is cached for lv_10=(int)i_15.

>
> I also tried
>
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 935d2d4d8f3..d1787ba39b6 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -8093,6 +8093,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
> class loop *loop,
>        fprintf (dump_file, "\n");
>      }
>  
> +  estimate_numbers_of_iterations (loop);
> +
>    body = get_loop_body (loop);
>    data->body_includes_call = loop_body_includes_call (body, 
> loop->num_nodes);
>    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
>
> to get into the cycles from the "correct" entry but that does
> not help either.  Nor does
>
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b767056aeb0..f058be04836 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -2534,6 +2534,14 @@ number_of_iterations_exit_assumptions (class loop 
> *loop, edge exit,
>        && !POINTER_TYPE_P (type))
>      return false;
>  
> +  if (loop->estimate_state == EST_NOT_COMPUTED)
> +    {
> +      bool saved = flag_aggressive_loop_optimizations;
> +      flag_aggressive_loop_optimizations = false;
> +      estimate_numbers_of_iterations (loop);
> +      flag_aggressive_loop_optimizations = saved;
> +    }
> +
>    tree iv0_niters = NULL_TREE;
>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>                               op0, &iv0, safe ? &iv0_niters : NULL, 
> false))
>
> or
>
> diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> index 61d72c278a1..d1af89eb459 100644
> --- a/gcc/tree-scalar-evolution.c
> +++ b/gcc/tree-scalar-evolution.c
> @@ -518,7 +518,8 @@ set_scalar_evolution (basic_block instantiated_below, 
> tree scalar, tree chrec)
>         nb_set_scev++;
>      }
>  
> -  *scalar_info = chrec;
> +  if (*scalar_info == chrec_not_analyzed_yet)
> +    *scalar_info = chrec;
>  }
>  
>  /* Retrieve the chrec associated to SCALAR instantiated below
>
> That said, having the cycles is bad, we should definitively break
> them (if only for compile-time reasons).  But I don't really
> understand how your patch helps and my attempts do not which
> means I am missing a critical piece of understanding ... :/

This patch disables flag_aggressive_loop_optimizations before
analyze_scalar_evolution is called, then lv_10's scev is not
computed during this call cycle.  lv_10's scev is calculated
when it other passes (e.g. ivopt) request, at that time i_15
has 'final' scev.

I had also tried to disable recursive from analyze_scalar_evolution
on the same ssa name(i_15), and directly get the final result.  But
I'm not finding a good way yet.

Again thanks for your suggestions!

Thanks!
Jiufu

>
> Thanks,
> Richard.
>
>> Thanks again.
>> 
>> BR,
>> Jiufu
>> 
>> > to SCEV and thus may recurse again - to me it would be more
>> > logical to try avoid recursing in number_of_latch_executions by
>> > setting ->nb_iterations to something early, maybe chrec_dont_know,
>> > to signal we're using something we're just trying to compute.
>> > 
>> > Richard.
>> > 
>> >> BR,
>> >> Jiufu
>> >> 
>> >> gcc/ChangeLog:
>> >> 
>> >>  * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
>> >>  Disable/restore flag_aggressive_loop_optimizations.
>> >> 
>> >> gcc/testsuite/ChangeLog:
>> >> 
>> >>  * gcc.dg/tree-ssa/scev-16.c: New test.
>> >> 
>> >> ---
>> >>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
>> >>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
>> >>  2 files changed, 39 insertions(+), 4 deletions(-)
>> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> 
>> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> >> index 06954e437f5..51bb501019e 100644
>> >> --- a/gcc/tree-ssa-loop-niter.c
>> >> +++ b/gcc/tree-ssa-loop-niter.c
>> >> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop
>> >> *loop, edge exit,
>> >>        && !POINTER_TYPE_P (type))
>> >>      return false;
>> >> 
>> >> +  /* Before niter is calculated, avoid to analyze interim state. */
>> >> +  int old_aggressive_loop_optimizations =
>> >> flag_aggressive_loop_optimizations;
>> >> +  flag_aggressive_loop_optimizations = 0;
>> >> +
>> >>    tree iv0_niters = NULL_TREE;
>> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>> >> 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
>> >> -    return number_of_iterations_popcount (loop, exit, code, niter);
>> >> +    {
>> >> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
>> >> +      flag_aggressive_loop_optimizations =
>> >> old_aggressive_loop_optimizations;
>> >> +      return res;
>> >> +    }
>> >>    tree iv1_niters = NULL_TREE;
>> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>> >> 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
>> >> -    return false;
>> >> +    {
>> >> +      flag_aggressive_loop_optimizations =
>> >> old_aggressive_loop_optimizations;
>> >> +      return false;
>> >> +    }
>> >>    /* Give up on complicated case.  */
>> >>    if (iv0_niters && iv1_niters)
>> >> -    return false;
>> >> -
>> >> +    {
>> >> +      flag_aggressive_loop_optimizations =
>> >> old_aggressive_loop_optimizations;
>> >> +      return false;
>> >> +    }
>> >>    /* We don't want to see undefined signed overflow warnings while
>> >>       computing the number of iterations.  */
>> >>    fold_defer_overflow_warnings ();
>> >> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop
>> >> *loop, edge exit,
>> >>      				  only_exit_p, safe))
>> >>      {
>> >>        fold_undefer_and_ignore_overflow_warnings ();
>> >> +      flag_aggressive_loop_optimizations =
>> >> old_aggressive_loop_optimizations;
>> >>        return false;
>> >>      }
>> >> 
>> >> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop
>> >> *loop, edge exit,
>> >>              niter->may_be_zero);
>> >> 
>> >>    fold_undefer_and_ignore_overflow_warnings ();
>> >> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
>> >> 
>> >>    /* If NITER has simplified into a constant, update MAX.  */
>> >>    if (TREE_CODE (niter->niter) == INTEGER_CST)
>> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> new file mode 100644
>> >> index 00000000000..708ffab88ca
>> >> --- /dev/null
>> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> @@ -0,0 +1,20 @@
>> >> +/* { dg-do compile } */
>> >> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
>> >> +
>> >> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
>> >> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
>> >> +
>> >> +int arr[1000];
>> >> +
>> >> +void
>> >> +s2i (short start, short end)
>> >> +{
>> >> +  int res = 0;
>> >> +  for (short i = start; i < end; i++)
>> >> +    {
>> >> +      int lv = i;
>> >> +      arr[lv] += lv;
>> >> +    }
>> >> +}
>> >> +
>> >> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\)
>> >> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
>> >> 
>> 
>>
  
Richard Biener Jan. 14, 2022, 12:04 p.m. UTC | #5
On Fri, 14 Jan 2022, Jiufu Guo wrote:

> Richard Biener <rguenther@suse.de> writes:
> 
> > On Thu, 13 Jan 2022, guojiufu wrote:
> >
> >> On 2022-01-03 22:30, Richard Biener wrote:
> >> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
> >> > 
> >> >> Hi,
> >> >> ...
> >> >> 
> >> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
> >> > 
> >> > So this is a optimality fix, not a correctness one?  I suppose the
> >> > estimates are computed/used from scev_probably_wraps_p via
> >> > loop_exits_before_overflow and ultimatively chrec_convert.
> >> > 
> >> > We have a call cycle here,
> >> > 
> >> > estimate_numbers_of_iterations -> number_of_latch_executions ->
> >> > ... -> estimate_numbers_of_iterations
> >> > 
> >> > where the first estimate_numbers_of_iterations will make sure
> >> > the later call will immediately return.
> >> 
> >> Hi Richard,
> >> Thanks for your comments! And sorry for the late reply.
> >> 
> >> In estimate_numbers_of_iterations, there is a guard to make sure
> >> the second call to estimate_numbers_of_iterations returns
> >> immediately.
> >> 
> >> Exactly as you said, it relates to scev_probably_wraps_p calls
> >> loop_exits_before_overflow.
> >> 
> >> The issue is: the first calling to estimate_numbers_of_iterations
> >> maybe inside number_of_latch_executions.
> >> 
> >> > 
> >> > I'm not sure what your patch tries to do - it seems to tackle
> >> > the case where we enter the cycle via number_of_latch_executions?
> >> > Why do we get "non-final" values?  idx_infer_loop_bounds resorts
> >> 
> >> Right, when the call cycle starts from number_of_latch_execution,
> >> the issue may occur:
> >> 
> >> number_of_latch_executions(*1st call)->..->
> >> analyze_scalar_evolution(IVs 1st) ->..follow_ssa_edge_expr..->
> >> loop_exits_before_overflow->
> >> estimate_numbers_of_iterations (*1st call)->
> >> number_of_latch_executions(*2nd call)->..->
> >> analyze_scalar_evolution(IVs 2nd)->..loop_exits_before_overflow->
> >> estimate_numbers_of_iterations(*2nd call)
> >> 
> >> The second calling to estimate_numbers_of_iterations returns quickly.
> >> And then, in the first calling to estimate_numbers_of_iterations,
> >> infer_loop_bounds_from_undefined is invoked.
> >> 
> >> And, function "infer_loop_bounds_from_undefined" instantiate/analyze
> >> SCEV for each SSA in the loop.
> >> *Here the issue occur*, these SCEVs are based on the interim IV's
> >> SCEV which come from "analyze_scalar_evolution(IVs 2nd)",
> >> and those IV's SCEV will be overridden by up level
> >> "analyze_scalar_evolution(IVs 1st)".
> >
> > OK, so indeed analyze_scalar_evolution is not protected against
> > recursive invocation on the same SSA name (though it definitely
> > doesn't expect to do that).  We could fix that by pre-seeding
> > the cache conservatively in analyze_scalar_evolution or by
> > not overwriting the cached result of the recursive invocation.
> >
> > But to re-iterate an unanswered question, is this a correctness issue
> > or an optimization issue?
> 
> Hi Richard,
> 
> Thanks for your time and patience on review this!
> 
> I would say it is an optimization issue for the current code,
> it does not fix known error.
> 
> The patch could help compiling-time.  Another benefit, this patch
> would be able to improve some scev(s) if the scev is cached in
> infer_loop_bounds_from_undefined under the call stack where IV's
> SCEV is under analyzing.
> 
> Yes, in analyze_scalar_evolution call chain, it may recursive on
> same SSA name.
> While outer level analyze_scalar_evolution 'may' get better
> chrec(POLYNOMIAL_CHREC), inner one may get other scev (e.g.
> conversion).  I'm even wondering this recursive is intended :).
> It may help to handle the chick-egg issue(wrap vs. niter).
> 
> >
> >> To handle this issue, disabling flag_aggressive_loop_optimizations
> >> inside number_of_latch_executions is one method.
> >> To avoid the issue in other cases, e.g. the call cycle starts from
> >> number_of_iterations_exit or number_of_iterations_exit_assumptions,
> >> this patch disable flag_aggressive_loop_optimizations inside
> >> number_of_iterations_exit_assumptions.
> >
> > But disabling flag_aggressive_loop_optimizations is a very
> > non-intuitive way of avoiding recursive calls.  I'd rather
> > avoid those in a similar way estimate_numbers_of_iterations does,
> > for example with
> >
> > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> > index 61d72c278a1..cc1e510b6c2 100644
> > --- a/gcc/tree-scalar-evolution.c
> > +++ b/gcc/tree-scalar-evolution.c
> > @@ -2807,7 +2807,7 @@ number_of_latch_executions (class loop *loop)
> >    if (dump_file && (dump_flags & TDF_SCEV))
> >      fprintf (dump_file, "(number_of_iterations_in_loop = \n");
> >  
> > -  res = chrec_dont_know;
> > +  loop->nb_iterations = res = chrec_dont_know;
> >    exit = single_exit (loop);
> >  
> >    if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
> >
> > though this doesn't seem to improve the SCEV analysis with your
> > testcase.  Alternatively one could more conciously compute an
> > "estimated" estimate like with
> >
> > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> > index 61d72c278a1..8529c44d574 100644
> > --- a/gcc/tree-scalar-evolution.c
> > +++ b/gcc/tree-scalar-evolution.c
> > @@ -2802,6 +2802,19 @@ number_of_latch_executions (class loop *loop)
> >    if (res)
> >      return res;
> >  
> > +  /* When estimates for this loop are not computed yet compute them
> > +     without resorting to niter analysis results which means w/o
> > +     flag_aggressive_loop_optimizations.  */
> > +  if (loop->estimate_state == EST_NOT_COMPUTED)
> > +    {
> > +      bool saved_flag_aggressive_loop_optimizations
> > +       = flag_aggressive_loop_optimizations;
> > +      flag_aggressive_loop_optimizations = false;
> > +      estimate_numbers_of_iterations (loop);
> > +      flag_aggressive_loop_optimizations
> > +       = saved_flag_aggressive_loop_optimizations;
> > +    }
> > +
> >    may_be_zero = NULL_TREE;
> >  
> > but that also doesn't change your testcase for me.  Applying your
> > patch does the trick but then I really wonder why.
> >
> > (get_scalar_evolution
> >   (scalar = lv_10)
> >   (scalar_evolution = {(int) start_7(D), +, 1}_1))
> >
> > from
> >
> >   <bb 3> [local count: 955630225]:
> >   # i_15 = PHI <i_12(6), start_7(D)(5)>
> >   lv_10 = (int) i_15;
> >   i.1_3 = (unsigned short) i_15;
> >   _4 = i.1_3 + 1;
> >   i_12 = (short int) _4;
> >
> > where the 'i' IV can wrap when start >= end but that's ruled out
> > by the entry test.  The scalar evolution for lv_10 would be
> > wrong if we didn't conclude that so I guess this optimization
> > is provided by the estimate somehow.
> 
> In the recursive chain: loop_distribution->number_of_latch_executions
> 
> analyze_scalar_evolution(i_15,1)->follow_ssa_edge_expr->chrec_convert
> ->loop_exits_before_overflow(1st)->estimate_numbers_of_iterations(1st)
> ->analyze_scalar_evolution(i_15,2)->chrec_convert->
> loop_exits_before_overflow(2nd)->estimate_numbers_of_iterations(2nd)
> 
> 1. Because estimate_numbers_of_iterations(2nd) returns quickly, then
> loop_exits_before_overflow(2nd) returns false; and then
> the inner analyze_scalar_evolution(i_15,2) gets
> "(short int) {(unsigned short) start_7(D), +, 1}_1" for i_15.
> 
> 2. When call stack back to loop_exits_before_overflow(1st), there is
> some loop info (e.g. control_ivs) ready to check. It confirms no
> overflows on the subject.  And when back to
> analyze_scalar_evolution(i_15,1), it gets "{()start_7(D), +, 1}_1"
> for i_15.
> 
> 3. If flag_aggressive_loop_optimizations is on, lv_10's scev is
> computed in 'estimate_numbers_of_iterations(1st)' through function
> 'infer_loop_bounds_from_undefined'.  At that time, i_15's scev
> is still '(short int) {(unsigned short) start_7(D), +, 1}_1'.
> And then "(int) (short int) {(unsigned short) start_7(D), +, 1}_1"
> is cached for lv_10=(int)i_15.

I see, so it's important to not compute and cache the info for lv_10
too early (as side-effect of infer_loop_bounds_from_undefined, which
does scalar evolution analysis for each variable).  While we
override the global caching of analyze_scalar_evolution the per
SSA name cache for SCEV analysis isn't overridden.  That also means
computing the estimates early will break your patch (I've
tested calling estimate_numbers_of_iterations explicitely from
loop distribution for example).

What I'm trying to see is whether we can do some more concious
setup of control IV computation and estimate computation.  While
your patch produces the desired result it is far from obvious
why exactly it does this since it also does it in a "midlevel"
place (of course my attempts to do it in a more obvious position
failed).

But first of all yes, we should disallow / early out on recursions
via public APIs like estimate_numbers_of_iterations (already done)
or number_of_latch_executions (missing) or 
number_of_iterations_exit[_assumptions] (very difficult there).

IMHO that we lazily compute estimate_numbers_of_iterations and that
computes niter for each exit of a loop is a misdesign - the estimate
should be computed from the toplevel, and maybe independently for
each exit?  I think that Honza changed it this way to make sure the
estimates are always available when queried but I may mis-remember.

With your patch, ontop of that limiting recursion of
number_of_latch_executions doesn't break the positive effect
at least.

One unwanted side-effect of your patch might be that the
computed estimate is now recorded w/o infer_loop_bounds_from_undefined
which means it could be worse (and we won't re-compute it).

All in all it is somewhat of a mess and I'm not convinced your
patch is an improvement in this regard :/  It looks like there's
a chicken and egg problem with using and recording (at least one)
control IV and SCEV caching whilst searching for one.

Richard.


> >
> > I also tried
> >
> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> > index 935d2d4d8f3..d1787ba39b6 100644
> > --- a/gcc/tree-ssa-loop-ivopts.c
> > +++ b/gcc/tree-ssa-loop-ivopts.c
> > @@ -8093,6 +8093,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
> > class loop *loop,
> >        fprintf (dump_file, "\n");
> >      }
> >  
> > +  estimate_numbers_of_iterations (loop);
> > +
> >    body = get_loop_body (loop);
> >    data->body_includes_call = loop_body_includes_call (body, 
> > loop->num_nodes);
> >    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
> >
> > to get into the cycles from the "correct" entry but that does
> > not help either.  Nor does
> >
> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> > index b767056aeb0..f058be04836 100644
> > --- a/gcc/tree-ssa-loop-niter.c
> > +++ b/gcc/tree-ssa-loop-niter.c
> > @@ -2534,6 +2534,14 @@ number_of_iterations_exit_assumptions (class loop 
> > *loop, edge exit,
> >        && !POINTER_TYPE_P (type))
> >      return false;
> >  
> > +  if (loop->estimate_state == EST_NOT_COMPUTED)
> > +    {
> > +      bool saved = flag_aggressive_loop_optimizations;
> > +      flag_aggressive_loop_optimizations = false;
> > +      estimate_numbers_of_iterations (loop);
> > +      flag_aggressive_loop_optimizations = saved;
> > +    }
> > +
> >    tree iv0_niters = NULL_TREE;
> >    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >                               op0, &iv0, safe ? &iv0_niters : NULL, 
> > false))
> >
> > or
> >
> > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> > index 61d72c278a1..d1af89eb459 100644
> > --- a/gcc/tree-scalar-evolution.c
> > +++ b/gcc/tree-scalar-evolution.c
> > @@ -518,7 +518,8 @@ set_scalar_evolution (basic_block instantiated_below, 
> > tree scalar, tree chrec)
> >         nb_set_scev++;
> >      }
> >  
> > -  *scalar_info = chrec;
> > +  if (*scalar_info == chrec_not_analyzed_yet)
> > +    *scalar_info = chrec;
> >  }
> >  
> >  /* Retrieve the chrec associated to SCALAR instantiated below
> >
> > That said, having the cycles is bad, we should definitively break
> > them (if only for compile-time reasons).  But I don't really
> > understand how your patch helps and my attempts do not which
> > means I am missing a critical piece of understanding ... :/
> 
> This patch disables flag_aggressive_loop_optimizations before
> analyze_scalar_evolution is called, then lv_10's scev is not
> computed during this call cycle.  lv_10's scev is calculated
> when it other passes (e.g. ivopt) request, at that time i_15
> has 'final' scev.
> 
> I had also tried to disable recursive from analyze_scalar_evolution
> on the same ssa name(i_15), and directly get the final result.  But
> I'm not finding a good way yet.
> 
> Again thanks for your suggestions!
> 
> Thanks!
> Jiufu
> 
> >
> > Thanks,
> > Richard.
> >
> >> Thanks again.
> >> 
> >> BR,
> >> Jiufu
> >> 
> >> > to SCEV and thus may recurse again - to me it would be more
> >> > logical to try avoid recursing in number_of_latch_executions by
> >> > setting ->nb_iterations to something early, maybe chrec_dont_know,
> >> > to signal we're using something we're just trying to compute.
> >> > 
> >> > Richard.
> >> > 
> >> >> BR,
> >> >> Jiufu
> >> >> 
> >> >> gcc/ChangeLog:
> >> >> 
> >> >>  * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
> >> >>  Disable/restore flag_aggressive_loop_optimizations.
> >> >> 
> >> >> gcc/testsuite/ChangeLog:
> >> >> 
> >> >>  * gcc.dg/tree-ssa/scev-16.c: New test.
> >> >> 
> >> >> ---
> >> >>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
> >> >>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
> >> >>  2 files changed, 39 insertions(+), 4 deletions(-)
> >> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> 
> >> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> >> index 06954e437f5..51bb501019e 100644
> >> >> --- a/gcc/tree-ssa-loop-niter.c
> >> >> +++ b/gcc/tree-ssa-loop-niter.c
> >> >> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop
> >> >> *loop, edge exit,
> >> >>        && !POINTER_TYPE_P (type))
> >> >>      return false;
> >> >> 
> >> >> +  /* Before niter is calculated, avoid to analyze interim state. */
> >> >> +  int old_aggressive_loop_optimizations =
> >> >> flag_aggressive_loop_optimizations;
> >> >> +  flag_aggressive_loop_optimizations = 0;
> >> >> +
> >> >>    tree iv0_niters = NULL_TREE;
> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> >> 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
> >> >> -    return number_of_iterations_popcount (loop, exit, code, niter);
> >> >> +    {
> >> >> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
> >> >> +      flag_aggressive_loop_optimizations =
> >> >> old_aggressive_loop_optimizations;
> >> >> +      return res;
> >> >> +    }
> >> >>    tree iv1_niters = NULL_TREE;
> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> >> 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
> >> >> -    return false;
> >> >> +    {
> >> >> +      flag_aggressive_loop_optimizations =
> >> >> old_aggressive_loop_optimizations;
> >> >> +      return false;
> >> >> +    }
> >> >>    /* Give up on complicated case.  */
> >> >>    if (iv0_niters && iv1_niters)
> >> >> -    return false;
> >> >> -
> >> >> +    {
> >> >> +      flag_aggressive_loop_optimizations =
> >> >> old_aggressive_loop_optimizations;
> >> >> +      return false;
> >> >> +    }
> >> >>    /* We don't want to see undefined signed overflow warnings while
> >> >>       computing the number of iterations.  */
> >> >>    fold_defer_overflow_warnings ();
> >> >> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop
> >> >> *loop, edge exit,
> >> >>      				  only_exit_p, safe))
> >> >>      {
> >> >>        fold_undefer_and_ignore_overflow_warnings ();
> >> >> +      flag_aggressive_loop_optimizations =
> >> >> old_aggressive_loop_optimizations;
> >> >>        return false;
> >> >>      }
> >> >> 
> >> >> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop
> >> >> *loop, edge exit,
> >> >>              niter->may_be_zero);
> >> >> 
> >> >>    fold_undefer_and_ignore_overflow_warnings ();
> >> >> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> >> >> 
> >> >>    /* If NITER has simplified into a constant, update MAX.  */
> >> >>    if (TREE_CODE (niter->niter) == INTEGER_CST)
> >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> new file mode 100644
> >> >> index 00000000000..708ffab88ca
> >> >> --- /dev/null
> >> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> @@ -0,0 +1,20 @@
> >> >> +/* { dg-do compile } */
> >> >> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
> >> >> +
> >> >> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
> >> >> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
> >> >> +
> >> >> +int arr[1000];
> >> >> +
> >> >> +void
> >> >> +s2i (short start, short end)
> >> >> +{
> >> >> +  int res = 0;
> >> >> +  for (short i = start; i < end; i++)
> >> >> +    {
> >> >> +      int lv = i;
> >> >> +      arr[lv] += lv;
> >> >> +    }
> >> >> +}
> >> >> +
> >> >> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\)
> >> >> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
> >> >> 
> >> 
> >> 
>
  
Jiufu Guo Jan. 17, 2022, 2:05 p.m. UTC | #6
Richard Biener <rguenther@suse.de> writes:

> On Fri, 14 Jan 2022, Jiufu Guo wrote:
>
>> Richard Biener <rguenther@suse.de> writes:
>> 
>> > On Thu, 13 Jan 2022, guojiufu wrote:
>> >
>> >> On 2022-01-03 22:30, Richard Biener wrote:
>> >> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
>> >> > 
>> >> >> Hi,
>> >> >> ...
>> >> >> 
>> >> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
>> >> > 
 may means infer_loop_bounds_from_undefined may not useful
before IV's scev is ready.

> override the global caching of analyze_scalar_evolution the per
> SSA name cache for SCEV analysis isn't overridden.  That also means
> computing the estimates early will break your patch (I've
> tested calling estimate_numbers_of_iterations explicitely from
> loop distribution for example).
Calling simple_iv_with_niters/simple_iv early may break the patch,
because only inside number_of_iterations_exit_assumptions, the flag
is disabled.

>
> What I'm trying to see is whether we can do some more concious
> setup of control IV computation and estimate computation.  While
> your patch produces the desired result it is far from obvious
> why exactly it does this since it also does it in a "midlevel"
> place (of course my attempts to do it in a more obvious position
> failed).
>
> But first of all yes, we should disallow / early out on recursions
> via public APIs like estimate_numbers_of_iterations (already done)
> or number_of_latch_executions (missing) or 
> number_of_iterations_exit[_assumptions] (very difficult there).

I'm wondering if we can set loop->nb_iterations=chrec_dont_know
or chrec_known in number_of_iterations_exit_assumption at the
begining, and use it as a guard to avoid recursions on them.

>
> IMHO that we lazily compute estimate_numbers_of_iterations and that
> computes niter for each exit of a loop is a misdesign - the estimate
> should be computed from the toplevel, and maybe independently for
> each exit?  I think that Honza changed it this way to make sure the
> estimates are always available when queried but I may mis-remember.
>
> With your patch, ontop of that limiting recursion of
> number_of_latch_executions doesn't break the positive effect
> at least.
>
> One unwanted side-effect of your patch might be that the
> computed estimate is now recorded w/o infer_loop_bounds_from_undefined
> which means it could be worse (and we won't re-compute it).
Yes, estimate was computed but infer_loop_bounds_from_undefined was
not called, and it will never be called before estimate is reset.

>
> All in all it is somewhat of a mess and I'm not convinced your
> patch is an improvement in this regard :/  It looks like there's
> a chicken and egg problem with using and recording (at least one)
> control IV and SCEV caching whilst searching for one.

Thanks for your comments, and welcome any sugguestions!

BR,
Jiufu

>
> Richard.
>
>
>> >
>> > I also tried
>> >
>> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
>> > index 935d2d4d8f3..d1787ba39b6 100644
>> > --- a/gcc/tree-ssa-loop-ivopts.c
>> > +++ b/gcc/tree-ssa-loop-ivopts.c
>> > @@ -8093,6 +8093,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
>> > class loop *loop,
>> >        fprintf (dump_file, "\n");
>> >      }
>> >  
>> > +  estimate_numbers_of_iterations (loop);
>> > +
>> >    body = get_loop_body (loop);
>> >    data->body_includes_call = loop_body_includes_call (body, 
>> > loop->num_nodes);
>> >    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
>> >
>> > to get into the cycles from the "correct" entry but that does
>> > not help either.  Nor does
>> >
>> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> > index b767056aeb0..f058be04836 100644
>> > --- a/gcc/tree-ssa-loop-niter.c
>> > +++ b/gcc/tree-ssa-loop-niter.c
>> > @@ -2534,6 +2534,14 @@ number_of_iterations_exit_assumptions (class loop 
>> > *loop, edge exit,
>> >        && !POINTER_TYPE_P (type))
>> >      return false;
>> >  
>> > +  if (loop->estimate_state == EST_NOT_COMPUTED)
>> > +    {
>> > +      bool saved = flag_aggressive_loop_optimizations;
>> > +      flag_aggressive_loop_optimizations = false;
>> > +      estimate_numbers_of_iterations (loop);
>> > +      flag_aggressive_loop_optimizations = saved;
>> > +    }
>> > +
>> >    tree iv0_niters = NULL_TREE;
>> >    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>> >                               op0, &iv0, safe ? &iv0_niters : NULL, 
>> > false))
>> >
>> > or
>> >
>> > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
>> > index 61d72c278a1..d1af89eb459 100644
>> > --- a/gcc/tree-scalar-evolution.c
>> > +++ b/gcc/tree-scalar-evolution.c
>> > @@ -518,7 +518,8 @@ set_scalar_evolution (basic_block instantiated_below, 
>> > tree scalar, tree chrec)
>> >         nb_set_scev++;
>> >      }
>> >  
>> > -  *scalar_info = chrec;
>> > +  if (*scalar_info == chrec_not_analyzed_yet)
>> > +    *scalar_info = chrec;
>> >  }
>> >  
>> >  /* Retrieve the chrec associated to SCALAR instantiated below
>> >
>> > That said, having the cycles is bad, we should definitively break
>> > them (if only for compile-time reasons).  But I don't really
>> > understand how your patch helps and my attempts do not which
>> > means I am missing a critical piece of understanding ... :/
>> 
>> This patch disables flag_aggressive_loop_optimizations before
>> analyze_scalar_evolution is called, then lv_10's scev is not
>> computed during this call cycle.  lv_10's scev is calculated
>> when it other passes (e.g. ivopt) request, at that time i_15
>> has 'final' scev.
>> 
>> I had also tried to disable recursive from analyze_scalar_evolution
>> on the same ssa name(i_15), and directly get the final result.  But
>> I'm not finding a good way yet.
>> 
>> Again thanks for your suggestions!
>> 
>> Thanks!
>> Jiufu
>> 
>> >
>> > Thanks,
>> > Richard.
>> >
>> >> Thanks again.
>> >> 
>> >> BR,
>> >> Jiufu
>> >> 
>> >> > to SCEV and thus may recurse again - to me it would be more
>> >> > logical to try avoid recursing in number_of_latch_executions by
>> >> > setting ->nb_iterations to something early, maybe chrec_dont_know,
>> >> > to signal we're using something we're just trying to compute.
>> >> > 
>> >> > Richard.
>> >> > 
>> >> >> BR,
>> >> >> Jiufu
>> >> >> 
>> >> >> gcc/ChangeLog:
>> >> >> 
>> >> >>  * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
>> >> >>  Disable/restore flag_aggressive_loop_optimizations.
>> >> >> 
>> >> >> gcc/testsuite/ChangeLog:
>> >> >> 
>> >> >>  * gcc.dg/tree-ssa/scev-16.c: New test.
>> >> >> 
>> >> >> ---
>> >> >>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
>> >> >>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
>> >> >>  2 files changed, 39 insertions(+), 4 deletions(-)
>> >> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> >> 
>> >> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
>> >> >> index 06954e437f5..51bb501019e 100644
>> >> >> --- a/gcc/tree-ssa-loop-niter.c
>> >> >> +++ b/gcc/tree-ssa-loop-niter.c
>> >> >> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop
>> >> >> *loop, edge exit,
>> >> >>        && !POINTER_TYPE_P (type))
>> >> >>      return false;
>> >> >> 
>> >> >> +  /* Before niter is calculated, avoid to analyze interim state. */
>> >> >> +  int old_aggressive_loop_optimizations =
>> >> >> flag_aggressive_loop_optimizations;
>> >> >> +  flag_aggressive_loop_optimizations = 0;
>> >> >> +
>> >> >>    tree iv0_niters = NULL_TREE;
>> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>> >> >> 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
>> >> >> -    return number_of_iterations_popcount (loop, exit, code, niter);
>> >> >> +    {
>> >> >> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
>> >> >> +      flag_aggressive_loop_optimizations =
>> >> >> old_aggressive_loop_optimizations;
>> >> >> +      return res;
>> >> >> +    }
>> >> >>    tree iv1_niters = NULL_TREE;
>> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
>> >> >> 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
>> >> >> -    return false;
>> >> >> +    {
>> >> >> +      flag_aggressive_loop_optimizations =
>> >> >> old_aggressive_loop_optimizations;
>> >> >> +      return false;
>> >> >> +    }
>> >> >>    /* Give up on complicated case.  */
>> >> >>    if (iv0_niters && iv1_niters)
>> >> >> -    return false;
>> >> >> -
>> >> >> +    {
>> >> >> +      flag_aggressive_loop_optimizations =
>> >> >> old_aggressive_loop_optimizations;
>> >> >> +      return false;
>> >> >> +    }
>> >> >>    /* We don't want to see undefined signed overflow warnings while
>> >> >>       computing the number of iterations.  */
>> >> >>    fold_defer_overflow_warnings ();
>> >> >> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop
>> >> >> *loop, edge exit,
>> >> >>      				  only_exit_p, safe))
>> >> >>      {
>> >> >>        fold_undefer_and_ignore_overflow_warnings ();
>> >> >> +      flag_aggressive_loop_optimizations =
>> >> >> old_aggressive_loop_optimizations;
>> >> >>        return false;
>> >> >>      }
>> >> >> 
>> >> >> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop
>> >> >> *loop, edge exit,
>> >> >>              niter->may_be_zero);
>> >> >> 
>> >> >>    fold_undefer_and_ignore_overflow_warnings ();
>> >> >> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
>> >> >> 
>> >> >>    /* If NITER has simplified into a constant, update MAX.  */
>> >> >>    if (TREE_CODE (niter->niter) == INTEGER_CST)
>> >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> >> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> >> new file mode 100644
>> >> >> index 00000000000..708ffab88ca
>> >> >> --- /dev/null
>> >> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
>> >> >> @@ -0,0 +1,20 @@
>> >> >> +/* { dg-do compile } */
>> >> >> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
>> >> >> +
>> >> >> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
>> >> >> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
>> >> >> +
>> >> >> +int arr[1000];
>> >> >> +
>> >> >> +void
>> >> >> +s2i (short start, short end)
>> >> >> +{
>> >> >> +  int res = 0;
>> >> >> +  for (short i = start; i < end; i++)
>> >> >> +    {
>> >> >> +      int lv = i;
>> >> >> +      arr[lv] += lv;
>> >> >> +    }
>> >> >> +}
>> >> >> +
>> >> >> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\)
>> >> >> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
>> >> >> 
>> >> 
>> >> 
>>
  
Richard Biener Jan. 18, 2022, 10:30 a.m. UTC | #7
On Mon, 17 Jan 2022, Jiufu Guo wrote:

> Richard Biener <rguenther@suse.de> writes:
> 
> > On Fri, 14 Jan 2022, Jiufu Guo wrote:
> >
> >> Richard Biener <rguenther@suse.de> writes:
> >> 
> >> > On Thu, 13 Jan 2022, guojiufu wrote:
> >> >
> >> >> On 2022-01-03 22:30, Richard Biener wrote:
> >> >> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
> >> >> > 
> >> >> >> Hi,
> >> >> >> ...
> >> >> >> 
> >> >> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
> >> >> > 
>  may means infer_loop_bounds_from_undefined may not useful
> before IV's scev is ready.

I think it's also SCEV does not get optimal results without
niter estimates (or rather filled control IVs).

> > override the global caching of analyze_scalar_evolution the per
> > SSA name cache for SCEV analysis isn't overridden.  That also means
> > computing the estimates early will break your patch (I've
> > tested calling estimate_numbers_of_iterations explicitely from
> > loop distribution for example).
> Calling simple_iv_with_niters/simple_iv early may break the patch,
> because only inside number_of_iterations_exit_assumptions, the flag
> is disabled.
> 
> >
> > What I'm trying to see is whether we can do some more concious
> > setup of control IV computation and estimate computation.  While
> > your patch produces the desired result it is far from obvious
> > why exactly it does this since it also does it in a "midlevel"
> > place (of course my attempts to do it in a more obvious position
> > failed).
> >
> > But first of all yes, we should disallow / early out on recursions
> > via public APIs like estimate_numbers_of_iterations (already done)
> > or number_of_latch_executions (missing) or 
> > number_of_iterations_exit[_assumptions] (very difficult there).
> 
> I'm wondering if we can set loop->nb_iterations=chrec_dont_know
> or chrec_known in number_of_iterations_exit_assumption at the
> begining, and use it as a guard to avoid recursions on them.

Yes, I tried that but in number_of_latch_executions which is the
one eventually setting loop->nb_iterations.  It's more difficult
to avoid recursing in number_of_iterations_exit, but in theory
we could add a niter specific edge flag like EDGE_NITER_IN_PROGRESS
we could set.

> >
> > IMHO that we lazily compute estimate_numbers_of_iterations and that
> > computes niter for each exit of a loop is a misdesign - the estimate
> > should be computed from the toplevel, and maybe independently for
> > each exit?  I think that Honza changed it this way to make sure the
> > estimates are always available when queried but I may mis-remember.
> >
> > With your patch, ontop of that limiting recursion of
> > number_of_latch_executions doesn't break the positive effect
> > at least.
> >
> > One unwanted side-effect of your patch might be that the
> > computed estimate is now recorded w/o infer_loop_bounds_from_undefined
> > which means it could be worse (and we won't re-compute it).
> Yes, estimate was computed but infer_loop_bounds_from_undefined was
> not called, and it will never be called before estimate is reset.
> 
> >
> > All in all it is somewhat of a mess and I'm not convinced your
> > patch is an improvement in this regard :/  It looks like there's
> > a chicken and egg problem with using and recording (at least one)
> > control IV and SCEV caching whilst searching for one.
> 
> Thanks for your comments, and welcome any sugguestions!

To address the missed optimization in the testcase I think we need
a way to roll back parts of SCEV caching so that
estimate_numbers_of_iterations can wipe all caching it caused
(and only that) after it is done.  To then get the maximal positive
effect we would also need to ensure that whenever we "visit" a loop
with SCEV we "first" do estimate_numbers_of_iterations (that's the
more difficult part I think, but maybe the less important one).

The first part involves the 'scalar_evolution_info' cache where
a solution would for example involve chaining the entries with
a 'next' pointer and keeping track of the last added entry so 
estimate_numbers_of_iterations can remember the last added entry
at start and remove those added.  So maybe add scev_{push,pop}_htab
functions (not sure if 'push' is a good name, the present entries
will still be used just after 'pop' all entries added between the
last push and the pop will be removed).

Doing that in estimate_numbers_of_iterations will cause some
extra work - maybe the function can pop() already after all
control IVs are recorded, so the infer_loop_bounds_from_undefined
work isn't wasted.

All of this is IMHO stage1 stuff now but would be really useful
to have.  Which is why I'm adding a note to my very large
RIRO (random-in-random-out ;)) TODO list - feel free to beat me
on it!

Thanks,
Richard.

> BR,
> Jiufu
> 
> >
> > Richard.
> >
> >
> >> >
> >> > I also tried
> >> >
> >> > diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> >> > index 935d2d4d8f3..d1787ba39b6 100644
> >> > --- a/gcc/tree-ssa-loop-ivopts.c
> >> > +++ b/gcc/tree-ssa-loop-ivopts.c
> >> > @@ -8093,6 +8093,8 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, 
> >> > class loop *loop,
> >> >        fprintf (dump_file, "\n");
> >> >      }
> >> >  
> >> > +  estimate_numbers_of_iterations (loop);
> >> > +
> >> >    body = get_loop_body (loop);
> >> >    data->body_includes_call = loop_body_includes_call (body, 
> >> > loop->num_nodes);
> >> >    renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
> >> >
> >> > to get into the cycles from the "correct" entry but that does
> >> > not help either.  Nor does
> >> >
> >> > diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> > index b767056aeb0..f058be04836 100644
> >> > --- a/gcc/tree-ssa-loop-niter.c
> >> > +++ b/gcc/tree-ssa-loop-niter.c
> >> > @@ -2534,6 +2534,14 @@ number_of_iterations_exit_assumptions (class loop 
> >> > *loop, edge exit,
> >> >        && !POINTER_TYPE_P (type))
> >> >      return false;
> >> >  
> >> > +  if (loop->estimate_state == EST_NOT_COMPUTED)
> >> > +    {
> >> > +      bool saved = flag_aggressive_loop_optimizations;
> >> > +      flag_aggressive_loop_optimizations = false;
> >> > +      estimate_numbers_of_iterations (loop);
> >> > +      flag_aggressive_loop_optimizations = saved;
> >> > +    }
> >> > +
> >> >    tree iv0_niters = NULL_TREE;
> >> >    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> >                               op0, &iv0, safe ? &iv0_niters : NULL, 
> >> > false))
> >> >
> >> > or
> >> >
> >> > diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
> >> > index 61d72c278a1..d1af89eb459 100644
> >> > --- a/gcc/tree-scalar-evolution.c
> >> > +++ b/gcc/tree-scalar-evolution.c
> >> > @@ -518,7 +518,8 @@ set_scalar_evolution (basic_block instantiated_below, 
> >> > tree scalar, tree chrec)
> >> >         nb_set_scev++;
> >> >      }
> >> >  
> >> > -  *scalar_info = chrec;
> >> > +  if (*scalar_info == chrec_not_analyzed_yet)
> >> > +    *scalar_info = chrec;
> >> >  }
> >> >  
> >> >  /* Retrieve the chrec associated to SCALAR instantiated below
> >> >
> >> > That said, having the cycles is bad, we should definitively break
> >> > them (if only for compile-time reasons).  But I don't really
> >> > understand how your patch helps and my attempts do not which
> >> > means I am missing a critical piece of understanding ... :/
> >> 
> >> This patch disables flag_aggressive_loop_optimizations before
> >> analyze_scalar_evolution is called, then lv_10's scev is not
> >> computed during this call cycle.  lv_10's scev is calculated
> >> when it other passes (e.g. ivopt) request, at that time i_15
> >> has 'final' scev.
> >> 
> >> I had also tried to disable recursive from analyze_scalar_evolution
> >> on the same ssa name(i_15), and directly get the final result.  But
> >> I'm not finding a good way yet.
> >> 
> >> Again thanks for your suggestions!
> >> 
> >> Thanks!
> >> Jiufu
> >> 
> >> >
> >> > Thanks,
> >> > Richard.
> >> >
> >> >> Thanks again.
> >> >> 
> >> >> BR,
> >> >> Jiufu
> >> >> 
> >> >> > to SCEV and thus may recurse again - to me it would be more
> >> >> > logical to try avoid recursing in number_of_latch_executions by
> >> >> > setting ->nb_iterations to something early, maybe chrec_dont_know,
> >> >> > to signal we're using something we're just trying to compute.
> >> >> > 
> >> >> > Richard.
> >> >> > 
> >> >> >> BR,
> >> >> >> Jiufu
> >> >> >> 
> >> >> >> gcc/ChangeLog:
> >> >> >> 
> >> >> >>  * tree-ssa-loop-niter.c (number_of_iterations_exit_assumptions):
> >> >> >>  Disable/restore flag_aggressive_loop_optimizations.
> >> >> >> 
> >> >> >> gcc/testsuite/ChangeLog:
> >> >> >> 
> >> >> >>  * gcc.dg/tree-ssa/scev-16.c: New test.
> >> >> >> 
> >> >> >> ---
> >> >> >>  gcc/tree-ssa-loop-niter.c               | 23 +++++++++++++++++++----
> >> >> >>  gcc/testsuite/gcc.dg/tree-ssa/scev-16.c | 20 ++++++++++++++++++++
> >> >> >>  2 files changed, 39 insertions(+), 4 deletions(-)
> >> >> >>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> >> 
> >> >> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> >> >> >> index 06954e437f5..51bb501019e 100644
> >> >> >> --- a/gcc/tree-ssa-loop-niter.c
> >> >> >> +++ b/gcc/tree-ssa-loop-niter.c
> >> >> >> @@ -2534,18 +2534,31 @@ number_of_iterations_exit_assumptions (class loop
> >> >> >> *loop, edge exit,
> >> >> >>        && !POINTER_TYPE_P (type))
> >> >> >>      return false;
> >> >> >> 
> >> >> >> +  /* Before niter is calculated, avoid to analyze interim state. */
> >> >> >> +  int old_aggressive_loop_optimizations =
> >> >> >> flag_aggressive_loop_optimizations;
> >> >> >> +  flag_aggressive_loop_optimizations = 0;
> >> >> >> +
> >> >> >>    tree iv0_niters = NULL_TREE;
> >> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> >> >> 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
> >> >> >> -    return number_of_iterations_popcount (loop, exit, code, niter);
> >> >> >> +    {
> >> >> >> +      bool res = number_of_iterations_popcount (loop, exit, code, niter);
> >> >> >> +      flag_aggressive_loop_optimizations =
> >> >> >> old_aggressive_loop_optimizations;
> >> >> >> +      return res;
> >> >> >> +    }
> >> >> >>    tree iv1_niters = NULL_TREE;
> >> >> >>    if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
> >> >> >> 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
> >> >> >> -    return false;
> >> >> >> +    {
> >> >> >> +      flag_aggressive_loop_optimizations =
> >> >> >> old_aggressive_loop_optimizations;
> >> >> >> +      return false;
> >> >> >> +    }
> >> >> >>    /* Give up on complicated case.  */
> >> >> >>    if (iv0_niters && iv1_niters)
> >> >> >> -    return false;
> >> >> >> -
> >> >> >> +    {
> >> >> >> +      flag_aggressive_loop_optimizations =
> >> >> >> old_aggressive_loop_optimizations;
> >> >> >> +      return false;
> >> >> >> +    }
> >> >> >>    /* We don't want to see undefined signed overflow warnings while
> >> >> >>       computing the number of iterations.  */
> >> >> >>    fold_defer_overflow_warnings ();
> >> >> >> @@ -2565,6 +2578,7 @@ number_of_iterations_exit_assumptions (class loop
> >> >> >> *loop, edge exit,
> >> >> >>      				  only_exit_p, safe))
> >> >> >>      {
> >> >> >>        fold_undefer_and_ignore_overflow_warnings ();
> >> >> >> +      flag_aggressive_loop_optimizations =
> >> >> >> old_aggressive_loop_optimizations;
> >> >> >>        return false;
> >> >> >>      }
> >> >> >> 
> >> >> >> @@ -2608,6 +2622,7 @@ number_of_iterations_exit_assumptions (class loop
> >> >> >> *loop, edge exit,
> >> >> >>              niter->may_be_zero);
> >> >> >> 
> >> >> >>    fold_undefer_and_ignore_overflow_warnings ();
> >> >> >> +  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
> >> >> >> 
> >> >> >>    /* If NITER has simplified into a constant, update MAX.  */
> >> >> >>    if (TREE_CODE (niter->niter) == INTEGER_CST)
> >> >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> >> b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> >> new file mode 100644
> >> >> >> index 00000000000..708ffab88ca
> >> >> >> --- /dev/null
> >> >> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
> >> >> >> @@ -0,0 +1,20 @@
> >> >> >> +/* { dg-do compile } */
> >> >> >> +/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
> >> >> >> +
> >> >> >> +/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
> >> >> >> +   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
> >> >> >> +
> >> >> >> +int arr[1000];
> >> >> >> +
> >> >> >> +void
> >> >> >> +s2i (short start, short end)
> >> >> >> +{
> >> >> >> +  int res = 0;
> >> >> >> +  for (short i = start; i < end; i++)
> >> >> >> +    {
> >> >> >> +      int lv = i;
> >> >> >> +      arr[lv] += lv;
> >> >> >> +    }
> >> >> >> +}
> >> >> >> +
> >> >> >> +/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\)
> >> >> >> start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */
> >> >> >> 
> >> >> 
> >> >> 
> >> 
>
  
Richard Biener Jan. 18, 2022, 11 a.m. UTC | #8
On Tue, 18 Jan 2022, Richard Biener wrote:

> On Mon, 17 Jan 2022, Jiufu Guo wrote:
> 
> > Richard Biener <rguenther@suse.de> writes:
> > 
> > > On Fri, 14 Jan 2022, Jiufu Guo wrote:
> > >
> > >> Richard Biener <rguenther@suse.de> writes:
> > >> 
> > >> > On Thu, 13 Jan 2022, guojiufu wrote:
> > >> >
> > >> >> On 2022-01-03 22:30, Richard Biener wrote:
> > >> >> > On Wed, 22 Dec 2021, Jiufu Guo wrote:
> > >> >> > 
> > >> >> >> Hi,
> > >> >> >> ...
> > >> >> >> 
> > >> >> >> Bootstrap and regtest pass on ppc64* and x86_64.  Is this ok for trunk?
> > >> >> > 
> >  may means infer_loop_bounds_from_undefined may not useful
> > before IV's scev is ready.
> 
> I think it's also SCEV does not get optimal results without
> niter estimates (or rather filled control IVs).
> 
> > > override the global caching of analyze_scalar_evolution the per
> > > SSA name cache for SCEV analysis isn't overridden.  That also means
> > > computing the estimates early will break your patch (I've
> > > tested calling estimate_numbers_of_iterations explicitely from
> > > loop distribution for example).
> > Calling simple_iv_with_niters/simple_iv early may break the patch,
> > because only inside number_of_iterations_exit_assumptions, the flag
> > is disabled.
> > 
> > >
> > > What I'm trying to see is whether we can do some more concious
> > > setup of control IV computation and estimate computation.  While
> > > your patch produces the desired result it is far from obvious
> > > why exactly it does this since it also does it in a "midlevel"
> > > place (of course my attempts to do it in a more obvious position
> > > failed).
> > >
> > > But first of all yes, we should disallow / early out on recursions
> > > via public APIs like estimate_numbers_of_iterations (already done)
> > > or number_of_latch_executions (missing) or 
> > > number_of_iterations_exit[_assumptions] (very difficult there).
> > 
> > I'm wondering if we can set loop->nb_iterations=chrec_dont_know
> > or chrec_known in number_of_iterations_exit_assumption at the
> > begining, and use it as a guard to avoid recursions on them.
> 
> Yes, I tried that but in number_of_latch_executions which is the
> one eventually setting loop->nb_iterations.  It's more difficult
> to avoid recursing in number_of_iterations_exit, but in theory
> we could add a niter specific edge flag like EDGE_NITER_IN_PROGRESS
> we could set.
> 
> > >
> > > IMHO that we lazily compute estimate_numbers_of_iterations and that
> > > computes niter for each exit of a loop is a misdesign - the estimate
> > > should be computed from the toplevel, and maybe independently for
> > > each exit?  I think that Honza changed it this way to make sure the
> > > estimates are always available when queried but I may mis-remember.
> > >
> > > With your patch, ontop of that limiting recursion of
> > > number_of_latch_executions doesn't break the positive effect
> > > at least.
> > >
> > > One unwanted side-effect of your patch might be that the
> > > computed estimate is now recorded w/o infer_loop_bounds_from_undefined
> > > which means it could be worse (and we won't re-compute it).
> > Yes, estimate was computed but infer_loop_bounds_from_undefined was
> > not called, and it will never be called before estimate is reset.
> > 
> > >
> > > All in all it is somewhat of a mess and I'm not convinced your
> > > patch is an improvement in this regard :/  It looks like there's
> > > a chicken and egg problem with using and recording (at least one)
> > > control IV and SCEV caching whilst searching for one.
> > 
> > Thanks for your comments, and welcome any sugguestions!
> 
> To address the missed optimization in the testcase I think we need
> a way to roll back parts of SCEV caching so that
> estimate_numbers_of_iterations can wipe all caching it caused
> (and only that) after it is done.  To then get the maximal positive
> effect we would also need to ensure that whenever we "visit" a loop
> with SCEV we "first" do estimate_numbers_of_iterations (that's the
> more difficult part I think, but maybe the less important one).
> 
> The first part involves the 'scalar_evolution_info' cache where
> a solution would for example involve chaining the entries with
> a 'next' pointer and keeping track of the last added entry so 
> estimate_numbers_of_iterations can remember the last added entry
> at start and remove those added.  So maybe add scev_{push,pop}_htab
> functions (not sure if 'push' is a good name, the present entries
> will still be used just after 'pop' all entries added between the
> last push and the pop will be removed).
> 
> Doing that in estimate_numbers_of_iterations will cause some
> extra work - maybe the function can pop() already after all
> control IVs are recorded, so the infer_loop_bounds_from_undefined
> work isn't wasted.
> 
> All of this is IMHO stage1 stuff now but would be really useful
> to have.  Which is why I'm adding a note to my very large
> RIRO (random-in-random-out ;)) TODO list - feel free to beat me
> on it!

Btw, in the least efficient way the effect can be simulated by
calling scev_reset_htab from estimate_numbers_of_iterations.

Richard.
  

Patch

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index 06954e437f5..51bb501019e 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -2534,18 +2534,31 @@  number_of_iterations_exit_assumptions (class loop *loop, edge exit,
       && !POINTER_TYPE_P (type))
     return false;
 
+  /* Before niter is calculated, avoid to analyze interim state. */
+  int old_aggressive_loop_optimizations = flag_aggressive_loop_optimizations;
+  flag_aggressive_loop_optimizations = 0;
+
   tree iv0_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op0, &iv0, safe ? &iv0_niters : NULL, false))
-    return number_of_iterations_popcount (loop, exit, code, niter);
+    {
+      bool res = number_of_iterations_popcount (loop, exit, code, niter);
+      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
+      return res;
+    }
   tree iv1_niters = NULL_TREE;
   if (!simple_iv_with_niters (loop, loop_containing_stmt (stmt),
 			      op1, &iv1, safe ? &iv1_niters : NULL, false))
-    return false;
+    {
+      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
+      return false;
+    }
   /* Give up on complicated case.  */
   if (iv0_niters && iv1_niters)
-    return false;
-
+    {
+      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
+      return false;
+    }
   /* We don't want to see undefined signed overflow warnings while
      computing the number of iterations.  */
   fold_defer_overflow_warnings ();
@@ -2565,6 +2578,7 @@  number_of_iterations_exit_assumptions (class loop *loop, edge exit,
 				  only_exit_p, safe))
     {
       fold_undefer_and_ignore_overflow_warnings ();
+      flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
       return false;
     }
 
@@ -2608,6 +2622,7 @@  number_of_iterations_exit_assumptions (class loop *loop, edge exit,
 					       niter->may_be_zero);
 
   fold_undefer_and_ignore_overflow_warnings ();
+  flag_aggressive_loop_optimizations = old_aggressive_loop_optimizations;
 
   /* If NITER has simplified into a constant, update MAX.  */
   if (TREE_CODE (niter->niter) == INTEGER_CST)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
new file mode 100644
index 00000000000..708ffab88ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/scev-16.c
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-ivopts-scev" } */
+
+/* Expect scalar_evolution = {(int) start_7(D), +, 1}_1), instead
+   (int) (short int) {(unsigned short) start_7(D), +, 1}_1 */
+
+int arr[1000];
+
+void
+s2i (short start, short end)
+{
+  int res = 0;
+  for (short i = start; i < end; i++)
+    {
+      int lv = i;
+      arr[lv] += lv;
+    }
+}
+
+/* { dg-final { scan-tree-dump-times "scalar_evolution = \{\\(int\\) start_\[0-9\]+\\(D\\), \\+, 1\}_1" 1 "ivopts" } } */