[V2] VECT: Change flow of decrement IV

Message ID 20230531150819.136379-1-juzhe.zhong@rivai.ai
State Committed
Commit bffc52838e393a775e13dc48162669b0f43ebe09
Headers
Series [V2] VECT: Change flow of decrement IV |

Commit Message

juzhe.zhong@rivai.ai May 31, 2023, 3:08 p.m. UTC
  From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>

Following Richi's suggestion, I changed the current decrement IV flow from:

do {
   remain -= MIN (vf, remain);
} while (remain != 0);

into:

do {
   old_remain = remain;
   len = MIN (vf, remain);
   remain -= vf;
} while (old_remain > vf);

to enhance SCEV.

Includes fixes from Kewen.


This patch will need to wait for Kewen's test feedback.

Testing on X86 is on-going

Co-authored-by: Kewen Lin <linkw@linux.ibm.com>

gcc/ChangeLog:

        * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
        (vect_set_loop_condition_partial_vectors): Ditto.

---
 gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)
  

Comments

juzhe.zhong@rivai.ai June 1, 2023, 12:31 a.m. UTC | #1
Bootstrap and regression testing on x86 showed no unexpected differences.

Looking forward to Kewen's test report for this patch.

Thanks.


juzhe.zhong@rivai.ai
 
From: juzhe.zhong
Date: 2023-05-31 23:08
To: gcc-patches
CC: richard.sandiford; rguenther; linkw; Ju-Zhe Zhong
Subject: [PATCH V2] VECT: Change flow of decrement IV
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
 
Following Richi's suggestion, I changed the current decrement IV flow from:
 
do {
   remain -= MIN (vf, remain);
} while (remain != 0);
 
into:
 
do {
   old_remain = remain;
   len = MIN (vf, remain);
   remain -= vf;
} while (old_remain > vf);
 
to enhance SCEV.
 
Includes fixes from Kewen.
 
 
This patch will need to wait for Kewen's test feedback.
 
Testing on X86 is on-going
 
Co-Authored by: Kewen Lin  <linkw@linux.ibm.com>
 
gcc/ChangeLog:
 
        * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
        (vect_set_loop_condition_partial_vectors): Ditto.
 
---
gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
1 file changed, 25 insertions(+), 11 deletions(-)
 
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index acf3642ceb2..3f735945e67 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
gimple_stmt_iterator loop_cond_gsi,
rgroup_controls *rgc, tree niters,
tree niters_skip, bool might_wrap_p,
- tree *iv_step)
+ tree *iv_step, tree *compare_step)
{
   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
@@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
   ...
   vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
   ...
-    ivtmp_35 = ivtmp_9 - _36;
+    ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
   ...
-    if (ivtmp_35 != 0)
+    if (ivtmp_9 > POLY_INT_CST [4, 4])
     goto <bb 4>; [83.33%]
   else
     goto <bb 5>; [16.67%]
@@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
       tree step = rgc->controls.length () == 1 ? rgc->controls[0]
       : make_ssa_name (iv_type);
       /* Create decrement IV.  */
-      create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
- insert_after, &index_before_incr, &index_after_incr);
+      create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
+ &incr_gsi, insert_after, &index_before_incr,
+ &index_after_incr);
       gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
    index_before_incr,
    nitems_step));
       *iv_step = step;
-      return index_after_incr;
+      *compare_step = nitems_step;
+      return index_before_incr;
     }
   /* Create increment IV.  */
@@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
      arbitrarily pick the last.  */
   tree test_ctrl = NULL_TREE;
   tree iv_step = NULL_TREE;
+  tree compare_step = NULL_TREE;
   rgroup_controls *rgc;
   rgroup_controls *iv_rgc = nullptr;
   unsigned int i;
@@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
&preheader_seq, &header_seq,
loop_cond_gsi, rgc, niters,
niters_skip, might_wrap_p,
- &iv_step);
+ &iv_step, &compare_step);
    iv_rgc = rgc;
  }
@@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
   /* Get a boolean result that tells us whether to iterate.  */
   edge exit_edge = single_exit (loop);
-  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
-  tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
-  gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
- NULL_TREE, NULL_TREE);
+  gcond *cond_stmt;
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+    {
+      gcc_assert (compare_step);
+      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
+      cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
+      NULL_TREE);
+    }
+  else
+    {
+      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+      tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
+      cond_stmt
+ = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
+    }
   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
   /* The loop iterates (NITERS - 1) / VF + 1 times.
-- 
2.36.3
  
Kewen.Lin June 1, 2023, 4:32 a.m. UTC | #2
Hi Juzhe,

on 2023/6/1 08:31, juzhe.zhong@rivai.ai wrote:
> Bootstrapped and Regression on X86 no surprise different.
> 
> Looking forward Kewen's test report for this patch.
> 

This patch can be bootstrapped and regress-tested on
powerpc64-linux-gnu P9 and powerpc64le-linux-gnu P9/P10.

Also SPEC2017 int/fp bmks build and run successfully
with it on powerpc64le-linux-gnu P10 (with an explicit
parameter --param=vect-partial-vector-usage=2).

It can fix the 510.parest_r -5% degradation, and it sped up
525.x264_r +1%, 521.wrf_r +2.03%, 544.nab_r +1.27% and
549.fotonik3d_r +3.22%, but it degraded 503.bwaves_r -4%, we have
some heuristics on load and load pct. for 503.bwaves_r on Power,
I suspected it's related, by considering vect-partial-vector-usage=2
isn't default on Power and this can fix exposed failures and parest_r
degradation, I think the bwaves_r degradation should not block this.
For bwaves_r degradation, I'll have a further look later, open a PR
if it's an actual issue rather than just costing heuristics having
no effects.

btw, it would be better to add one PR marker line to associate
this with PR109971, something like:

	PR tree-optimization/109971

Thanks!

BR,
Kewen

> Thanks.
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> juzhe.zhong@rivai.ai
> 
>      
>     *From:* juzhe.zhong <mailto:juzhe.zhong@rivai.ai>
>     *Date:* 2023-05-31 23:08
>     *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
>     *CC:* richard.sandiford <mailto:richard.sandiford@arm.com>; rguenther <mailto:rguenther@suse.de>; linkw <mailto:linkw@linux.ibm.com>; Ju-Zhe Zhong <mailto:juzhe.zhong@rivai.ai>
>     *Subject:* [PATCH V2] VECT: Change flow of decrement IV
>     From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>      
>     Follow Richi's suggestion, I change current decrement IV flow from:
>      
>     do {
>        remain -= MIN (vf, remain);
>     } while (remain != 0);
>      
>     into:
>      
>     do {
>        old_remain = remain;
>        len = MIN (vf, remain);
>        remain -= vf;
>     } while (old_remain >= vf);
>      
>     to enhance SCEV.
>      
>     Include fixes from kewen.
>      
>      
>     This patch will need to wait for Kewen's test feedback.
>      
>     Testing on X86 is on-going
>      
>     Co-Authored by: Kewen Lin  <linkw@linux.ibm.com>
>      
>     gcc/ChangeLog:
>      
>             * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
>             (vect_set_loop_condition_partial_vectors): Ditto.
>      
>     ---
>     gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
>     1 file changed, 25 insertions(+), 11 deletions(-)
>      
>     diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
>     index acf3642ceb2..3f735945e67 100644
>     --- a/gcc/tree-vect-loop-manip.cc
>     +++ b/gcc/tree-vect-loop-manip.cc
>     @@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>     gimple_stmt_iterator loop_cond_gsi,
>     rgroup_controls *rgc, tree niters,
>     tree niters_skip, bool might_wrap_p,
>     - tree *iv_step)
>     + tree *iv_step, tree *compare_step)
>     {
>        tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
>        tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
>     @@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>        ...
>        vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
>        ...
>     -    ivtmp_35 = ivtmp_9 - _36;
>     +    ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
>        ...
>     -    if (ivtmp_35 != 0)
>     +    if (ivtmp_9 > POLY_INT_CST [4, 4])
>          goto <bb 4>; [83.33%]
>        else
>          goto <bb 5>; [16.67%]
>     @@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>            tree step = rgc->controls.length () == 1 ? rgc->controls[0]
>            : make_ssa_name (iv_type);
>            /* Create decrement IV.  */
>     -      create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
>     - insert_after, &index_before_incr, &index_after_incr);
>     +      create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
>     + &incr_gsi, insert_after, &index_before_incr,
>     + &index_after_incr);
>            gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
>         index_before_incr,
>         nitems_step));
>            *iv_step = step;
>     -      return index_after_incr;
>     +      *compare_step = nitems_step;
>     +      return index_before_incr;
>          }
>        /* Create increment IV.  */
>     @@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>           arbitrarily pick the last.  */
>        tree test_ctrl = NULL_TREE;
>        tree iv_step = NULL_TREE;
>     +  tree compare_step = NULL_TREE;
>        rgroup_controls *rgc;
>        rgroup_controls *iv_rgc = nullptr;
>        unsigned int i;
>     @@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>     &preheader_seq, &header_seq,
>     loop_cond_gsi, rgc, niters,
>     niters_skip, might_wrap_p,
>     - &iv_step);
>     + &iv_step, &compare_step);
>         iv_rgc = rgc;
>       }
>     @@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>        /* Get a boolean result that tells us whether to iterate.  */
>        edge exit_edge = single_exit (loop);
>     -  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
>     -  tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
>     -  gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
>     - NULL_TREE, NULL_TREE);
>     +  gcond *cond_stmt;
>     +  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
>     +    {
>     +      gcc_assert (compare_step);
>     +      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
>     +      cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
>     +      NULL_TREE);
>     +    }
>     +  else
>     +    {
>     +      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
>     +      tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
>     +      cond_stmt
>     + = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
>     +    }
>        gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
>        /* The loop iterates (NITERS - 1) / VF + 1 times.
>     -- 
>     2.36.3
>      
>
  
juzhe.zhong@rivai.ai June 1, 2023, 4:36 a.m. UTC | #3
Thanks kewen.
I have sent the V3 patch. Could you comment on it?
I want to make sure you support that patch.

Thanks.


juzhe.zhong@rivai.ai
 
From: Kewen.Lin
Date: 2023-06-01 12:32
To: juzhe.zhong@rivai.ai
CC: richard.sandiford; rguenther; gcc-patches
Subject: Re: [PATCH V2] VECT: Change flow of decrement IV
Hi Juzhe,
 
on 2023/6/1 08:31, juzhe.zhong@rivai.ai wrote:
> Bootstrapped and Regression on X86 no surprise different.
> 
> Looking forward Kewen's test report for this patch.
> 
 
This patch can be bootstrapped and regress-tested on
powerpc64-linux-gnu P9 and powerpc64le-linux-gnu P9/P10.
 
Also SPEC2017 int/fp bmks build and run successfully
with it on powerpc64le-linux-gnu P10 (with an explicit
parameter --param=vect-partial-vector-usage=2).
 
It can fix the 510.parest_r -5% degradation, and it sped up
525.x264_r +1%, 521.wrf_r +2.03%, 544.nab_r +1.27% and
549.fotonik3d_r +3.22%, but it degraded 503.bwaves_r -4%, we have
some heuristics on load and load pct. for 503.bwaves_r on Power,
I suspected it's related, by considering vect-partial-vector-usage=2
isn't default on Power and this can fix exposed failures and parest_r
degradation, I think the bwaves_r degradation should not block this.
For bwaves_r degradation, I'll have a further look later, open a PR
if it's an actual issue rather than just costing heuristics having
no effects.
 
btw, it would be better to add one PR marker line to associate
this with PR109971, something like:
 
PR tree-optimization/109971
 
Thanks!
 
BR,
Kewen
 
> Thanks.
> ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
> juzhe.zhong@rivai.ai
> 
>      
>     *From:* juzhe.zhong <mailto:juzhe.zhong@rivai.ai>
>     *Date:* 2023-05-31 23:08
>     *To:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>
>     *CC:* richard.sandiford <mailto:richard.sandiford@arm.com>; rguenther <mailto:rguenther@suse.de>; linkw <mailto:linkw@linux.ibm.com>; Ju-Zhe Zhong <mailto:juzhe.zhong@rivai.ai>
>     *Subject:* [PATCH V2] VECT: Change flow of decrement IV
>     From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>      
>     Follow Richi's suggestion, I change current decrement IV flow from:
>      
>     do {
>        remain -= MIN (vf, remain);
>     } while (remain != 0);
>      
>     into:
>      
>     do {
>        old_remain = remain;
>        len = MIN (vf, remain);
>        remain -= vf;
>     } while (old_remain >= vf);
>      
>     to enhance SCEV.
>      
>     Include fixes from kewen.
>      
>      
>     This patch will need to wait for Kewen's test feedback.
>      
>     Testing on X86 is on-going
>      
>     Co-Authored by: Kewen Lin  <linkw@linux.ibm.com>
>      
>     gcc/ChangeLog:
>      
>             * tree-vect-loop-manip.cc (vect_set_loop_controls_directly): Change decrement IV flow.
>             (vect_set_loop_condition_partial_vectors): Ditto.
>      
>     ---
>     gcc/tree-vect-loop-manip.cc | 36 +++++++++++++++++++++++++-----------
>     1 file changed, 25 insertions(+), 11 deletions(-)
>      
>     diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
>     index acf3642ceb2..3f735945e67 100644
>     --- a/gcc/tree-vect-loop-manip.cc
>     +++ b/gcc/tree-vect-loop-manip.cc
>     @@ -483,7 +483,7 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>     gimple_stmt_iterator loop_cond_gsi,
>     rgroup_controls *rgc, tree niters,
>     tree niters_skip, bool might_wrap_p,
>     - tree *iv_step)
>     + tree *iv_step, tree *compare_step)
>     {
>        tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
>        tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
>     @@ -538,9 +538,9 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>        ...
>        vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
>        ...
>     -    ivtmp_35 = ivtmp_9 - _36;
>     +    ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
>        ...
>     -    if (ivtmp_35 != 0)
>     +    if (ivtmp_9 > POLY_INT_CST [4, 4])
>          goto <bb 4>; [83.33%]
>        else
>          goto <bb 5>; [16.67%]
>     @@ -549,13 +549,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>            tree step = rgc->controls.length () == 1 ? rgc->controls[0]
>            : make_ssa_name (iv_type);
>            /* Create decrement IV.  */
>     -      create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
>     - insert_after, &index_before_incr, &index_after_incr);
>     +      create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
>     + &incr_gsi, insert_after, &index_before_incr,
>     + &index_after_incr);
>            gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
>         index_before_incr,
>         nitems_step));
>            *iv_step = step;
>     -      return index_after_incr;
>     +      *compare_step = nitems_step;
>     +      return index_before_incr;
>          }
>        /* Create increment IV.  */
>     @@ -825,6 +827,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>           arbitrarily pick the last.  */
>        tree test_ctrl = NULL_TREE;
>        tree iv_step = NULL_TREE;
>     +  tree compare_step = NULL_TREE;
>        rgroup_controls *rgc;
>        rgroup_controls *iv_rgc = nullptr;
>        unsigned int i;
>     @@ -861,7 +864,7 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>     &preheader_seq, &header_seq,
>     loop_cond_gsi, rgc, niters,
>     niters_skip, might_wrap_p,
>     - &iv_step);
>     + &iv_step, &compare_step);
>         iv_rgc = rgc;
>       }
>     @@ -884,10 +887,21 @@ vect_set_loop_condition_partial_vectors (class loop *loop,
>        /* Get a boolean result that tells us whether to iterate.  */
>        edge exit_edge = single_exit (loop);
>     -  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
>     -  tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
>     -  gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
>     - NULL_TREE, NULL_TREE);
>     +  gcond *cond_stmt;
>     +  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
>     +    {
>     +      gcc_assert (compare_step);
>     +      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
>     +      cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
>     +      NULL_TREE);
>     +    }
>     +  else
>     +    {
>     +      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
>     +      tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
>     +      cond_stmt
>     + = gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
>     +    }
>        gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
>        /* The loop iterates (NITERS - 1) / VF + 1 times.
>     -- 
>     2.36.3
>      
>
  

Patch

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index acf3642ceb2..3f735945e67 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -483,7 +483,7 @@  vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
 				 gimple_stmt_iterator loop_cond_gsi,
 				 rgroup_controls *rgc, tree niters,
 				 tree niters_skip, bool might_wrap_p,
-				 tree *iv_step)
+				 tree *iv_step, tree *compare_step)
 {
   tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
   tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
@@ -538,9 +538,9 @@  vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
 	   ...
 	   vect__4.8_28 = .LEN_LOAD (_17, 32B, _36, 0);
 	   ...
-	   ivtmp_35 = ivtmp_9 - _36;
+	   ivtmp_35 = ivtmp_9 - POLY_INT_CST [4, 4];
 	   ...
-	   if (ivtmp_35 != 0)
+	   if (ivtmp_9 > POLY_INT_CST [4, 4])
 	     goto <bb 4>; [83.33%]
 	   else
 	     goto <bb 5>; [16.67%]
@@ -549,13 +549,15 @@  vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
       tree step = rgc->controls.length () == 1 ? rgc->controls[0]
 					       : make_ssa_name (iv_type);
       /* Create decrement IV.  */
-      create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
-		 insert_after, &index_before_incr, &index_after_incr);
+      create_iv (nitems_total, MINUS_EXPR, nitems_step, NULL_TREE, loop,
+		 &incr_gsi, insert_after, &index_before_incr,
+		 &index_after_incr);
       gimple_seq_add_stmt (header_seq, gimple_build_assign (step, MIN_EXPR,
 							    index_before_incr,
 							    nitems_step));
       *iv_step = step;
-      return index_after_incr;
+      *compare_step = nitems_step;
+      return index_before_incr;
     }
 
   /* Create increment IV.  */
@@ -825,6 +827,7 @@  vect_set_loop_condition_partial_vectors (class loop *loop,
      arbitrarily pick the last.  */
   tree test_ctrl = NULL_TREE;
   tree iv_step = NULL_TREE;
+  tree compare_step = NULL_TREE;
   rgroup_controls *rgc;
   rgroup_controls *iv_rgc = nullptr;
   unsigned int i;
@@ -861,7 +864,7 @@  vect_set_loop_condition_partial_vectors (class loop *loop,
 						 &preheader_seq, &header_seq,
 						 loop_cond_gsi, rgc, niters,
 						 niters_skip, might_wrap_p,
-						 &iv_step);
+						 &iv_step, &compare_step);
 
 	    iv_rgc = rgc;
 	  }
@@ -884,10 +887,21 @@  vect_set_loop_condition_partial_vectors (class loop *loop,
 
   /* Get a boolean result that tells us whether to iterate.  */
   edge exit_edge = single_exit (loop);
-  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
-  tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
-  gcond *cond_stmt = gimple_build_cond (code, test_ctrl, zero_ctrl,
-					NULL_TREE, NULL_TREE);
+  gcond *cond_stmt;
+  if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
+    {
+      gcc_assert (compare_step);
+      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? LE_EXPR : GT_EXPR;
+      cond_stmt = gimple_build_cond (code, test_ctrl, compare_step, NULL_TREE,
+				     NULL_TREE);
+    }
+  else
+    {
+      tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
+      tree zero_ctrl = build_zero_cst (TREE_TYPE (test_ctrl));
+      cond_stmt
+	= gimple_build_cond (code, test_ctrl, zero_ctrl, NULL_TREE, NULL_TREE);
+    }
   gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
 
   /* The loop iterates (NITERS - 1) / VF + 1 times.