@@ -1788,6 +1788,63 @@ dump_affine_iv (FILE *file, affine_iv *iv)
}
}
+/* Generate expr: (HIGH - LOW) / STEP, under UTYPE. */
+
+static tree
+get_step_count (tree high, tree low, tree step, tree utype,
+ bool end_inclusive = false)
+{
+ tree delta = fold_build2 (MINUS_EXPR, TREE_TYPE (low), high, low);
+ delta = fold_convert (utype, delta);
+ if (end_inclusive)
+ delta = fold_build2 (PLUS_EXPR, utype, delta, build_one_cst (utype));
+
+ if (tree_int_cst_sign_bit (step))
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
+ step = fold_convert (utype, step);
+
+ return fold_build2 (FLOOR_DIV_EXPR, utype, delta, step);
+}
+
+/* Get the additional assumption if both two steps are not zero.
+ Assumptions satisfy that there is no overflow or wrap during
+ v0 and v1 chasing. */
+
+static tree
+iv_chase_assumption (affine_iv *iv0, affine_iv *iv1, tree step,
+ enum tree_code code)
+{
+ /* Here, no need additional assumptions for NE. */
+ if (code == NE_EXPR)
+ return boolean_true_node;
+
+ /* No need addition assumption for pointer. */
+ tree type = TREE_TYPE (iv0->base);
+ if (POINTER_TYPE_P (type))
+ return boolean_true_node;
+
+ bool positive0 = !tree_int_cst_sign_bit (iv0->step);
+ bool positive1 = !tree_int_cst_sign_bit (iv1->step);
+ tree utype = unsigned_type_for (type);
+ bool add1 = code == LE_EXPR;
+ tree niter = get_step_count (iv1->base, iv0->base, step, utype, add1);
+
+ int prec = TYPE_PRECISION (type);
+ signop sgn = TYPE_SIGN (type);
+ tree max = wide_int_to_tree (type, wi::max_value (prec, sgn));
+ tree min = wide_int_to_tree (type, wi::min_value (prec, sgn));
+ tree valid_niter0, valid_niter1;
+
+ valid_niter0 = positive0 ? get_step_count (max, iv0->base, iv0->step, utype)
+ : get_step_count (iv0->base, min, iv0->step, utype);
+ valid_niter1 = positive1 ? get_step_count (max, iv1->base, iv1->step, utype)
+ : get_step_count (iv1->base, min, iv1->step, utype);
+
+ tree e0 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter0);
+ tree e1 = fold_build2 (LT_EXPR, boolean_type_node, niter, valid_niter1);
+ return fold_build2 (TRUTH_AND_EXPR, boolean_type_node, e0, e1);
+}
+
/* Determine the number of iterations according to condition (for staying
inside loop) which compares two induction variables using comparison
operator CODE. The induction variable on left side of the comparison
@@ -1879,11 +1936,10 @@ number_of_iterations_cond (class loop *loop,
{iv0.base, iv0.step - iv1.step} cmp_code {iv1.base, 0}
provided that either below condition is satisfied:
+ a. iv0.step and iv1.step are integer.
+ b. Additional condition: before iv0 chase up v1, iv0 and iv1 should not
+ step over min or max of the type. */
- a) the test is NE_EXPR;
- b) iv0.step - iv1.step is integer and iv0/iv1 don't overflow.
-
- This rarely occurs in practice, but it is simple enough to manage. */
if (!integer_zerop (iv0->step) && !integer_zerop (iv1->step))
{
tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
@@ -1894,15 +1950,12 @@ number_of_iterations_cond (class loop *loop,
if (tree_int_cst_sign_bit (step))
return false;
- if (code != NE_EXPR
- && (TREE_CODE (step) != INTEGER_CST
- || !iv0->no_overflow || !iv1->no_overflow))
+ niter->assumptions = iv_chase_assumption (iv0, iv1, step, code);
+ if (integer_zerop (niter->assumptions))
return false;
iv0->step = step;
- if (!POINTER_TYPE_P (type))
- iv0->no_overflow = false;
-
+ iv0->no_overflow = true;
iv1->step = build_int_cst (step_type, 0);
iv1->no_overflow = true;
}
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+#define MAX ((unsigned int) 0xffffffff)
+#define MIN ((unsigned int) (0))
+
+int arr[512];
+
+#define FUNC(NAME, CODE, S0, S1) \
+ unsigned __attribute__ ((noinline)) NAME (unsigned int b0, unsigned int b1) \
+ { \
+ unsigned int n = 0; \
+ unsigned int i0, i1; \
+ int *p = arr; \
+ for (i0 = b0, i1 = b1; i0 CODE i1; i0 += S0, i1 += S1) \
+ { \
+ n++; \
+ *p++ = i0 + i1; \
+ } \
+ return n; \
+ }
+
+FUNC (lt_5_1, <, 5, 1);
+FUNC (le_1_m5, <=, 1, -5);
+FUNC (lt_1_10, <, 1, 10);
+
+int
+main ()
+{
+ int fail = 0;
+ if (lt_5_1 (MAX - 124, MAX - 27) != 28)
+ fail++;
+
+ /* to save time, do not run this. */
+ /*
+ if (le_1_m5 (MIN + 1, MIN + 9) != 715827885)
+ fail++; */
+
+ if (lt_1_10 (MAX - 1000, MAX - 500) != 51)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */