[1/3] tree-optimization/104582 - Simplify vectorizer cost API and fixes
Commit Message
This simplifies the vectorizer cost API by providing overloads
to add_stmt_cost and record_stmt_cost suitable for scalar stmt
and branch stmt costing which do not need information like
a vector type or alignment. It also fixes two mistakes where
costs for versioning tests were recorded as vector stmt rather
than scalar stmt.
This is a first patch to simplify the actual fix for PR104582.
Bootstrapped and tested on x86_64-unknown-linux-gnu.
OK?
2022-02-18 Richard Biener <rguenther@suse.de>
PR tree-optimization/104582
* tree-vectorizer.h (add_stmt_cost): New overload.
(record_stmt_cost): Likewise.
* tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost):
Use add_stmt_costs.
(vect_get_known_peeling_cost): Use new overloads.
(vect_estimate_min_profitable_iters): Likewise. Consistently
use scalar_stmt for costing versioning checks.
* tree-vect-stmts.cc (record_stmt_cost): New overload.
---
gcc/tree-vect-loop.cc | 39 +++++++++++++++------------------------
gcc/tree-vect-stmts.cc | 10 ++++++++++
gcc/tree-vectorizer.h | 12 ++++++++++++
3 files changed, 37 insertions(+), 24 deletions(-)
Comments
Richard Biener <rguenther@suse.de> writes:
> This simplifies the vectorizer cost API by providing overloads
> to add_stmt_cost and record_stmt_cost suitable for scalar stmt
> and branch stmt costing which do not need information like
> a vector type or alignment. It also fixes two mistakes where
> costs for versioning tests were recorded as vector stmt rather
> than scalar stmt.
>
> This is a first patch to simplify the actual fix for PR104582.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
LGTM. Maybe the record_stmt_cost overload should accept scalar_stmt
too though? It seems inconsistent to allow it only for add_stmt_cost.
Not a strong opinion though, just a suggestion.
Thanks,
Richard
> 2022-02-18 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/104582
> * tree-vectorizer.h (add_stmt_cost): New overload.
> (record_stmt_cost): Likewise.
> * tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost):
> Use add_stmt_costs.
> (vect_get_known_peeling_cost): Use new overloads.
> (vect_estimate_min_profitable_iters): Likewise. Consistently
> use scalar_stmt for costing versioning checks.
> * tree-vect-stmts.cc (record_stmt_cost): New overload.
> ---
> gcc/tree-vect-loop.cc | 39 +++++++++++++++------------------------
> gcc/tree-vect-stmts.cc | 10 ++++++++++
> gcc/tree-vectorizer.h | 12 ++++++++++++
> 3 files changed, 37 insertions(+), 24 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 896218f23ea..5c7b163f01c 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -1323,13 +1323,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
>
> /* Now accumulate cost. */
> loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
> - stmt_info_for_cost *si;
> - int j;
> - FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
> - j, si)
> - (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
> - si->kind, si->stmt_info, si->vectype,
> - si->misalign, si->where);
> + add_stmt_costs (loop_vinfo->scalar_costs,
> + &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
> loop_vinfo->scalar_costs->finish_cost (nullptr);
> }
>
> @@ -3873,10 +3868,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
> iterations are unknown, count a taken branch per peeled loop. */
> if (peel_iters_prologue > 0)
> retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
> - NULL, NULL_TREE, 0, vect_prologue);
> + vect_prologue);
> if (*peel_iters_epilogue > 0)
> retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
> - NULL, NULL_TREE, 0, vect_epilogue);
> + vect_epilogue);
> }
>
> stmt_info_for_cost *si;
> @@ -3946,8 +3941,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
> {
> /* FIXME: Make cost depend on complexity of individual check. */
> unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
> - (void) add_stmt_cost (target_cost_data, len, vector_stmt,
> - NULL, NULL_TREE, 0, vect_prologue);
> + (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
> if (dump_enabled_p ())
> dump_printf (MSG_NOTE,
> "cost model: Adding cost of checks for loop "
> @@ -3959,13 +3953,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
> {
> /* FIXME: Make cost depend on complexity of individual check. */
> unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
> - (void) add_stmt_cost (target_cost_data, len, vector_stmt,
> - NULL, NULL_TREE, 0, vect_prologue);
> + (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
> len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
> if (len)
> /* Count LEN - 1 ANDs and LEN comparisons. */
> (void) add_stmt_cost (target_cost_data, len * 2 - 1,
> - scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> + scalar_stmt, vect_prologue);
> len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
> if (len)
> {
> @@ -3976,7 +3969,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
> if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
> nstmts += 1;
> (void) add_stmt_cost (target_cost_data, nstmts,
> - scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> + scalar_stmt, vect_prologue);
> }
> if (dump_enabled_p ())
> dump_printf (MSG_NOTE,
> @@ -3998,7 +3991,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
> if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
> (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> - NULL, NULL_TREE, 0, vect_prologue);
> + vect_prologue);
>
> /* Count statements in scalar loop. Using this as scalar cost for a single
> iteration for now.
> @@ -4104,21 +4097,19 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>
> if (prologue_need_br_taken_cost)
> (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> - NULL, NULL_TREE, 0, vect_prologue);
> + vect_prologue);
>
> if (prologue_need_br_not_taken_cost)
> (void) add_stmt_cost (target_cost_data, 1,
> - cond_branch_not_taken, NULL, NULL_TREE, 0,
> - vect_prologue);
> + cond_branch_not_taken, vect_prologue);
>
> if (epilogue_need_br_taken_cost)
> (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> - NULL, NULL_TREE, 0, vect_epilogue);
> + vect_epilogue);
>
> if (epilogue_need_br_not_taken_cost)
> (void) add_stmt_cost (target_cost_data, 1,
> - cond_branch_not_taken, NULL, NULL_TREE, 0,
> - vect_epilogue);
> + cond_branch_not_taken, vect_epilogue);
>
> /* Take care of special costs for rgroup controls of partial vectors. */
> if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> @@ -4204,9 +4195,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
> }
>
> (void) add_stmt_cost (target_cost_data, prologue_stmts,
> - scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> + scalar_stmt, vect_prologue);
> (void) add_stmt_cost (target_cost_data, body_stmts,
> - scalar_stmt, NULL, NULL_TREE, 0, vect_body);
> + scalar_stmt, vect_body);
> }
>
> /* FORNOW: The scalar outside cost is incremented in one of the
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 000a0f4b47e..acb9fa611c1 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -109,6 +109,16 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
> (builtin_vectorization_cost (kind, vectype, misalign) * count);
> }
>
> +unsigned
> +record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
> + enum vect_cost_for_stmt kind,
> + enum vect_cost_model_location where)
> +{
> + gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken);
> + return record_stmt_cost (body_cost_vec, count, kind, NULL,
> + NULL_TREE, 0, where);
> +}
> +
> /* Return a variable of type ELEM_TYPE[NELEMS]. */
>
> static tree
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index ddd0637185c..da99f28c0dc 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1762,6 +1762,15 @@ add_stmt_cost (vector_costs *costs, int count,
> return cost;
> }
>
> +static inline unsigned
> +add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
> + enum vect_cost_model_location where)
> +{
> + gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
> + || kind == scalar_stmt);
> + return add_stmt_cost (costs, count, kind, NULL, NULL_TREE, 0, where);
> +}
> +
> /* Alias targetm.vectorize.add_stmt_cost. */
>
> static inline unsigned
> @@ -2120,6 +2129,9 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
> extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
> enum vect_cost_for_stmt, stmt_vec_info,
> tree, int, enum vect_cost_model_location);
> +extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
> + enum vect_cost_for_stmt,
> + enum vect_cost_model_location);
>
> /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */
@@ -1323,13 +1323,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
/* Now accumulate cost. */
loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
- stmt_info_for_cost *si;
- int j;
- FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
- j, si)
- (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
- si->kind, si->stmt_info, si->vectype,
- si->misalign, si->where);
+ add_stmt_costs (loop_vinfo->scalar_costs,
+ &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
loop_vinfo->scalar_costs->finish_cost (nullptr);
}
@@ -3873,10 +3868,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
iterations are unknown, count a taken branch per peeled loop. */
if (peel_iters_prologue > 0)
retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
- NULL, NULL_TREE, 0, vect_prologue);
+ vect_prologue);
if (*peel_iters_epilogue > 0)
retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
- NULL, NULL_TREE, 0, vect_epilogue);
+ vect_epilogue);
}
stmt_info_for_cost *si;
@@ -3946,8 +3941,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
- (void) add_stmt_cost (target_cost_data, len, vector_stmt,
- NULL, NULL_TREE, 0, vect_prologue);
+ (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
"cost model: Adding cost of checks for loop "
@@ -3959,13 +3953,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
{
/* FIXME: Make cost depend on complexity of individual check. */
unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
- (void) add_stmt_cost (target_cost_data, len, vector_stmt,
- NULL, NULL_TREE, 0, vect_prologue);
+ (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
if (len)
/* Count LEN - 1 ANDs and LEN comparisons. */
(void) add_stmt_cost (target_cost_data, len * 2 - 1,
- scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+ scalar_stmt, vect_prologue);
len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
if (len)
{
@@ -3976,7 +3969,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
nstmts += 1;
(void) add_stmt_cost (target_cost_data, nstmts,
- scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+ scalar_stmt, vect_prologue);
}
if (dump_enabled_p ())
dump_printf (MSG_NOTE,
@@ -3998,7 +3991,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
(void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
- NULL, NULL_TREE, 0, vect_prologue);
+ vect_prologue);
/* Count statements in scalar loop. Using this as scalar cost for a single
iteration for now.
@@ -4104,21 +4097,19 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
if (prologue_need_br_taken_cost)
(void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
- NULL, NULL_TREE, 0, vect_prologue);
+ vect_prologue);
if (prologue_need_br_not_taken_cost)
(void) add_stmt_cost (target_cost_data, 1,
- cond_branch_not_taken, NULL, NULL_TREE, 0,
- vect_prologue);
+ cond_branch_not_taken, vect_prologue);
if (epilogue_need_br_taken_cost)
(void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
- NULL, NULL_TREE, 0, vect_epilogue);
+ vect_epilogue);
if (epilogue_need_br_not_taken_cost)
(void) add_stmt_cost (target_cost_data, 1,
- cond_branch_not_taken, NULL, NULL_TREE, 0,
- vect_epilogue);
+ cond_branch_not_taken, vect_epilogue);
/* Take care of special costs for rgroup controls of partial vectors. */
if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
@@ -4204,9 +4195,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
}
(void) add_stmt_cost (target_cost_data, prologue_stmts,
- scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+ scalar_stmt, vect_prologue);
(void) add_stmt_cost (target_cost_data, body_stmts,
- scalar_stmt, NULL, NULL_TREE, 0, vect_body);
+ scalar_stmt, vect_body);
}
/* FORNOW: The scalar outside cost is incremented in one of the
@@ -109,6 +109,16 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
(builtin_vectorization_cost (kind, vectype, misalign) * count);
}
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+ enum vect_cost_for_stmt kind,
+ enum vect_cost_model_location where)
+{
+ gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken);
+ return record_stmt_cost (body_cost_vec, count, kind, NULL,
+ NULL_TREE, 0, where);
+}
+
/* Return a variable of type ELEM_TYPE[NELEMS]. */
static tree
@@ -1762,6 +1762,15 @@ add_stmt_cost (vector_costs *costs, int count,
return cost;
}
+static inline unsigned
+add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
+ enum vect_cost_model_location where)
+{
+ gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
+ || kind == scalar_stmt);
+ return add_stmt_cost (costs, count, kind, NULL, NULL_TREE, 0, where);
+}
+
/* Alias targetm.vectorize.add_stmt_cost. */
static inline unsigned
@@ -2120,6 +2129,9 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
enum vect_cost_for_stmt, stmt_vec_info,
tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+ enum vect_cost_for_stmt,
+ enum vect_cost_model_location);
/* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO. */