vect: Keep scalar costs around longer
Commit Message
The scalar costs for a loop are fleeting, with only the final
single_scalar_iteration_cost being kept for later comparison.
This patch replaces single_scalar_iteration_cost with the vector_costs
structure it was computed from, so that (with later patches) it's
possible for targets to examine other target-specific cost properties
as well. This will
be done by passing the scalar costs to hooks where appropriate;
targets shouldn't try to read the information directly from
loop_vec_infos.
Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
Richard
gcc/
* tree-vectorizer.h (_loop_vec_info::scalar_costs): New member
variable.
(_loop_vec_info::single_scalar_iteration_cost): Delete.
(LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST): Delete.
(vector_costs::total_cost): New function.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update
after above changes.
(_loop_vec_info::~_loop_vec_info): Delete scalar_costs.
(vect_compute_single_scalar_iteration_cost): Store the costs
in loop_vinfo->scalar_costs.
(vect_estimate_min_profitable_iters): Get the scalar cost from
loop_vinfo->scalar_costs.
---
gcc/tree-vect-loop.c | 17 ++++++-----------
gcc/tree-vectorizer.h | 17 +++++++++++++----
2 files changed, 19 insertions(+), 15 deletions(-)
Comments
On Mon, Nov 8, 2021 at 11:47 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> The scalar costs for a loop are fleeting, with only the final
> single_scalar_iteration_cost being kept for later comparison.
> This patch replaces single_scalar_iteration_cost with the cost
> structure, so that (with later patches) it's possible for targets
> to examine other target-specific cost properties as well. This will
> be done by passing the scalar costs to hooks where appropriate;
> targets shouldn't try to read the information directly from
> loop_vec_infos.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu. OK to install?
OK. I wondered whether we could put this cost into vec_info_shared, but
we seem to look at per-stmt info in vect_compute_single_scalar_iteration_cost,
though quite possibly the relevant bits should not change. So
we could eventually compute it lazily, once. Something to think about
later.
Richard.
> Richard
>
>
> gcc/
> * tree-vectorizer.h (_loop_vec_info::scalar_costs): New member
> variable.
> (_loop_vec_info::single_scalar_iteration_cost): Delete.
> (LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST): Delete.
> (vector_costs::total_cost): New function.
> * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update
> after above changes.
> (_loop_vec_info::~_loop_vec_info): Delete scalar_costs.
> (vect_compute_single_scalar_iteration_cost): Store the costs
> in loop_vinfo->scalar_costs.
> (vect_estimate_min_profitable_iters): Get the scalar cost from
> loop_vinfo->scalar_costs.
> ---
> gcc/tree-vect-loop.c | 17 ++++++-----------
> gcc/tree-vectorizer.h | 17 +++++++++++++----
> 2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index c9ee2e15e35..887275a5071 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -822,6 +822,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
> num_iters_unchanged (NULL_TREE),
> num_iters_assumptions (NULL_TREE),
> vector_costs (nullptr),
> + scalar_costs (nullptr),
> th (0),
> versioning_threshold (0),
> vectorization_factor (0),
> @@ -839,7 +840,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
> ivexpr_map (NULL),
> scan_map (NULL),
> slp_unrolling_factor (1),
> - single_scalar_iteration_cost (0),
> inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
> vectorizable (false),
> can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
> @@ -931,6 +931,7 @@ _loop_vec_info::~_loop_vec_info ()
> delete ivexpr_map;
> delete scan_map;
> epilogue_vinfos.release ();
> + delete scalar_costs;
> delete vector_costs;
>
> /* When we release an epiloge vinfo that we do not intend to use
> @@ -1292,20 +1293,15 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
> }
>
> /* Now accumulate cost. */
> - vector_costs *target_cost_data = init_cost (loop_vinfo, true);
> + loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
> stmt_info_for_cost *si;
> int j;
> FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
> j, si)
> - (void) add_stmt_cost (target_cost_data, si->count,
> + (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
> si->kind, si->stmt_info, si->vectype,
> si->misalign, si->where);
> - unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0;
> - finish_cost (target_cost_data, &prologue_cost, &body_cost,
> - &epilogue_cost);
> - delete target_cost_data;
> - LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
> - = prologue_cost + body_cost + epilogue_cost;
> + loop_vinfo->scalar_costs->finish_cost ();
> }
>
>
> @@ -3868,8 +3864,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
> TODO: Consider assigning different costs to different scalar
> statements. */
>
> - scalar_single_iter_cost
> - = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
> + scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
>
> /* Add additional cost for the peeled instructions in prologue and epilogue
> loop. (For fully-masked loops there will be no peeling.)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 0e3aad590e8..8dba3a34aa9 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -590,6 +590,9 @@ public:
> /* The cost of the vector code. */
> class vector_costs *vector_costs;
>
> + /* The cost of the scalar code. */
> + class vector_costs *scalar_costs;
> +
> /* Threshold of number of iterations below which vectorization will not be
> performed. It is calculated from MIN_PROFITABLE_ITERS and
> param_min_vect_loop_bound. */
> @@ -721,9 +724,6 @@ public:
> applied to the loop, i.e., no unrolling is needed, this is 1. */
> poly_uint64 slp_unrolling_factor;
>
> - /* Cost of a single scalar iteration. */
> - int single_scalar_iteration_cost;
> -
> /* The factor used to over weight those statements in an inner loop
> relative to the loop being vectorized. */
> unsigned int inner_loop_cost_factor;
> @@ -843,7 +843,6 @@ public:
> #define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
> #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
> #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
> -#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
> #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
> #define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
> #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
> @@ -1438,6 +1437,7 @@ public:
> unsigned int body_cost () const;
> unsigned int epilogue_cost () const;
> unsigned int outside_cost () const;
> + unsigned int total_cost () const;
>
> protected:
> unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> @@ -1508,6 +1508,15 @@ vector_costs::outside_cost () const
> return prologue_cost () + epilogue_cost ();
> }
>
> +/* Return the cost of the prologue, body and epilogue code
> + (in abstract units). */
> +
> +inline unsigned int
> +vector_costs::total_cost () const
> +{
> + return body_cost () + outside_cost ();
> +}
> +
> #define VECT_MAX_COST 1000
>
> /* The maximum number of intermediate steps required in multi-step type
> --
> 2.25.1
>
@@ -822,6 +822,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
num_iters_unchanged (NULL_TREE),
num_iters_assumptions (NULL_TREE),
vector_costs (nullptr),
+ scalar_costs (nullptr),
th (0),
versioning_threshold (0),
vectorization_factor (0),
@@ -839,7 +840,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
ivexpr_map (NULL),
scan_map (NULL),
slp_unrolling_factor (1),
- single_scalar_iteration_cost (0),
inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
vectorizable (false),
can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
@@ -931,6 +931,7 @@ _loop_vec_info::~_loop_vec_info ()
delete ivexpr_map;
delete scan_map;
epilogue_vinfos.release ();
+ delete scalar_costs;
delete vector_costs;
/* When we release an epiloge vinfo that we do not intend to use
@@ -1292,20 +1293,15 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
}
/* Now accumulate cost. */
- vector_costs *target_cost_data = init_cost (loop_vinfo, true);
+ loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
stmt_info_for_cost *si;
int j;
FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
j, si)
- (void) add_stmt_cost (target_cost_data, si->count,
+ (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
si->kind, si->stmt_info, si->vectype,
si->misalign, si->where);
- unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0;
- finish_cost (target_cost_data, &prologue_cost, &body_cost,
- &epilogue_cost);
- delete target_cost_data;
- LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
- = prologue_cost + body_cost + epilogue_cost;
+ loop_vinfo->scalar_costs->finish_cost ();
}
@@ -3868,8 +3864,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
TODO: Consider assigning different costs to different scalar
statements. */
- scalar_single_iter_cost
- = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
+ scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
/* Add additional cost for the peeled instructions in prologue and epilogue
loop. (For fully-masked loops there will be no peeling.)
@@ -590,6 +590,9 @@ public:
/* The cost of the vector code. */
class vector_costs *vector_costs;
+ /* The cost of the scalar code. */
+ class vector_costs *scalar_costs;
+
/* Threshold of number of iterations below which vectorization will not be
performed. It is calculated from MIN_PROFITABLE_ITERS and
param_min_vect_loop_bound. */
@@ -721,9 +724,6 @@ public:
applied to the loop, i.e., no unrolling is needed, this is 1. */
poly_uint64 slp_unrolling_factor;
- /* Cost of a single scalar iteration. */
- int single_scalar_iteration_cost;
-
/* The factor used to over weight those statements in an inner loop
relative to the loop being vectorized. */
unsigned int inner_loop_cost_factor;
@@ -843,7 +843,6 @@ public:
#define LOOP_VINFO_SCALAR_LOOP_SCALING(L) (L)->scalar_loop_scaling
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
-#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_VINFO_SIMD_IF_COND(L) (L)->simd_if_cond
#define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
@@ -1438,6 +1437,7 @@ public:
unsigned int body_cost () const;
unsigned int epilogue_cost () const;
unsigned int outside_cost () const;
+ unsigned int total_cost () const;
protected:
unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
@@ -1508,6 +1508,15 @@ vector_costs::outside_cost () const
return prologue_cost () + epilogue_cost ();
}
+/* Return the cost of the prologue, body and epilogue code
+ (in abstract units). */
+
+inline unsigned int
+vector_costs::total_cost () const
+{
+ return body_cost () + outside_cost ();
+}
+
#define VECT_MAX_COST 1000
/* The maximum number of intermediate steps required in multi-step type