vect: Keep scalar costs around longer

Message ID mptsfw62ad6.fsf@arm.com
State Committed
Commit 6ddc6a57a74c3a388eb1626e59005f54c6e66c57
Headers
Series vect: Keep scalar costs around longer |

Commit Message

Richard Sandiford Nov. 8, 2021, 10:46 a.m. UTC
The scalar costs for a loop are fleeting, with only the final
single_scalar_iteration_cost being kept for later comparison.
This patch replaces single_scalar_iteration_cost with the cost
structure, so that (with later patches) it's possible for targets
to examine other target-specific cost properties as well.  This will
be done by passing the scalar costs to hooks where appropriate;
targets shouldn't try to read the information directly from
loop_vec_infos.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


gcc/
	* tree-vectorizer.h (_loop_vec_info::scalar_costs): New member
	variable.
	(_loop_vec_info::single_scalar_iteration_cost): Delete.
	(LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST): Delete.
	(vector_costs::total_cost): New function.
	* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update
	after above changes.
	(_loop_vec_info::~_loop_vec_info): Delete scalar_costs.
	(vect_compute_single_scalar_iteration_cost): Store the costs
	in loop_vinfo->scalar_costs.
	(vect_estimate_min_profitable_iters): Get the scalar cost from
	loop_vinfo->scalar_costs.
---
 gcc/tree-vect-loop.c  | 17 ++++++-----------
 gcc/tree-vectorizer.h | 17 +++++++++++++----
 2 files changed, 19 insertions(+), 15 deletions(-)
  

Comments

Richard Biener Nov. 8, 2021, 11:02 a.m. UTC | #1
On Mon, Nov 8, 2021 at 11:47 AM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> The scalar costs for a loop are fleeting, with only the final
> single_scalar_iteration_cost being kept for later comparison.
> This patch replaces single_scalar_iteration_cost with the cost
> structure, so that (with later patches) it's possible for targets
> to examine other target-specific cost properties as well.  This will
> be done by passing the scalar costs to hooks where appropriate;
> targets shouldn't try to read the information directly from
> loop_vec_infos.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.  I wondered if we can put this cost into vec_info_shared, but
we seem to look at per-stmt info in vect_compute_single_scalar_iteration_cost,
though quite possibly the relevant bits should not change.  So
we could eventually compute it lazily once.  Something to think about
later.

Richard.

> Richard
>
>
> gcc/
>         * tree-vectorizer.h (_loop_vec_info::scalar_costs): New member
>         variable.
>         (_loop_vec_info::single_scalar_iteration_cost): Delete.
>         (LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST): Delete.
>         (vector_costs::total_cost): New function.
>         * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Update
>         after above changes.
>         (_loop_vec_info::~_loop_vec_info): Delete scalar_costs.
>         (vect_compute_single_scalar_iteration_cost): Store the costs
>         in loop_vinfo->scalar_costs.
>         (vect_estimate_min_profitable_iters): Get the scalar cost from
>         loop_vinfo->scalar_costs.
> ---
>  gcc/tree-vect-loop.c  | 17 ++++++-----------
>  gcc/tree-vectorizer.h | 17 +++++++++++++----
>  2 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index c9ee2e15e35..887275a5071 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -822,6 +822,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
>      num_iters_unchanged (NULL_TREE),
>      num_iters_assumptions (NULL_TREE),
>      vector_costs (nullptr),
> +    scalar_costs (nullptr),
>      th (0),
>      versioning_threshold (0),
>      vectorization_factor (0),
> @@ -839,7 +840,6 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
>      ivexpr_map (NULL),
>      scan_map (NULL),
>      slp_unrolling_factor (1),
> -    single_scalar_iteration_cost (0),
>      inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
>      vectorizable (false),
>      can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
> @@ -931,6 +931,7 @@ _loop_vec_info::~_loop_vec_info ()
>    delete ivexpr_map;
>    delete scan_map;
>    epilogue_vinfos.release ();
> +  delete scalar_costs;
>    delete vector_costs;
>
>    /* When we release an epiloge vinfo that we do not intend to use
> @@ -1292,20 +1293,15 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
>      }
>
>    /* Now accumulate cost.  */
> -  vector_costs *target_cost_data = init_cost (loop_vinfo, true);
> +  loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
>    stmt_info_for_cost *si;
>    int j;
>    FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
>                     j, si)
> -    (void) add_stmt_cost (target_cost_data, si->count,
> +    (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
>                           si->kind, si->stmt_info, si->vectype,
>                           si->misalign, si->where);
> -  unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0;
> -  finish_cost (target_cost_data, &prologue_cost, &body_cost,
> -              &epilogue_cost);
> -  delete target_cost_data;
> -  LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
> -    = prologue_cost + body_cost + epilogue_cost;
> +  loop_vinfo->scalar_costs->finish_cost ();
>  }
>
>
> @@ -3868,8 +3864,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>       TODO: Consider assigning different costs to different scalar
>       statements.  */
>
> -  scalar_single_iter_cost
> -    = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
> +  scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
>
>    /* Add additional cost for the peeled instructions in prologue and epilogue
>       loop.  (For fully-masked loops there will be no peeling.)
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 0e3aad590e8..8dba3a34aa9 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -590,6 +590,9 @@ public:
>    /* The cost of the vector code.  */
>    class vector_costs *vector_costs;
>
> +  /* The cost of the scalar code.  */
> +  class vector_costs *scalar_costs;
> +
>    /* Threshold of number of iterations below which vectorization will not be
>       performed. It is calculated from MIN_PROFITABLE_ITERS and
>       param_min_vect_loop_bound.  */
> @@ -721,9 +724,6 @@ public:
>       applied to the loop, i.e., no unrolling is needed, this is 1.  */
>    poly_uint64 slp_unrolling_factor;
>
> -  /* Cost of a single scalar iteration.  */
> -  int single_scalar_iteration_cost;
> -
>    /* The factor used to over weight those statements in an inner loop
>       relative to the loop being vectorized.  */
>    unsigned int inner_loop_cost_factor;
> @@ -843,7 +843,6 @@ public:
>  #define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
>  #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
>  #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
> -#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
>  #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
>  #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
>  #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
> @@ -1438,6 +1437,7 @@ public:
>    unsigned int body_cost () const;
>    unsigned int epilogue_cost () const;
>    unsigned int outside_cost () const;
> +  unsigned int total_cost () const;
>
>  protected:
>    unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
> @@ -1508,6 +1508,15 @@ vector_costs::outside_cost () const
>    return prologue_cost () + epilogue_cost ();
>  }
>
> +/* Return the cost of the prologue, body and epilogue code
> +   (in abstract units).  */
> +
> +inline unsigned int
> +vector_costs::total_cost () const
> +{
> +  return body_cost () + outside_cost ();
> +}
> +
>  #define VECT_MAX_COST 1000
>
>  /* The maximum number of intermediate steps required in multi-step type
> --
> 2.25.1
>
  

Patch

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c9ee2e15e35..887275a5071 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -822,6 +822,7 @@  _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     num_iters_unchanged (NULL_TREE),
     num_iters_assumptions (NULL_TREE),
     vector_costs (nullptr),
+    scalar_costs (nullptr),
     th (0),
     versioning_threshold (0),
     vectorization_factor (0),
@@ -839,7 +840,6 @@  _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     ivexpr_map (NULL),
     scan_map (NULL),
     slp_unrolling_factor (1),
-    single_scalar_iteration_cost (0),
     inner_loop_cost_factor (param_vect_inner_loop_cost_factor),
     vectorizable (false),
     can_use_partial_vectors_p (param_vect_partial_vector_usage != 0),
@@ -931,6 +931,7 @@  _loop_vec_info::~_loop_vec_info ()
   delete ivexpr_map;
   delete scan_map;
   epilogue_vinfos.release ();
+  delete scalar_costs;
   delete vector_costs;
 
   /* When we release an epiloge vinfo that we do not intend to use
@@ -1292,20 +1293,15 @@  vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
     }
 
   /* Now accumulate cost.  */
-  vector_costs *target_cost_data = init_cost (loop_vinfo, true);
+  loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
   stmt_info_for_cost *si;
   int j;
   FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
 		    j, si)
-    (void) add_stmt_cost (target_cost_data, si->count,
+    (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
 			  si->kind, si->stmt_info, si->vectype,
 			  si->misalign, si->where);
-  unsigned prologue_cost = 0, body_cost = 0, epilogue_cost = 0;
-  finish_cost (target_cost_data, &prologue_cost, &body_cost,
-	       &epilogue_cost);
-  delete target_cost_data;
-  LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo)
-    = prologue_cost + body_cost + epilogue_cost;
+  loop_vinfo->scalar_costs->finish_cost ();
 }
 
 
@@ -3868,8 +3864,7 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
      TODO: Consider assigning different costs to different scalar
      statements.  */
 
-  scalar_single_iter_cost
-    = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
+  scalar_single_iter_cost = loop_vinfo->scalar_costs->total_cost ();
 
   /* Add additional cost for the peeled instructions in prologue and epilogue
      loop.  (For fully-masked loops there will be no peeling.)
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 0e3aad590e8..8dba3a34aa9 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -590,6 +590,9 @@  public:
   /* The cost of the vector code.  */
   class vector_costs *vector_costs;
 
+  /* The cost of the scalar code.  */
+  class vector_costs *scalar_costs;
+
   /* Threshold of number of iterations below which vectorization will not be
      performed. It is calculated from MIN_PROFITABLE_ITERS and
      param_min_vect_loop_bound.  */
@@ -721,9 +724,6 @@  public:
      applied to the loop, i.e., no unrolling is needed, this is 1.  */
   poly_uint64 slp_unrolling_factor;
 
-  /* Cost of a single scalar iteration.  */
-  int single_scalar_iteration_cost;
-
   /* The factor used to over weight those statements in an inner loop
      relative to the loop being vectorized.  */
   unsigned int inner_loop_cost_factor;
@@ -843,7 +843,6 @@  public:
 #define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
 #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
-#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor
@@ -1438,6 +1437,7 @@  public:
   unsigned int body_cost () const;
   unsigned int epilogue_cost () const;
   unsigned int outside_cost () const;
+  unsigned int total_cost () const;
 
 protected:
   unsigned int record_stmt_cost (stmt_vec_info, vect_cost_model_location,
@@ -1508,6 +1508,15 @@  vector_costs::outside_cost () const
   return prologue_cost () + epilogue_cost ();
 }
 
+/* Return the cost of the prologue, body and epilogue code
+   (in abstract units).  */
+
+inline unsigned int
+vector_costs::total_cost () const
+{
+  return body_cost () + outside_cost ();
+}
+
 #define VECT_MAX_COST 1000
 
 /* The maximum number of intermediate steps required in multi-step type