[1/3] tree-optimization/104582 - Simplify vectorizer cost API and fixes

Message ID 20220218135830.6044113C91@imap2.suse-dmz.suse.de
State New
Headers
Series [1/3] tree-optimization/104582 - Simplify vectorizer cost API and fixes |

Commit Message

Richard Biener Feb. 18, 2022, 1:58 p.m. UTC
  This simplifies the vectorizer cost API by providing overloads
to add_stmt_cost and record_stmt_cost suitable for scalar stmt
and branch stmt costing which do not need information like
a vector type or alignment.  It also fixes two mistakes where
costs for versioning tests were recorded as vector stmt rather
than scalar stmt.

This is a first patch to simplify the actual fix for PR104582.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

2022-02-18  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/104582
	* tree-vectorizer.h (add_stmt_cost): New overload.
	(record_stmt_cost): Likewise.
	* tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost):
	Use add_stmt_costs.
	(vect_get_known_peeling_cost): Use new overloads.
	(vect_estimate_min_profitable_iters): Likewise.  Consistently
	use scalar_stmt for costing versioning checks.
	* tree-vect-stmts.cc (record_stmt_cost): New overload.
---
 gcc/tree-vect-loop.cc  | 39 +++++++++++++++------------------------
 gcc/tree-vect-stmts.cc | 10 ++++++++++
 gcc/tree-vectorizer.h  | 12 ++++++++++++
 3 files changed, 37 insertions(+), 24 deletions(-)
  

Comments

Richard Sandiford Feb. 21, 2022, 10 a.m. UTC | #1
Richard Biener <rguenther@suse.de> writes:
> This simplifies the vectorizer cost API by providing overloads
> to add_stmt_cost and record_stmt_cost suitable for scalar stmt
> and branch stmt costing which do not need information like
> a vector type or alignment.  It also fixes two mistakes where
> costs for versioning tests were recorded as vector stmt rather
> than scalar stmt.
>
> This is a first patch to simplify the actual fix for PR104582.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?

LGTM.  Maybe the record_stmt_cost overload should accept scalar_stmt
too though?  It seems inconsistent to allow it only for add_stmt_cost.

Not a strong opinion though, just a suggestion.

Thanks,
Richard

> 2022-02-18  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/104582
> 	* tree-vectorizer.h (add_stmt_cost): New overload.
> 	(record_stmt_cost): Likewise.
> 	* tree-vect-loop.cc (vect_compute_single_scalar_iteration_cost):
> 	Use add_stmt_costs.
> 	(vect_get_known_peeling_cost): Use new overloads.
> 	(vect_estimate_min_profitable_iters): Likewise.  Consistently
> 	use scalar_stmt for costing versioning checks.
> 	* tree-vect-stmts.cc (record_stmt_cost): New overload.
> ---
>  gcc/tree-vect-loop.cc  | 39 +++++++++++++++------------------------
>  gcc/tree-vect-stmts.cc | 10 ++++++++++
>  gcc/tree-vectorizer.h  | 12 ++++++++++++
>  3 files changed, 37 insertions(+), 24 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 896218f23ea..5c7b163f01c 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -1323,13 +1323,8 @@ vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
>  
>    /* Now accumulate cost.  */
>    loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
> -  stmt_info_for_cost *si;
> -  int j;
> -  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
> -		    j, si)
> -    (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
> -			  si->kind, si->stmt_info, si->vectype,
> -			  si->misalign, si->where);
> +  add_stmt_costs (loop_vinfo->scalar_costs,
> +		  &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
>    loop_vinfo->scalar_costs->finish_cost (nullptr);
>  }
>  
> @@ -3873,10 +3868,10 @@ vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
>  	 iterations are unknown, count a taken branch per peeled loop.  */
>        if (peel_iters_prologue > 0)
>  	retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
> -				   NULL, NULL_TREE, 0, vect_prologue);
> +				   vect_prologue);
>        if (*peel_iters_epilogue > 0)
>  	retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
> -				    NULL, NULL_TREE, 0, vect_epilogue);
> +				    vect_epilogue);
>      }
>  
>    stmt_info_for_cost *si;
> @@ -3946,8 +3941,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>      {
>        /*  FIXME: Make cost depend on complexity of individual check.  */
>        unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
> -      (void) add_stmt_cost (target_cost_data, len, vector_stmt,
> -			    NULL, NULL_TREE, 0, vect_prologue);
> +      (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
>        if (dump_enabled_p ())
>  	dump_printf (MSG_NOTE,
>  		     "cost model: Adding cost of checks for loop "
> @@ -3959,13 +3953,12 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>      {
>        /*  FIXME: Make cost depend on complexity of individual check.  */
>        unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
> -      (void) add_stmt_cost (target_cost_data, len, vector_stmt,
> -			    NULL, NULL_TREE, 0, vect_prologue);
> +      (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
>        len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
>        if (len)
>  	/* Count LEN - 1 ANDs and LEN comparisons.  */
>  	(void) add_stmt_cost (target_cost_data, len * 2 - 1,
> -			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> +			      scalar_stmt, vect_prologue);
>        len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
>        if (len)
>  	{
> @@ -3976,7 +3969,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>  	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
>  	      nstmts += 1;
>  	  (void) add_stmt_cost (target_cost_data, nstmts,
> -				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> +				scalar_stmt, vect_prologue);
>  	}
>        if (dump_enabled_p ())
>  	dump_printf (MSG_NOTE,
> @@ -3998,7 +3991,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>  
>    if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
>      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> -			  NULL, NULL_TREE, 0, vect_prologue);
> +			  vect_prologue);
>  
>    /* Count statements in scalar loop.  Using this as scalar cost for a single
>       iteration for now.
> @@ -4104,21 +4097,19 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>  
>    if (prologue_need_br_taken_cost)
>      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> -			  NULL, NULL_TREE, 0, vect_prologue);
> +			  vect_prologue);
>  
>    if (prologue_need_br_not_taken_cost)
>      (void) add_stmt_cost (target_cost_data, 1,
> -			  cond_branch_not_taken, NULL, NULL_TREE, 0,
> -			  vect_prologue);
> +			  cond_branch_not_taken, vect_prologue);
>  
>    if (epilogue_need_br_taken_cost)
>      (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
> -			  NULL, NULL_TREE, 0, vect_epilogue);
> +			  vect_epilogue);
>  
>    if (epilogue_need_br_not_taken_cost)
>      (void) add_stmt_cost (target_cost_data, 1,
> -			  cond_branch_not_taken, NULL, NULL_TREE, 0,
> -			  vect_epilogue);
> +			  cond_branch_not_taken, vect_epilogue);
>  
>    /* Take care of special costs for rgroup controls of partial vectors.  */
>    if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
> @@ -4204,9 +4195,9 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
>  	  }
>  
>        (void) add_stmt_cost (target_cost_data, prologue_stmts,
> -			    scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
> +			    scalar_stmt, vect_prologue);
>        (void) add_stmt_cost (target_cost_data, body_stmts,
> -			    scalar_stmt, NULL, NULL_TREE, 0, vect_body);
> +			    scalar_stmt, vect_body);
>      }
>  
>    /* FORNOW: The scalar outside cost is incremented in one of the
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 000a0f4b47e..acb9fa611c1 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -109,6 +109,16 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
>        (builtin_vectorization_cost (kind, vectype, misalign) * count);
>  }
>  
> +unsigned
> +record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
> +		  enum vect_cost_for_stmt kind,
> +		  enum vect_cost_model_location where)
> +{
> +  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken);
> +  return record_stmt_cost (body_cost_vec, count, kind, NULL,
> +			   NULL_TREE, 0, where);
> +}
> +
>  /* Return a variable of type ELEM_TYPE[NELEMS].  */
>  
>  static tree
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index ddd0637185c..da99f28c0dc 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1762,6 +1762,15 @@ add_stmt_cost (vector_costs *costs, int count,
>    return cost;
>  }
>  
> +static inline unsigned
> +add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
> +	       enum vect_cost_model_location where)
> +{
> +  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
> +	      || kind == scalar_stmt);
> +  return add_stmt_cost (costs, count, kind, NULL, NULL_TREE, 0, where);
> +}
> +
>  /* Alias targetm.vectorize.add_stmt_cost.  */
>  
>  static inline unsigned
> @@ -2120,6 +2129,9 @@ extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
>  extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
>  				  enum vect_cost_for_stmt, stmt_vec_info,
>  				  tree, int, enum vect_cost_model_location);
> +extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
> +				  enum vect_cost_for_stmt,
> +				  enum vect_cost_model_location);
>  
>  /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO.  */
  

Patch

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 896218f23ea..5c7b163f01c 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1323,13 +1323,8 @@  vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
 
   /* Now accumulate cost.  */
   loop_vinfo->scalar_costs = init_cost (loop_vinfo, true);
-  stmt_info_for_cost *si;
-  int j;
-  FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
-		    j, si)
-    (void) add_stmt_cost (loop_vinfo->scalar_costs, si->count,
-			  si->kind, si->stmt_info, si->vectype,
-			  si->misalign, si->where);
+  add_stmt_costs (loop_vinfo->scalar_costs,
+		  &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo));
   loop_vinfo->scalar_costs->finish_cost (nullptr);
 }
 
@@ -3873,10 +3868,10 @@  vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
 	 iterations are unknown, count a taken branch per peeled loop.  */
       if (peel_iters_prologue > 0)
 	retval = record_stmt_cost (prologue_cost_vec, 1, cond_branch_taken,
-				   NULL, NULL_TREE, 0, vect_prologue);
+				   vect_prologue);
       if (*peel_iters_epilogue > 0)
 	retval += record_stmt_cost (epilogue_cost_vec, 1, cond_branch_taken,
-				    NULL, NULL_TREE, 0, vect_epilogue);
+				    vect_epilogue);
     }
 
   stmt_info_for_cost *si;
@@ -3946,8 +3941,7 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
-      (void) add_stmt_cost (target_cost_data, len, vector_stmt,
-			    NULL, NULL_TREE, 0, vect_prologue);
+      (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
 		     "cost model: Adding cost of checks for loop "
@@ -3959,13 +3953,12 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
     {
       /*  FIXME: Make cost depend on complexity of individual check.  */
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
-      (void) add_stmt_cost (target_cost_data, len, vector_stmt,
-			    NULL, NULL_TREE, 0, vect_prologue);
+      (void) add_stmt_cost (target_cost_data, len, scalar_stmt, vect_prologue);
       len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
       if (len)
 	/* Count LEN - 1 ANDs and LEN comparisons.  */
 	(void) add_stmt_cost (target_cost_data, len * 2 - 1,
-			      scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+			      scalar_stmt, vect_prologue);
       len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
       if (len)
 	{
@@ -3976,7 +3969,7 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	    if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
 	      nstmts += 1;
 	  (void) add_stmt_cost (target_cost_data, nstmts,
-				scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+				scalar_stmt, vect_prologue);
 	}
       if (dump_enabled_p ())
 	dump_printf (MSG_NOTE,
@@ -3998,7 +3991,7 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
-			  NULL, NULL_TREE, 0, vect_prologue);
+			  vect_prologue);
 
   /* Count statements in scalar loop.  Using this as scalar cost for a single
      iteration for now.
@@ -4104,21 +4097,19 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 
   if (prologue_need_br_taken_cost)
     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
-			  NULL, NULL_TREE, 0, vect_prologue);
+			  vect_prologue);
 
   if (prologue_need_br_not_taken_cost)
     (void) add_stmt_cost (target_cost_data, 1,
-			  cond_branch_not_taken, NULL, NULL_TREE, 0,
-			  vect_prologue);
+			  cond_branch_not_taken, vect_prologue);
 
   if (epilogue_need_br_taken_cost)
     (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken,
-			  NULL, NULL_TREE, 0, vect_epilogue);
+			  vect_epilogue);
 
   if (epilogue_need_br_not_taken_cost)
     (void) add_stmt_cost (target_cost_data, 1,
-			  cond_branch_not_taken, NULL, NULL_TREE, 0,
-			  vect_epilogue);
+			  cond_branch_not_taken, vect_epilogue);
 
   /* Take care of special costs for rgroup controls of partial vectors.  */
   if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
@@ -4204,9 +4195,9 @@  vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
 	  }
 
       (void) add_stmt_cost (target_cost_data, prologue_stmts,
-			    scalar_stmt, NULL, NULL_TREE, 0, vect_prologue);
+			    scalar_stmt, vect_prologue);
       (void) add_stmt_cost (target_cost_data, body_stmts,
-			    scalar_stmt, NULL, NULL_TREE, 0, vect_body);
+			    scalar_stmt, vect_body);
     }
 
   /* FORNOW: The scalar outside cost is incremented in one of the
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 000a0f4b47e..acb9fa611c1 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -109,6 +109,16 @@  record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
       (builtin_vectorization_cost (kind, vectype, misalign) * count);
 }
 
+unsigned
+record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+		  enum vect_cost_for_stmt kind,
+		  enum vect_cost_model_location where)
+{
+  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken);
+  return record_stmt_cost (body_cost_vec, count, kind, NULL,
+			   NULL_TREE, 0, where);
+}
+
 /* Return a variable of type ELEM_TYPE[NELEMS].  */
 
 static tree
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index ddd0637185c..da99f28c0dc 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1762,6 +1762,15 @@  add_stmt_cost (vector_costs *costs, int count,
   return cost;
 }
 
+static inline unsigned
+add_stmt_cost (vector_costs *costs, int count, enum vect_cost_for_stmt kind,
+	       enum vect_cost_model_location where)
+{
+  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
+	      || kind == scalar_stmt);
+  return add_stmt_cost (costs, count, kind, NULL, NULL_TREE, 0, where);
+}
+
 /* Alias targetm.vectorize.add_stmt_cost.  */
 
 static inline unsigned
@@ -2120,6 +2129,9 @@  extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
 				  enum vect_cost_for_stmt, stmt_vec_info,
 				  tree, int, enum vect_cost_model_location);
+extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
+				  enum vect_cost_for_stmt,
+				  enum vect_cost_model_location);
 
 /* Overload of record_stmt_cost with VECTYPE derived from STMT_INFO.  */