[1/9] vect: Move vect_model_load_cost next to the transform in vectorizable_load

Message ID a8597dce488a3301f4b9917249a5a286b925f87c.1686573640.git.linkw@linux.ibm.com
State New
Headers
Series vect: Move costing next to the transform for vect load |

Commit Message

Kewen.Lin June 13, 2023, 2:03 a.m. UTC
  This is the initial patch of a series that moves costing next to
the transform.  It still uses vect_model_load_cost for costing,
but moves the call down and duplicates it along the paths that
handle the different vect_memory_access_types, which should make
the subsequent patches easier to review.  This patch should not
introduce any functional changes.

gcc/ChangeLog:

	* tree-vect-stmts.cc (vectorizable_load): Move the call to
	vect_model_load_cost down and duplicate it on the transform paths
	that handle the different vect_memory_access_types.
---
 gcc/tree-vect-stmts.cc | 86 ++++++++++++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 29 deletions(-)
  

Patch

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a7acc032d47..44514658be3 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9430,7 +9430,9 @@  vectorizable_load (vec_info *vinfo,
 	}
     }
 
-  if (!vec_stmt) /* transformation not required.  */
+  bool costing_p = !vec_stmt;
+
+  if (costing_p) /* transformation not required.  */
     {
       if (slp_node
 	  && mask
@@ -9464,17 +9466,13 @@  vectorizable_load (vec_info *vinfo,
 	vinfo->any_known_not_updated_vssa = true;
 
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
-      vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
-			    alignment_support_scheme, misalignment,
-			    &gs_info, slp_node, cost_vec);
-      return true;
     }
 
   if (!slp)
     gcc_assert (memory_access_type
 		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
 
-  if (dump_enabled_p ())
+  if (dump_enabled_p () && !costing_p)
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform load. ncopies = %d\n", ncopies);
 
@@ -9485,13 +9483,26 @@  vectorizable_load (vec_info *vinfo,
 
   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
     {
-      vect_build_gather_load_calls (vinfo,
-				    stmt_info, gsi, vec_stmt, &gs_info, mask);
+      if (costing_p)
+	vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
+			      alignment_support_scheme, misalignment, &gs_info,
+			      slp_node, cost_vec);
+      else
+	vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
+				      mask);
       return true;
     }
 
   if (memory_access_type == VMAT_INVARIANT)
     {
+      if (costing_p)
+	{
+	  vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
+				memory_access_type, alignment_support_scheme,
+				misalignment, &gs_info, slp_node, cost_vec);
+	  return true;
+	}
+
       gcc_assert (!grouped_load && !mask && !bb_vinfo);
       /* If we have versioned for aliasing or the loop doesn't
 	 have any data dependencies that would preclude this,
@@ -9548,6 +9559,14 @@  vectorizable_load (vec_info *vinfo,
   if (memory_access_type == VMAT_ELEMENTWISE
       || memory_access_type == VMAT_STRIDED_SLP)
     {
+      if (costing_p)
+	{
+	  vect_model_load_cost (vinfo, stmt_info, ncopies, vf,
+				memory_access_type, alignment_support_scheme,
+				misalignment, &gs_info, slp_node, cost_vec);
+	  return true;
+	}
+
       gimple_stmt_iterator incr_gsi;
       bool insert_after;
       tree offvar;
@@ -9989,17 +10008,20 @@  vectorizable_load (vec_info *vinfo,
 	 here, since we can't guarantee first_stmt_info DR has been
 	 initialized yet, use first_stmt_info_for_drptr DR by bumping the
 	 distance from first_stmt_info DR instead as below.  */
-      if (!diff_first_stmt_info)
-	msq = vect_setup_realignment (vinfo,
-				      first_stmt_info, gsi, &realignment_token,
-				      alignment_support_scheme, NULL_TREE,
-				      &at_loop);
-      if (alignment_support_scheme == dr_explicit_realign_optimized)
-	{
-	  phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
-	  offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
-			       size_one_node);
-	  gcc_assert (!first_stmt_info_for_drptr);
+      if (!costing_p)
+	{
+	  if (!diff_first_stmt_info)
+	    msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
+					  &realignment_token,
+					  alignment_support_scheme, NULL_TREE,
+					  &at_loop);
+	  if (alignment_support_scheme == dr_explicit_realign_optimized)
+	    {
+	      phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
+	      offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
+				   size_one_node);
+	      gcc_assert (!first_stmt_info_for_drptr);
+	    }
 	}
     }
   else
@@ -10020,8 +10042,9 @@  vectorizable_load (vec_info *vinfo,
   else if (memory_access_type == VMAT_GATHER_SCATTER)
     {
       aggr_type = elem_type;
-      vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
-				       &bump, &vec_offset);
+      if (!costing_p)
+	vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info, &bump,
+					 &vec_offset);
     }
   else
     {
@@ -10035,7 +10058,7 @@  vectorizable_load (vec_info *vinfo,
 
   auto_vec<tree> vec_offsets;
   auto_vec<tree> vec_masks;
-  if (mask)
+  if (mask && !costing_p)
     {
       if (slp_node)
 	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
@@ -10049,7 +10072,7 @@  vectorizable_load (vec_info *vinfo,
   for (j = 0; j < ncopies; j++)
     {
       /* 1. Create the vector or array pointer update chain.  */
-      if (j == 0)
+      if (j == 0 && !costing_p)
 	{
 	  bool simd_lane_access_p
 	    = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
@@ -10108,7 +10131,7 @@  vectorizable_load (vec_info *vinfo,
 	  if (mask)
 	    vec_mask = vec_masks[0];
 	}
-      else
+      else if (!costing_p)
 	{
 	  gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
 	  if (dataref_offset)
@@ -10125,7 +10148,7 @@  vectorizable_load (vec_info *vinfo,
 	dr_chain.create (vec_num);
 
       gimple *new_stmt = NULL;
-      if (memory_access_type == VMAT_LOAD_STORE_LANES)
+      if (memory_access_type == VMAT_LOAD_STORE_LANES && !costing_p)
 	{
 	  tree vec_array;
 
@@ -10177,7 +10200,7 @@  vectorizable_load (vec_info *vinfo,
 	  /* Record that VEC_ARRAY is now dead.  */
 	  vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
 	}
-      else
+      else if (!costing_p)
 	{
 	  for (i = 0; i < vec_num; i++)
 	    {
@@ -10631,7 +10654,7 @@  vectorizable_load (vec_info *vinfo,
       if (slp && !slp_perm)
 	continue;
 
-      if (slp_perm)
+      if (slp_perm && !costing_p)
         {
 	  unsigned n_perms;
 	  /* For SLP we know we've seen all possible uses of dr_chain so
@@ -10643,7 +10666,7 @@  vectorizable_load (vec_info *vinfo,
 						  nullptr, true);
 	  gcc_assert (ok);
         }
-      else
+      else if (!costing_p)
         {
           if (grouped_load)
   	    {
@@ -10659,9 +10682,14 @@  vectorizable_load (vec_info *vinfo,
         }
       dr_chain.release ();
     }
-  if (!slp)
+  if (!slp && !costing_p)
     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
 
+  if (costing_p)
+    vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
+			  alignment_support_scheme, misalignment, &gs_info,
+			  slp_node, cost_vec);
+
   return true;
 }