[1/4] vect: Add a unified vect_get_num_copies for slp and non-slp

Message ID LV2PR01MB783955CF2F7292184F08C43BF7A72@LV2PR01MB7839.prod.exchangelabs.com
State New
Headers
Series [1/4] vect: Add a unified vect_get_num_copies for slp and non-slp |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply

Commit Message

Feng Xue OS July 13, 2024, 3:46 p.m. UTC
  Extend the original vect_get_num_copies (purely loop-based) to calculate the
number of vector stmts for an SLP node in a generic vectorization region.

Thanks,
Feng
---
gcc/
	* tree-vectorizer.h (vect_get_num_copies): New overload function.
	(vect_get_slp_num_vectors): New function.
	* tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
	number of vector stmts for slp node with vect_get_num_copies.
	(vect_slp_analyze_node_operations): Calculate number of vector elements
	for constant/external slp node with vect_get_num_copies.
---
 gcc/tree-vect-slp.cc  | 19 +++----------------
 gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 17 deletions(-)
  

Comments

Richard Biener July 15, 2024, 2 p.m. UTC | #1
On Sat, Jul 13, 2024 at 5:46 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Extend original vect_get_num_copies (pure loop-based) to calculate number of
> vector stmts for slp node regarding a generic vect region.
>
> Thanks,
> Feng
> ---
> gcc/
>         * tree-vectorizer.h (vect_get_num_copies): New overload function.
>         (vect_get_slp_num_vectors): New function.
>         * tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
>         number of vector stmts for slp node with vect_get_num_copies.
>         (vect_slp_analyze_node_operations): Calculate number of vector elements
>         for constant/external slp node with vect_get_num_copies.
> ---
>  gcc/tree-vect-slp.cc  | 19 +++----------------
>  gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
>  2 files changed, 31 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d0a8531fd3b..4dadbc6854d 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6573,17 +6573,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
>           }
>      }
>    else
> -    {
> -      poly_uint64 vf;
> -      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> -       vf = loop_vinfo->vectorization_factor;
> -      else
> -       vf = 1;
> -      unsigned int group_size = SLP_TREE_LANES (node);
> -      tree vectype = SLP_TREE_VECTYPE (node);
> -      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
> -       = vect_get_num_vectors (vf * group_size, vectype);
> -    }
> +    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
>
>    /* Handle purely internal nodes.  */
>    if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
> @@ -6851,12 +6841,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
>                           && j == 1);
>               continue;
>             }
> -         unsigned group_size = SLP_TREE_LANES (child);
> -         poly_uint64 vf = 1;
> -         if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> -           vf = loop_vinfo->vectorization_factor;
> +
>           SLP_TREE_NUMBER_OF_VEC_STMTS (child)
> -           = vect_get_num_vectors (vf * group_size, vector_type);
> +               = vect_get_num_copies (vinfo, child);
>           /* And cost them.  */
>           vect_prologue_cost_for_slp (child, cost_vec);
>         }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 8eb3ec4df86..09923b9b440 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2080,6 +2080,33 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
>    return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
>  }
>
> +/* Return the number of vectors in the context of vectorization region VINFO,
> +   needed for a group of total SIZE statements that are supposed to be
> +   interleaved together with no gap, and all operate on vectors of type
> +   VECTYPE.  If NULL, SLP_TREE_VECTYPE of NODE is used.  */
> +
> +inline unsigned int
> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
> +{
> +  poly_uint64 vf;
> +
> +  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> +    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> +  else
> +    vf = 1;
> +
> +  if (node)
> +    {
> +      vf *= SLP_TREE_LANES (node);
> +      if (!vectype)
> +       vectype = SLP_TREE_VECTYPE (node);
> +    }
> +  else
> +    gcc_checking_assert (vectype);

can you make the checking assert unconditional?

OK with that change.  vect_get_num_vectors will ICE anyway, I guess,
so if you prefer you can remove the assert completely.

Thanks,
Richard.

> +
> +  return vect_get_num_vectors (vf, vectype);
> +}
> +
>  /* Return the number of copies needed for loop vectorization when
>     a statement operates on vectors of type VECTYPE.  This is the
>     vectorization factor divided by the number of elements in
> @@ -2088,7 +2115,7 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
>  inline unsigned int
>  vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
>  {
> -  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
> +  return vect_get_num_copies (loop_vinfo, NULL, vectype);
>  }
>
>  /* Update maximum unit count *MAX_NUNITS so that it accounts for
> --
> 2.17.1
  
Feng Xue OS July 17, 2024, 2:05 p.m. UTC | #2
>> +inline unsigned int
>> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
>> +{
>> +  poly_uint64 vf;
>> +
>> +  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
>> +    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>> +  else
>> +    vf = 1;
>> +
>> +  if (node)
>> +    {
>> +      vf *= SLP_TREE_LANES (node);
>> +      if (!vectype)
>> +       vectype = SLP_TREE_VECTYPE (node);
>> +    }
>> +  else
>> +    gcc_checking_assert (vectype);
>
> can you make the checking assert unconditional?
>
> OK with that change.  vect_get_num_vectors will ICE anyway
> I guess, so at your choice remove the assert completely.
>

OK, I removed the assert.

Thanks,
Feng

________________________________________
From: Richard Biener <richard.guenther@gmail.com>
Sent: Monday, July 15, 2024 10:00 PM
To: Feng Xue OS
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 1/4] vect: Add a unified vect_get_num_copies for slp and non-slp

On Sat, Jul 13, 2024 at 5:46 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Extend original vect_get_num_copies (pure loop-based) to calculate number of
> vector stmts for slp node regarding a generic vect region.
>
> Thanks,
> Feng
> ---
> gcc/
>         * tree-vectorizer.h (vect_get_num_copies): New overload function.
>         (vect_get_slp_num_vectors): New function.
>         * tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
>         number of vector stmts for slp node with vect_get_num_copies.
>         (vect_slp_analyze_node_operations): Calculate number of vector elements
>         for constant/external slp node with vect_get_num_copies.
> ---
>  gcc/tree-vect-slp.cc  | 19 +++----------------
>  gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
>  2 files changed, 31 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d0a8531fd3b..4dadbc6854d 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6573,17 +6573,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
>           }
>      }
>    else
> -    {
> -      poly_uint64 vf;
> -      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> -       vf = loop_vinfo->vectorization_factor;
> -      else
> -       vf = 1;
> -      unsigned int group_size = SLP_TREE_LANES (node);
> -      tree vectype = SLP_TREE_VECTYPE (node);
> -      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
> -       = vect_get_num_vectors (vf * group_size, vectype);
> -    }
> +    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
>
>    /* Handle purely internal nodes.  */
>    if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
> @@ -6851,12 +6841,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
>                           && j == 1);
>               continue;
>             }
> -         unsigned group_size = SLP_TREE_LANES (child);
> -         poly_uint64 vf = 1;
> -         if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> -           vf = loop_vinfo->vectorization_factor;
> +
>           SLP_TREE_NUMBER_OF_VEC_STMTS (child)
> -           = vect_get_num_vectors (vf * group_size, vector_type);
> +               = vect_get_num_copies (vinfo, child);
>           /* And cost them.  */
>           vect_prologue_cost_for_slp (child, cost_vec);
>         }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 8eb3ec4df86..09923b9b440 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2080,6 +2080,33 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
>    return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
>  }
>
> +/* Return the number of vectors in the context of vectorization region VINFO,
> +   needed for a group of total SIZE statements that are supposed to be
> +   interleaved together with no gap, and all operate on vectors of type
> +   VECTYPE.  If NULL, SLP_TREE_VECTYPE of NODE is used.  */
> +
> +inline unsigned int
> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
> +{
> +  poly_uint64 vf;
> +
> +  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> +    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> +  else
> +    vf = 1;
> +
> +  if (node)
> +    {
> +      vf *= SLP_TREE_LANES (node);
> +      if (!vectype)
> +       vectype = SLP_TREE_VECTYPE (node);
> +    }
> +  else
> +    gcc_checking_assert (vectype);

can you make the checking assert unconditional?

OK with that change.  vect_get_num_vectors will ICE anyway, I guess,
so if you prefer you can remove the assert completely.

Thanks,
Richard.

> +
> +  return vect_get_num_vectors (vf, vectype);
> +}
> +
>  /* Return the number of copies needed for loop vectorization when
>     a statement operates on vectors of type VECTYPE.  This is the
>     vectorization factor divided by the number of elements in
> @@ -2088,7 +2115,7 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
>  inline unsigned int
>  vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
>  {
> -  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
> +  return vect_get_num_copies (loop_vinfo, NULL, vectype);
>  }
>
>  /* Update maximum unit count *MAX_NUNITS so that it accounts for
> --
> 2.17.1
  

Patch

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d0a8531fd3b..4dadbc6854d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -6573,17 +6573,7 @@  vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
 	  }
     }
   else
-    {
-      poly_uint64 vf;
-      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
-	vf = loop_vinfo->vectorization_factor;
-      else
-	vf = 1;
-      unsigned int group_size = SLP_TREE_LANES (node);
-      tree vectype = SLP_TREE_VECTYPE (node);
-      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
-	= vect_get_num_vectors (vf * group_size, vectype);
-    }
+    SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
 
   /* Handle purely internal nodes.  */
   if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
@@ -6851,12 +6841,9 @@  vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
 			  && j == 1);
 	      continue;
 	    }
-	  unsigned group_size = SLP_TREE_LANES (child);
-	  poly_uint64 vf = 1;
-	  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
-	    vf = loop_vinfo->vectorization_factor;
+
 	  SLP_TREE_NUMBER_OF_VEC_STMTS (child)
-	    = vect_get_num_vectors (vf * group_size, vector_type);
+		= vect_get_num_copies (vinfo, child);
 	  /* And cost them.  */
 	  vect_prologue_cost_for_slp (child, cost_vec);
 	}
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8eb3ec4df86..09923b9b440 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2080,6 +2080,33 @@  vect_get_num_vectors (poly_uint64 nunits, tree vectype)
   return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
 }
 
+/* Return the number of vectors of type VECTYPE needed in vectorization region
+   VINFO: the vectorization factor (1 if VINFO is not a loop) times the lanes
+   of SLP node NODE when NODE is non-NULL.  If VECTYPE is NULL,
+   SLP_TREE_VECTYPE of NODE is used; NODE and VECTYPE must not both be NULL.  */
+
+inline unsigned int
+vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
+{
+  poly_uint64 vf;
+
+  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+    vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  else
+    vf = 1;
+
+  if (node)
+    {
+      vf *= SLP_TREE_LANES (node);
+      if (!vectype)
+	vectype = SLP_TREE_VECTYPE (node);
+    }
+  else
+    gcc_checking_assert (vectype);
+
+  return vect_get_num_vectors (vf, vectype);
+}
+
 /* Return the number of copies needed for loop vectorization when
    a statement operates on vectors of type VECTYPE.  This is the
    vectorization factor divided by the number of elements in
@@ -2088,7 +2115,7 @@  vect_get_num_vectors (poly_uint64 nunits, tree vectype)
 inline unsigned int
 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
 {
-  return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
+  return vect_get_num_copies (loop_vinfo, NULL, vectype);
 }
 
 /* Update maximum unit count *MAX_NUNITS so that it accounts for