[1/4] vect: Add a unified vect_get_num_copies for slp and non-slp
Checks
Commit Message
Extend the original vect_get_num_copies (purely loop-based) to calculate the
number of vector stmts for an SLP node in a generic vectorization region.
Thanks,
Feng
---
gcc/
* tree-vectorizer.h (vect_get_num_copies): New overload function.
(vect_get_slp_num_vectors): New function.
* tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
number of vector stmts for slp node with vect_get_num_copies.
(vect_slp_analyze_node_operations): Calculate number of vector elements
for constant/external slp node with vect_get_num_copies.
---
gcc/tree-vect-slp.cc | 19 +++----------------
gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
2 files changed, 31 insertions(+), 17 deletions(-)
Comments
On Sat, Jul 13, 2024 at 5:46 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Extend original vect_get_num_copies (pure loop-based) to calculate number of
> vector stmts for slp node regarding a generic vect region.
>
> Thanks,
> Feng
> ---
> gcc/
> * tree-vectorizer.h (vect_get_num_copies): New overload function.
> (vect_get_slp_num_vectors): New function.
> * tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
> number of vector stmts for slp node with vect_get_num_copies.
> (vect_slp_analyze_node_operations): Calculate number of vector elements
> for constant/external slp node with vect_get_num_copies.
> ---
> gcc/tree-vect-slp.cc | 19 +++----------------
> gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
> 2 files changed, 31 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d0a8531fd3b..4dadbc6854d 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6573,17 +6573,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
> }
> }
> else
> - {
> - poly_uint64 vf;
> - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> - vf = loop_vinfo->vectorization_factor;
> - else
> - vf = 1;
> - unsigned int group_size = SLP_TREE_LANES (node);
> - tree vectype = SLP_TREE_VECTYPE (node);
> - SLP_TREE_NUMBER_OF_VEC_STMTS (node)
> - = vect_get_num_vectors (vf * group_size, vectype);
> - }
> + SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
>
> /* Handle purely internal nodes. */
> if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
> @@ -6851,12 +6841,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
> && j == 1);
> continue;
> }
> - unsigned group_size = SLP_TREE_LANES (child);
> - poly_uint64 vf = 1;
> - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> - vf = loop_vinfo->vectorization_factor;
> +
> SLP_TREE_NUMBER_OF_VEC_STMTS (child)
> - = vect_get_num_vectors (vf * group_size, vector_type);
> + = vect_get_num_copies (vinfo, child);
> /* And cost them. */
> vect_prologue_cost_for_slp (child, cost_vec);
> }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 8eb3ec4df86..09923b9b440 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2080,6 +2080,33 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
> return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
> }
>
> +/* Return the number of vectors in the context of vectorization region VINFO,
> + needed for the lanes of SLP node NODE (or for a single statement when
> + NODE is NULL), all operating on vectors of type VECTYPE. If VECTYPE is
> + NULL, SLP_TREE_VECTYPE of NODE is used. */
> +
> +inline unsigned int
> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
> +{
> + poly_uint64 vf;
> +
> + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> + else
> + vf = 1;
> +
> + if (node)
> + {
> + vf *= SLP_TREE_LANES (node);
> + if (!vectype)
> + vectype = SLP_TREE_VECTYPE (node);
> + }
> + else
> + gcc_checking_assert (vectype);
Can you make the checking assert unconditional?
OK with that change. vect_get_num_vectors will ICE anyway,
I guess, so you may instead remove the assert completely if you prefer.
Thanks,
Richard.
> +
> + return vect_get_num_vectors (vf, vectype);
> +}
> +
> /* Return the number of copies needed for loop vectorization when
> a statement operates on vectors of type VECTYPE. This is the
> vectorization factor divided by the number of elements in
> @@ -2088,7 +2115,7 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
> inline unsigned int
> vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
> {
> - return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
> + return vect_get_num_copies (loop_vinfo, NULL, vectype);
> }
>
> /* Update maximum unit count *MAX_NUNITS so that it accounts for
> --
> 2.17.1
>> +inline unsigned int
>> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
>> +{
>> + poly_uint64 vf;
>> +
>> + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
>> + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
>> + else
>> + vf = 1;
>> +
>> + if (node)
>> + {
>> + vf *= SLP_TREE_LANES (node);
>> + if (!vectype)
>> + vectype = SLP_TREE_VECTYPE (node);
>> + }
>> + else
>> + gcc_checking_assert (vectype);
>
> can you make the checking assert unconditional?
>
> OK with that change. vect_get_num_vectors will ICE anyway
> I guess, so at your choice remove the assert completely.
>
OK, I removed the assert.
Thanks,
Feng
________________________________________
From: Richard Biener <richard.guenther@gmail.com>
Sent: Monday, July 15, 2024 10:00 PM
To: Feng Xue OS
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 1/4] vect: Add a unified vect_get_num_copies for slp and non-slp
On Sat, Jul 13, 2024 at 5:46 PM Feng Xue OS <fxue@os.amperecomputing.com> wrote:
>
> Extend original vect_get_num_copies (pure loop-based) to calculate number of
> vector stmts for slp node regarding a generic vect region.
>
> Thanks,
> Feng
> ---
> gcc/
> * tree-vectorizer.h (vect_get_num_copies): New overload function.
> (vect_get_slp_num_vectors): New function.
> * tree-vect-slp.cc (vect_slp_analyze_node_operations_1): Calculate
> number of vector stmts for slp node with vect_get_num_copies.
> (vect_slp_analyze_node_operations): Calculate number of vector elements
> for constant/external slp node with vect_get_num_copies.
> ---
> gcc/tree-vect-slp.cc | 19 +++----------------
> gcc/tree-vectorizer.h | 29 ++++++++++++++++++++++++++++-
> 2 files changed, 31 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index d0a8531fd3b..4dadbc6854d 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6573,17 +6573,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
> }
> }
> else
> - {
> - poly_uint64 vf;
> - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> - vf = loop_vinfo->vectorization_factor;
> - else
> - vf = 1;
> - unsigned int group_size = SLP_TREE_LANES (node);
> - tree vectype = SLP_TREE_VECTYPE (node);
> - SLP_TREE_NUMBER_OF_VEC_STMTS (node)
> - = vect_get_num_vectors (vf * group_size, vectype);
> - }
> + SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
>
> /* Handle purely internal nodes. */
> if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
> @@ -6851,12 +6841,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
> && j == 1);
> continue;
> }
> - unsigned group_size = SLP_TREE_LANES (child);
> - poly_uint64 vf = 1;
> - if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> - vf = loop_vinfo->vectorization_factor;
> +
> SLP_TREE_NUMBER_OF_VEC_STMTS (child)
> - = vect_get_num_vectors (vf * group_size, vector_type);
> + = vect_get_num_copies (vinfo, child);
> /* And cost them. */
> vect_prologue_cost_for_slp (child, cost_vec);
> }
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 8eb3ec4df86..09923b9b440 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -2080,6 +2080,33 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
> return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
> }
>
> +/* Return the number of vectors in the context of vectorization region VINFO,
> + needed for the lanes of SLP node NODE (or for a single statement when
> + NODE is NULL), all operating on vectors of type VECTYPE. If VECTYPE is
> + NULL, SLP_TREE_VECTYPE of NODE is used. */
> +
> +inline unsigned int
> +vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
> +{
> + poly_uint64 vf;
> +
> + if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
> + vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> + else
> + vf = 1;
> +
> + if (node)
> + {
> + vf *= SLP_TREE_LANES (node);
> + if (!vectype)
> + vectype = SLP_TREE_VECTYPE (node);
> + }
> + else
> + gcc_checking_assert (vectype);
Can you make the checking assert unconditional?
OK with that change. vect_get_num_vectors will ICE anyway,
I guess, so you may instead remove the assert completely if you prefer.
Thanks,
Richard.
> +
> + return vect_get_num_vectors (vf, vectype);
> +}
> +
> /* Return the number of copies needed for loop vectorization when
> a statement operates on vectors of type VECTYPE. This is the
> vectorization factor divided by the number of elements in
> @@ -2088,7 +2115,7 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
> inline unsigned int
> vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
> {
> - return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
> + return vect_get_num_copies (loop_vinfo, NULL, vectype);
> }
>
> /* Update maximum unit count *MAX_NUNITS so that it accounts for
> --
> 2.17.1
@@ -6573,17 +6573,7 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
}
}
else
- {
- poly_uint64 vf;
- if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
- vf = loop_vinfo->vectorization_factor;
- else
- vf = 1;
- unsigned int group_size = SLP_TREE_LANES (node);
- tree vectype = SLP_TREE_VECTYPE (node);
- SLP_TREE_NUMBER_OF_VEC_STMTS (node)
- = vect_get_num_vectors (vf * group_size, vectype);
- }
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vect_get_num_copies (vinfo, node);
/* Handle purely internal nodes. */
if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
@@ -6851,12 +6841,9 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
&& j == 1);
continue;
}
- unsigned group_size = SLP_TREE_LANES (child);
- poly_uint64 vf = 1;
- if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
- vf = loop_vinfo->vectorization_factor;
+
SLP_TREE_NUMBER_OF_VEC_STMTS (child)
- = vect_get_num_vectors (vf * group_size, vector_type);
+ = vect_get_num_copies (vinfo, child);
/* And cost them. */
vect_prologue_cost_for_slp (child, cost_vec);
}
@@ -2080,6 +2080,33 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
}
+/* Return the number of vectors in the context of vectorization region VINFO,
+ needed for the lanes of SLP node NODE (or for a single statement when
+ NODE is NULL), all operating on vectors of type VECTYPE. If VECTYPE is
+ NULL, SLP_TREE_VECTYPE of NODE is used. */
+
+inline unsigned int
+vect_get_num_copies (vec_info *vinfo, slp_tree node, tree vectype = NULL)
+{
+ poly_uint64 vf;
+
+ if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ else
+ vf = 1;
+
+ if (node)
+ {
+ vf *= SLP_TREE_LANES (node);
+ if (!vectype)
+ vectype = SLP_TREE_VECTYPE (node);
+ }
+ else
+ gcc_checking_assert (vectype);
+
+ return vect_get_num_vectors (vf, vectype);
+}
+
/* Return the number of copies needed for loop vectorization when
a statement operates on vectors of type VECTYPE. This is the
vectorization factor divided by the number of elements in
@@ -2088,7 +2115,7 @@ vect_get_num_vectors (poly_uint64 nunits, tree vectype)
inline unsigned int
vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
{
- return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
+ return vect_get_num_copies (loop_vinfo, NULL, vectype);
}
/* Update maximum unit count *MAX_NUNITS so that it accounts for