[1/3] tree-optimization/116609 - SLP live lane vectorization with partial vectors
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
The following implements the simple case of single-lane SLP when
using partial vectors which can use the VEC_EXTRACT_LAST code
generation without changes. I'll keep the PR open for further
enhancements.
This avoids FAILs of gcc.target/aarch64/sve/live_1.c when using
single-lane SLP for non-grouped stores.
PR tree-optimization/116609
* tree-vect-loop.cc (vectorizable_live_operation_1): Support
partial vectors for single-lane SLP.
---
gcc/tree-vect-loop.cc | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
@@ -10961,7 +10961,8 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (ncopies == 1 && !slp_node);
+ gcc_assert (ncopies == 1
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
tree len = vect_get_loop_len (loop_vinfo, &gsi,
@@ -10995,7 +10996,7 @@ vectorizable_live_operation_1 (loop_vec_info loop_vinfo,
where VEC_LHS is the vectorized live-out result and MASK is
the loop mask for the final iteration. */
- gcc_assert (!slp_node);
+ gcc_assert (!slp_node || SLP_TREE_LANES (slp_node) == 1);
tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
gimple_seq tem = NULL;
gimple_stmt_iterator gsi = gsi_last (tem);
@@ -11147,7 +11148,7 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
/* No transformation required. */
if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
{
- if (slp_node)
+ if (slp_node && SLP_TREE_LANES (slp_node) != 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11156,7 +11157,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
"the loop.\n");
LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
}
- else if (ncopies > 1)
+ else if (ncopies > 1
+ || (slp_node && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) > 1))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -11166,7 +11168,8 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
}
else
{
- gcc_assert (ncopies == 1 && !slp_node);
+ gcc_assert (ncopies == 1
+ && (!slp_node || SLP_TREE_LANES (slp_node) == 1));
if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
OPTIMIZE_FOR_SPEED))
vect_record_loop_mask (loop_vinfo,
@@ -11213,8 +11216,9 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info,
if (slp_node)
{
gcc_assert (!loop_vinfo
- || (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)));
+ || ((!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ && !LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ || SLP_TREE_LANES (slp_node) == 1));
/* Get the correct slp vectorized stmt. */
vec_lhs = SLP_TREE_VEC_DEFS (slp_node)[vec_entry];