The PR uncovers unchecked constraints on the ability to code-generate
with SLP but also latent issues with regard to stmt order checking
since loop (early-break) and BB (for quite some time) vectorization
are no longer constraint to single-BBs. In particular get_later_stmt
simply compares UIDs of stmts, but that's only reliable when they
are in the same BB.
For the PR in question the problematical case is demoting a SLP node
to external which fails to check we can actually code generate this
in the way we do (using get_later_stmt). The following thus adds
checking that we demote to external only when all defs are from
the same BB.
We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was
for a wrong-code issue and the vectorization done is a no-op.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/116352
PR tree-optimization/117876
* tree-vect-slp.cc (vect_slp_can_convert_to_external): New.
(vect_slp_convert_to_external): Call it.
(vect_build_slp_tree_2): Likewise.
* gcc.dg/vect/pr116352.c: New testcase.
* gcc.dg/vect/bb-slp-49.c: Remove vectorization check.
---
gcc/testsuite/gcc.dg/vect/bb-slp-49.c | 3 +--
gcc/testsuite/gcc.dg/vect/pr116352.c | 34 +++++++++++++++++++++++++++
gcc/tree-vect-slp.cc | 29 ++++++++++++++++++-----
3 files changed, 58 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr116352.c
@@ -23,6 +23,5 @@ main ()
return 0;
}
-/* See that we vectorize an SLP instance. */
+/* See that we try to vectorize an SLP instance. */
/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
-/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
new file mode 100644
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+static void addPrior(float center_x, float center_y, float width, float height,
+ bool normalized, float *dst)
+{
+ if (normalized)
+ {
+ dst[0] = (center_x - width * 0.5f);
+ dst[1] = (center_y - height * 0.5f);
+ dst[2] = (center_x + width * 0.5f);
+ dst[3] = (center_y + height * 0.5f);
+ }
+ else
+ {
+ dst[0] = center_x - width * 0.5f;
+ dst[1] = center_y - height * 0.5f;
+ dst[2] = center_x + width * 0.5f - 1.0f;
+ dst[3] = center_y + height * 0.5f - 1.0f;
+ }
+}
+void forward(float *outputPtr, int _offsetsXs, float *_offsetsX,
+ float *_offsetsY, float _stepX, float _stepY,
+ bool _bboxesNormalized, float _boxWidth, float _boxHeight)
+{
+ for (int j = 0; j < _offsetsXs; ++j)
+ {
+ float center_x = (_offsetsX[j]) * _stepX;
+ float center_y = (_offsetsY[j]) * _stepY;
+ addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized,
+ outputPtr);
+ outputPtr += 4;
+ }
+}
@@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, gimple_stmt_iterator *,
static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
slp_tree, stmt_vector_for_cost *);
static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
+static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &);
static object_allocator<_slp_tree> *slp_tree_pool;
static slp_tree slp_first_node;
@@ -2887,7 +2888,8 @@ fail:
for (j = 0; j < group_size; ++j)
if (!matches[j])
break;
- if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
+ if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))
+ && vect_slp_can_convert_to_external (oprnd_info->def_stmts))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
node, node_instance, cost_vec);
}
+/* Verify if we can externalize a set of internal defs. */
+
+static bool
+vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts)
+{
+ basic_block bb = NULL;
+ for (stmt_vec_info stmt : stmts)
+ if (!stmt)
+ return false;
+ /* Constant generation uses get_later_stmt which can only handle
+ defs from the same BB. */
+ else if (!bb)
+ bb = gimple_bb (stmt->stmt);
+ else if (gimple_bb (stmt->stmt) != bb)
+ return false;
+ return true;
+}
+
/* Try to build NODE from scalars, returning true on success.
NODE_INSTANCE is the SLP instance that contains NODE. */
@@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
|| !SLP_TREE_SCALAR_STMTS (node).exists ()
|| vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node))
/* Force the mask use to be built from scalars instead. */
- || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
+ || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))
+ || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node)))
return false;
- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
- if (!stmt_info)
- return false;
-
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands of %p from scalars instead\n",