@@ -16,5 +16,5 @@
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 1} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 2} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(unsigned char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -48,5 +48,5 @@ main (void)
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* / 2} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* = \(signed char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -15,5 +15,5 @@
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 1} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* = \(unsigned char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -46,5 +46,5 @@ main (void)
adopts realign_load scheme. It requires rs6000_builtin_mask_for_load to
generate mask whose return type is vector char. */
/* { dg-final { scan-tree-dump-not {vector[^\n]*char} "vect" { target vect_hw_misalign } } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -47,5 +47,5 @@ main (void)
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* |} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* <<} "vect" } } */
/* { dg-final { scan-tree-dump {vector[^\n]*char} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -49,5 +49,5 @@ main (void)
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+ } "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 1} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(signed char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -13,5 +13,5 @@
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+ } "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 1} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(unsigned char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -51,5 +51,5 @@ main (void)
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+ } "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 2} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(signed char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -16,5 +16,5 @@
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* \+ } "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 2} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(unsigned char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -56,5 +56,5 @@ main (void)
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 1} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_over_widening_pattern: detected:[^\n]* >> 2} "vect" } } */
/* { dg-final { scan-tree-dump {vect_recog_cast_forwprop_pattern: detected:[^\n]* \(signed char\)} "vect" } } */
-/* { dg-final { scan-tree-dump-not {vector[^ ]* int} "vect" } } */
+/* { dg-final { scan-tree-dump-not {vector[^ ]* int vect__} "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loop" 1 "vect" } } */
@@ -37,9 +37,6 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
TEST_ALL (VEC_PERM)
/* These loops can't use SLP. */
-/* { dg-final { scan-assembler-not {\tld1b\t} } } */
-/* { dg-final { scan-assembler-not {\tld1h\t} } } */
-/* { dg-final { scan-assembler-not {\tld1w\t} } } */
/* { dg-final { scan-assembler-not {\tld1d\t} } } */
/* { dg-final { scan-assembler {\tld3b\t} } } */
/* { dg-final { scan-assembler {\tld3h\t} } } */
@@ -1073,8 +1073,12 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
}
/* If populating the vector type requires unrolling then fail
- before adjusting *max_nunits for basic-block vectorization. */
+ before adjusting *max_nunits for basic-block vectorization.
+ Allow group sizes that are indivisible by the vector length only if they
+ are known not to exceed the vector length. We may be able to support such
+ cases by generating constant masks. */
if (is_a <bb_vec_info> (vinfo)
+ && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype))
&& !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
{
if (dump_enabled_p ())
@@ -1126,12 +1130,29 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
tree soft_fail_nunits_vectype = NULL_TREE;
tree vectype, nunits_vectype;
+ bool unsupported_datatype = false;
if (!vect_get_vector_types_for_stmt (vinfo, first_stmt_info, &vectype,
- &nunits_vectype, group_size))
+ &nunits_vectype, &unsupported_datatype,
+ group_size))
{
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
+      /* Try to get fallback vector types and continue the analysis, producing
+	 matches[] as if the vector type were not an issue.  This allows group
+	 splitting to happen.  */
+ if (unsupported_datatype
+ && vect_get_vector_types_for_stmt (vinfo, first_stmt_info, &vectype,
+ &nunits_vectype,
+ &unsupported_datatype))
+ {
+ gcc_assert (is_a<bb_vec_info> (vinfo));
+ maybe_soft_fail = true;
+ soft_fail_nunits_vectype = nunits_vectype;
+ }
+ else
+ {
+ /* Fatal mismatch. */
+ matches[0] = false;
+ return false;
+ }
}
if (is_a <bb_vec_info> (vinfo)
&& known_le (TYPE_VECTOR_SUBPARTS (vectype), 1U))
@@ -1659,16 +1680,22 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
if (maybe_soft_fail)
{
- unsigned HOST_WIDE_INT const_nunits;
- if (!TYPE_VECTOR_SUBPARTS
- (soft_fail_nunits_vectype).is_constant (&const_nunits)
- || const_nunits > group_size)
+      /* Use the known minimum number of subparts for VLA vectors: we still
+	 need to choose a splitting point, although the choice is then more
+	 arbitrary.  */
+ unsigned HOST_WIDE_INT const_nunits = constant_lower_bound (
+ TYPE_VECTOR_SUBPARTS (soft_fail_nunits_vectype));
+
+ if (const_nunits > group_size)
matches[0] = false;
else
{
/* With constant vector elements simulate a mismatch at the
point we need to split. */
+ gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
unsigned tail = group_size & (const_nunits - 1);
+ if (tail == 0)
+ tail = const_nunits;
+ gcc_assert (group_size >= tail);
memset (&matches[group_size - tail], 0, sizeof (bool) * tail);
}
return false;
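
To make the split-point arithmetic in the hunk above concrete, here is a minimal standalone sketch; split_tail is a hypothetical helper written purely for illustration (the patch itself clears entries of matches[] directly):

#include <cassert>
#include <cstdio>

/* Hypothetical mirror of the soft-fail arithmetic above: CONST_NUNITS must be
   a power of two, and the result is the number of trailing lanes to mark as
   mismatched so that the group splits at a vector boundary.  */
static unsigned
split_tail (unsigned group_size, unsigned const_nunits)
{
  assert ((const_nunits & (const_nunits - 1)) == 0);
  unsigned tail = group_size & (const_nunits - 1); /* group_size % const_nunits */
  if (tail == 0)
    tail = const_nunits; /* already a multiple: peel off one whole vector */
  return tail;
}

int
main ()
{
  printf ("%u\n", split_tail (7, 4)); /* 3: lanes 4..6 cleared, split after lane 3 */
  printf ("%u\n", split_tail (8, 4)); /* 4: lanes 4..7 cleared, split into 4 + 4 */
  return 0;
}
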
@@ -2399,13 +2426,21 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
/* Check whether we can build the invariant. If we can't
we never will be able to. */
tree type = TREE_TYPE (chains[0][n].op);
- if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
- && (TREE_CODE (type) == BOOLEAN_TYPE
- || !can_duplicate_and_interleave_p (vinfo, group_size,
- type)))
+ if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ())
{
- matches[0] = false;
- goto out;
+ if (TREE_CODE (type) == BOOLEAN_TYPE)
+ {
+ matches[0] = false;
+ goto out;
+ }
+
+ if (!is_a<bb_vec_info> (vinfo)
+ && !can_duplicate_and_interleave_p (vinfo, group_size,
+ type))
+ {
+ matches[0] = false;
+ goto out;
+ }
}
}
else if (dt != vect_internal_def)
@@ -2834,7 +2869,7 @@ out:
uniform_val = NULL_TREE;
break;
}
- if (!uniform_val
+ if (!uniform_val && !is_a<bb_vec_info> (vinfo)
&& !can_duplicate_and_interleave_p (vinfo,
oprnd_info->ops.length (),
TREE_TYPE (op0)))
@@ -4910,6 +4945,53 @@ vect_analyze_slp_reductions (loop_vec_info loop_vinfo,
return true;
}
+/* Update MIN_NUNITS to reflect the minimum number of subparts over all of
+   the vector types used by the SLP subgraph rooted at NODE.  VISITED is used
+   to avoid revisiting nodes and thereby guards against infinite recursion if
+   the subgraph contains a cycle.  MIN_NUNITS is never increased: it is only
+   lowered when a node in the subgraph has a vector type with fewer subparts
+   than the passed-in value.  Initialize MIN_NUNITS to UINT64_MAX before the
+   first call.  */
+
+static void
+vect_update_slp_min_nunits_for_node (slp_tree node, poly_uint64 &min_nunits,
+ hash_set<slp_tree> &visited)
+{
+ if (!node || SLP_TREE_DEF_TYPE (node) != vect_internal_def)
+ return;
+
+ if (visited.add (node))
+ return;
+
+ for (slp_tree child : SLP_TREE_CHILDREN (node))
+ vect_update_slp_min_nunits_for_node (child, min_nunits, visited);
+
+ tree vectype = SLP_TREE_VECTYPE (node);
+ if (!vectype)
+ return;
+
+ /* All unit counts have the form vec_info::vector_size * X for some
+ rational X, therefore we know the values are ordered. */
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ min_nunits = known_eq (min_nunits, UINT64_MAX)
+ ? nunits
+ : ordered_min (min_nunits, nunits);
+}
+
+/* Return the minimum number of subparts over all of the vector types used
+   in the SLP subgraph rooted at NODE.  */
+
+static poly_uint64
+vect_slp_tree_min_nunits (slp_tree node)
+{
+ poly_uint64 min_nunits = UINT64_MAX;
+ hash_set<slp_tree> visited;
+ vect_update_slp_min_nunits_for_node (node, min_nunits, visited);
+ gcc_checking_assert (known_ne (min_nunits, UINT64_MAX));
+ return min_nunits;
+}
+
/* Analyze an SLP instance starting from a group of grouped stores. Call
vect_build_slp_tree to build a tree of packed stmts if possible.
Return FALSE if it's impossible to SLP any stmt in the group. */
@@ -4979,8 +5061,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
poly_uint64 unrolling_factor
= calculate_unrolling_factor (max_nunits, group_size);
- if (maybe_ne (unrolling_factor, 1U)
- && is_a <bb_vec_info> (vinfo))
+ if (maybe_ne (unrolling_factor, 1U) && is_a<bb_vec_info> (vinfo)
+ && !known_ge (vect_slp_tree_min_nunits (node), group_size))
{
unsigned HOST_WIDE_INT const_max_nunits;
if (!max_nunits.is_constant (&const_max_nunits)
@@ -5065,9 +5147,10 @@ vect_analyze_slp_instance (vec_info *vinfo,
= TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
1 << floor_log2 (i));
- unsigned HOST_WIDE_INT const_nunits;
- if (vectype
- && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits))
+ unsigned HOST_WIDE_INT const_nunits
+ = vectype ? constant_lower_bound (TYPE_VECTOR_SUBPARTS (vectype))
+ : 0;
+ if (const_nunits > 1 && (i % const_nunits) == 0)
{
/* Split into two groups at the first vector boundary. */
gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
@@ -11652,7 +11735,21 @@ vectorizable_slp_permutation_1 (vec_info *vinfo, gimple_stmt_iterator *gsi,
unpack_factor = 1;
}
unsigned olanes = unpack_factor * ncopies * SLP_TREE_LANES (node);
- gcc_assert (repeating_p || multiple_p (olanes, nunits));
+
+ /* With fully-predicated BB-SLP, an external node's number of lanes can be
+ incompatible with the chosen vector width (e.g., lane packs of 3 with a
+ natural 2-lane vector type). */
+ if (!repeating_p && !multiple_p (olanes, nunits))
+ {
+ if (dump_p)
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "unsupported permutation %p: vector type %T,"
+ " nunits=" HOST_WIDE_INT_PRINT_UNSIGNED
+ " ncopies=%" PRIu64 ", lanes=%u and unpack=%u\n",
+ (void *) node, vectype, estimated_poly_value (nunits),
+ ncopies, SLP_TREE_LANES (node), unpack_factor);
+ return -1;
+ }
/* Compute the { { SLP operand, vector index}, lane } permutation sequence
from the { SLP operand, scalar lane } permutation as recorded in the
@@ -1672,23 +1672,27 @@ check_load_store_for_partial_vectors (vec_info *vinfo, tree vectype,
unsigned int nvectors;
if (can_div_away_from_zero_p (size, nunits, &nvectors))
return nvectors;
- gcc_unreachable ();
+
+ gcc_assert (known_le (size, nunits));
+ return 1u;
};
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
- poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ poly_uint64 size = loop_vinfo
+ ? group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo)
+ : SLP_TREE_LANES (slp_node);
unsigned factor;
vect_partial_vector_style partial_vector_style
= vect_get_partial_vector_style (vectype, is_load, &factor, elsvals);
if (partial_vector_style == vect_partial_vectors_len)
{
- nvectors = group_memory_nvectors (group_size * vf, nunits);
+ nvectors = group_memory_nvectors (size, nunits);
vect_record_len (vinfo, slp_node, nvectors, vectype, factor);
}
else if (partial_vector_style == vect_partial_vectors_while_ult)
{
- nvectors = group_memory_nvectors (group_size * vf, nunits);
+ nvectors = group_memory_nvectors (size, nunits);
vect_record_mask (vinfo, slp_node, nvectors, vectype, scalar_mask);
}
else
@@ -3351,12 +3355,11 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, slp_tree node,
static tree
vect_get_loop_variant_data_ptr_increment (
- vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
+ loop_vec_info loop_vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
vec_loop_lens *loop_lens, dr_vec_info *dr_info,
vect_memory_access_type memory_access_type)
{
- loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
- tree step = vect_dr_behavior (vinfo, dr_info)->step;
+ tree step = vect_dr_behavior (loop_vinfo, dr_info)->step;
/* gather/scatter never reach here. */
gcc_assert (!mat_gather_scatter_p (memory_access_type));
@@ -3400,7 +3403,7 @@ vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
- return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
+ return vect_get_loop_variant_data_ptr_increment (loop_vinfo, aggr_type, gsi,
loop_lens, dr_info,
memory_access_type);
@@ -5215,7 +5218,7 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
call the function recursively. */
static void
-vect_create_vectorized_promotion_stmts (vec_info *vinfo,
+vect_create_vectorized_promotion_stmts (vec_info *vinfo, slp_tree slp_node,
vec<tree> *vec_oprnds0,
vec<tree> *vec_oprnds1,
stmt_vec_info stmt_info, tree vec_dest,
@@ -5228,37 +5231,39 @@ vect_create_vectorized_promotion_stmts (vec_info *vinfo,
gimple *new_stmt1, *new_stmt2;
vec<tree> vec_tmp = vNULL;
- vec_tmp.create (vec_oprnds0->length () * 2);
+ const unsigned ncopies = vect_get_num_copies (vinfo, slp_node);
+ vec_tmp.create (ncopies);
+ gcc_assert (vec_oprnds0->length () <= ncopies);
FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
{
+ if (vec_tmp.length () >= ncopies)
+ break;
+
if (op_type == binary_op)
vop1 = (*vec_oprnds1)[i];
else
vop1 = NULL_TREE;
/* Generate the two halves of promotion operation. */
- new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
- op_type, vec_dest, gsi,
- stmt_info);
- new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
- op_type, vec_dest, gsi,
- stmt_info);
- if (is_gimple_call (new_stmt1))
- {
- new_tmp1 = gimple_call_lhs (new_stmt1);
- new_tmp2 = gimple_call_lhs (new_stmt2);
- }
- else
+ new_stmt1
+ = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1, op_type,
+ vec_dest, gsi, stmt_info);
+ new_tmp1 = is_gimple_call (new_stmt1) ? gimple_call_lhs (new_stmt1)
+ : gimple_assign_lhs (new_stmt1);
+ vec_tmp.quick_push (new_tmp1);
+
+ if (vec_tmp.length () < ncopies)
{
- new_tmp1 = gimple_assign_lhs (new_stmt1);
- new_tmp2 = gimple_assign_lhs (new_stmt2);
+ new_stmt2
+ = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1, op_type,
+ vec_dest, gsi, stmt_info);
+ new_tmp2 = is_gimple_call (new_stmt2) ? gimple_call_lhs (new_stmt2)
+ : gimple_assign_lhs (new_stmt2);
+ vec_tmp.quick_push (new_tmp2);
}
-
- /* Store the results for the next step. */
- vec_tmp.quick_push (new_tmp1);
- vec_tmp.quick_push (new_tmp2);
}
+ gcc_assert (vec_tmp.length () <= ncopies);
vec_oprnds0->release ();
*vec_oprnds0 = vec_tmp;
}
@@ -5470,6 +5475,7 @@ vectorizable_conversion (vec_info *vinfo,
from the scalar type. */
if (!vectype_in)
vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
+
if (!cost_vec)
gcc_assert (vectype_in);
if (!vectype_in)
@@ -5860,12 +5866,15 @@ vectorizable_conversion (vec_info *vinfo,
stmt_info, this_dest, gsi, c1,
op_type);
else
- vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
- &vec_oprnds1, stmt_info,
- this_dest, gsi,
+ vect_create_vectorized_promotion_stmts (vinfo, slp_node,
+ &vec_oprnds0, &vec_oprnds1,
+ stmt_info, this_dest, gsi,
c1, c2, op_type);
}
+ gcc_assert (vec_oprnds0.length ()
+ == vect_get_num_copies (vinfo, slp_node));
+
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
{
gimple *new_stmt;
@@ -5889,6 +5898,16 @@ vectorizable_conversion (vec_info *vinfo,
generate more than one vector stmt - i.e - we need to "unroll"
the vector stmt by a factor VF/nunits. */
vect_get_vec_defs (vinfo, slp_node, op0, &vec_oprnds0);
+
+ /* Promotion no longer produces redundant defs (since support was
+ added for length/mask-predicated BB SLP of awkward-sized groups),
+ therefore demotion now has to handle that case too. */
+ if (vec_oprnds0.length () % 2 != 0)
+ {
+ tree vectype = TREE_TYPE (vec_oprnds0[0]);
+ vec_oprnds0.safe_push (build_zero_cst (vectype));
+ }
+
/* Arguments are ready. Create the new vector stmts. */
if (cvt_type && modifier == NARROW_DST)
FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
@@ -10672,7 +10691,7 @@ vectorizable_load (vec_info *vinfo,
aggr_type = build_array_type_nelts (elem_type, group_size * nunits);
if (!costing_p)
- bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
+ bump = vect_get_data_ptr_increment (loop_vinfo, gsi, dr_info, aggr_type,
memory_access_type, loop_lens);
unsigned int inside_cost = 0, prologue_cost = 0;
@@ -13239,6 +13258,37 @@ vect_analyze_stmt (vec_info *vinfo,
" live stmt not supported: %G",
stmt_info->stmt);
+ if (bb_vinfo)
+ {
+ unsigned int group_size = SLP_TREE_LANES (node);
+ tree vectype = SLP_TREE_VECTYPE (node);
+ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ bool needs_partial = maybe_lt (group_size, nunits);
+ if (needs_partial)
+ {
+ /* If partial vectors are required then they must be supported by the
+ target; however, don't assume that a partial vectors style has
+ been set because a mask or length may not be required for the
+ statement. */
+ if (!SLP_TREE_CAN_USE_PARTIAL_VECTORS_P (node))
+ return opt_result::failure_at (stmt_info->stmt,
+ "not vectorized: SLP node needs but "
+ "cannot use partial vectors: %G",
+ stmt_info->stmt);
+ }
+ else
+ {
+ /* If we don't need partial vectors then we don't care about whether
+ they are supported or not; however, we need to clear any partial
+ vectors style that might have been chosen because it will be used
+ to control generation of lengths or masks. */
+ SLP_TREE_PARTIAL_VECTORS_STYLE (node) = vect_partial_vectors_none;
+ }
+
+ if (maybe_gt (group_size, nunits))
+ gcc_assert (multiple_p (group_size, nunits));
+ }
+
return opt_result::success ();
}
@@ -13541,13 +13591,7 @@ tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
unsigned int group_size)
{
- /* For BB vectorization, we should always have a group size once we've
- constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
- are tentative requests during things like early data reference
- analysis and pattern recognition. */
- if (is_a <bb_vec_info> (vinfo))
- gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
- else
+ if (!is_a <bb_vec_info> (vinfo))
group_size = 0;
tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
@@ -13561,10 +13605,18 @@ get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
vinfo->used_vector_modes.add (TYPE_MODE (vectype));
/* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
- try again with an explicit number of elements. */
- if (vectype
- && group_size
- && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
+     try again with an explicit number of elements.  A vector type satisfies
+     GROUP_SIZE if it is definitely not too long to store the whole group, or
+     if we are able to generate masks to handle the unknown number of excess
+     lanes that might exist.  Otherwise, we must substitute a vector type
+     that can be used to carve up the group.  */
+ if (vectype && group_size
+ && maybe_gt (TYPE_VECTOR_SUBPARTS (vectype), group_size)
+ && (vect_get_partial_vector_style (vectype, true)
+ == vect_partial_vectors_none
+ || vect_get_partial_vector_style (vectype, false)
+ == vect_partial_vectors_none))
{
/* Start with the biggest number of units that fits within
GROUP_SIZE and halve it until we find a valid vector type.
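
For context, the fallback search mentioned in the trailing comment can be sketched in isolation as below; target_supports_nunits and pick_nunits are hypothetical stand-ins for the real query through get_related_vectype_for_scalar_type, shown only to illustrate the halving loop:

#include <cstdio>

/* Hypothetical stand-in for "does the target provide a vector type with this
   many elements of the required scalar type?".  */
static bool
target_supports_nunits (unsigned nunits)
{
  return nunits == 4 || nunits == 8 || nunits == 16; /* an example target */
}

/* Sketch of the fallback: start from the largest power of two that does not
   exceed GROUP_SIZE and halve until a supported count is found (0 if none).  */
static unsigned
pick_nunits (unsigned group_size)
{
  unsigned nunits = 1;
  while (nunits * 2 <= group_size)
    nunits *= 2; /* 1 << floor_log2 (group_size) */
  for (; nunits > 1; nunits /= 2)
    if (target_supports_nunits (nunits))
      return nunits;
  return 0;
}

int
main ()
{
  printf ("%u\n", pick_nunits (6)); /* 4: biggest supported power of two <= 6 */
  printf ("%u\n", pick_nunits (3)); /* 0: neither 2 nor 1 is supported here */
  return 0;
}
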
@@ -13880,7 +13932,36 @@ vect_maybe_update_slp_op_vectype (vec_info *vinfo, slp_tree op, tree vectype)
&& SLP_TREE_DEF_TYPE (op) == vect_external_def
&& SLP_TREE_LANES (op) > 1)
return false;
- (void) vinfo; /* FORNOW */
+
+ /* When the vectorizer falls back to building vector operands from scalars,
+ it can create SLP trees with external defs that have a number of lanes not
+ divisible by the number of subparts in a vector type naively inferred from
+ the scalar type. Reject such types to avoid ICE when later computing the
+ prologue cost for invariant operands. */
+ if (SLP_TREE_DEF_TYPE (op) == vect_external_def)
+ {
+ poly_uint64 vf = 1;
+
+ if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo))
+ vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ vf *= SLP_TREE_LANES (op);
+
+ if (maybe_lt (TYPE_VECTOR_SUBPARTS (vectype), vf)
+ && !multiple_p (vf, TYPE_VECTOR_SUBPARTS (vectype)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "lanes=" HOST_WIDE_INT_PRINT_UNSIGNED
+ " is not divisible by "
+ "subparts=" HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
+ estimated_poly_value (vf),
+ estimated_poly_value (
+ TYPE_VECTOR_SUBPARTS (vectype)));
+ return false;
+ }
+ }
+
SLP_TREE_VECTYPE (op) = vectype;
return true;
}
@@ -14602,27 +14683,32 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
- Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
number of units needed to vectorize STMT_INFO, or NULL_TREE if the
- statement does not help to determine the overall number of units. */
+     statement does not help to determine the overall number of units.
+
+   - Set *UNSUPPORTED_DATATYPE to false.
+
+   On failure:
+
+   - Set *UNSUPPORTED_DATATYPE to true if the statement can't be vectorized
+     because it uses a data type that the target does not support in vector
+     form for a group of size GROUP_SIZE.  */
opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
tree *stmt_vectype_out,
tree *nunits_vectype_out,
+ bool *unsupported_datatype,
unsigned int group_size)
{
gimple *stmt = stmt_info->stmt;
- /* For BB vectorization, we should always have a group size once we've
- constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
- are tentative requests during things like early data reference
- analysis and pattern recognition. */
- if (is_a <bb_vec_info> (vinfo))
- gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
- else
+ if (!is_a<bb_vec_info> (vinfo))
group_size = 0;
*stmt_vectype_out = NULL_TREE;
*nunits_vectype_out = NULL_TREE;
+ *unsupported_datatype = false;
if (gimple_get_lhs (stmt) == NULL_TREE
/* Allow vector conditionals through here. */
@@ -14695,10 +14781,13 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
}
vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
if (!vectype)
- return opt_result::failure_at (stmt,
- "not vectorized:"
- " unsupported data-type %T\n",
- scalar_type);
+ {
+ *unsupported_datatype = true;
+ return opt_result::failure_at (stmt,
+ "not vectorized:"
+ " unsupported data-type %T\n",
+ scalar_type);
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
@@ -2320,6 +2320,8 @@ vect_get_num_copies (vec_info *vinfo, slp_tree node)
vf *= SLP_TREE_LANES (node);
tree vectype = SLP_TREE_VECTYPE (node);
+ if (known_ge (TYPE_VECTOR_SUBPARTS (vectype), vf))
+ return 1;
return vect_get_num_vectors (vf, vectype);
}
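
A fixed-length illustration of the new early return above; num_copies is a hypothetical scalar mirror of vect_get_num_copies for BB SLP (vf == 1, so VF times the lane count is just the node's lane count), assuming fixed-width vectors so the poly_uint64 arithmetic degenerates to plain unsigned:

#include <cassert>

/* Hypothetical fixed-width mirror of vect_get_num_copies for BB SLP.  */
static unsigned
num_copies (unsigned lanes, unsigned nunits)
{
  if (nunits >= lanes)
    return 1;                  /* one (possibly partial) vector is enough */
  assert (lanes % nunits == 0);
  return lanes / nunits;       /* otherwise the lanes fill whole vectors */
}

int
main ()
{
  assert (num_copies (3, 4) == 1); /* three lanes in one masked 4-lane vector */
  assert (num_copies (8, 4) == 2);
  return 0;
}
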
@@ -2587,9 +2589,9 @@ extern tree vect_gen_while (gimple_seq *, tree, tree, tree,
const char * = nullptr);
extern void vect_gen_while_ssa_name (gimple_seq *, tree, tree, tree, tree);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
-extern opt_result vect_get_vector_types_for_stmt (vec_info *,
- stmt_vec_info, tree *,
- tree *, unsigned int = 0);
+extern opt_result vect_get_vector_types_for_stmt (vec_info *, stmt_vec_info,
+ tree *, tree *,
+ bool *, unsigned int = 0);
extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
/* In tree-if-conv.cc. */
@@ -2923,9 +2925,8 @@ vect_can_use_partial_vectors_p (vec_info *vinfo, slp_tree slp_node)
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
if (loop_vinfo)
return LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
-
- (void) slp_node; /* FORNOW */
- return false;
+ else
+ return SLP_TREE_CAN_USE_PARTIAL_VECTORS_P (slp_node);
}
/* If VINFO is vectorizer state for loop vectorization then record that we no