@@ -2718,7 +2718,7 @@ vect_determine_partial_vectors_and_peeling (loop_vec_info loop_vinfo)
static opt_result
vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
unsigned *suggested_unroll_factor,
- bool& slp_done_for_suggested_uf)
+ unsigned& slp_done_for_suggested_uf)
{
opt_result ok = opt_result::success ();
int res;
@@ -2787,11 +2787,11 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
/* If the slp decision is false when suggested unroll factor is worked
out, and we are applying suggested unroll factor, we can simply skip
all slp related analyses this time. */
- bool slp = !applying_suggested_uf || slp_done_for_suggested_uf;
+ unsigned slp = !applying_suggested_uf ? 2 : slp_done_for_suggested_uf;
/* Classify all cross-iteration scalar data-flow cycles.
Cross-iteration cycles caused by virtual phis are analyzed separately. */
- vect_analyze_scalar_cycles (loop_vinfo, slp);
+ vect_analyze_scalar_cycles (loop_vinfo, slp == 2);
vect_pattern_recog (loop_vinfo);
@@ -2854,18 +2854,23 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
vect_compute_single_scalar_iteration_cost (loop_vinfo);
poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ bool saved_can_use_partial_vectors_p
+ = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
+
+ /* This is the point where we can re-start analysis with SLP degraded. */
+start_over:
if (slp)
{
/* Check the SLP opportunities in the loop, analyze and build
SLP trees. */
- ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
+ ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo),
+ slp == 1);
if (!ok)
return ok;
/* If there are any SLP instances mark them as pure_slp. */
- slp = vect_make_slp_decision (loop_vinfo);
- if (slp)
+ if (vect_make_slp_decision (loop_vinfo))
{
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
@@ -2881,16 +2886,10 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
}
}
- bool saved_can_use_partial_vectors_p
- = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
-
/* We don't expect to have to roll back to anything other than an empty
set of rgroups. */
gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
- /* This is the point where we can re-start analysis with SLP forced off. */
-start_over:
-
/* When we arrive here with SLP disabled and we are supposed
to use SLP for everything fail vectorization. */
if (!slp && param_vect_force_slp)
@@ -3218,15 +3217,14 @@ again:
/* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
gcc_assert (!ok);
- /* Try again with SLP forced off but if we didn't do any SLP there is
+ /* Try again with SLP degraded but if we didn't do any SLP there is
no point in re-trying. */
if (!slp)
return ok;
- /* If the slp decision is true when suggested unroll factor is worked
- out, and we are applying suggested unroll factor, we don't need to
- re-try any more. */
- if (applying_suggested_uf && slp_done_for_suggested_uf)
+ /* If we are applying suggested unroll factor, we don't need to
+ re-try any more as we want to keep the SLP mode fixed. */
+ if (applying_suggested_uf)
return ok;
/* If there are reduction chains re-trying will fail anyway. */
@@ -3271,11 +3269,18 @@ again:
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "re-trying with SLP disabled\n");
+ {
+ if (slp)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "re-trying with single-lane SLP\n");
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "re-trying with SLP disabled\n");
+ }
- /* Roll back state appropriately. No SLP this time. */
- slp = false;
+ /* Roll back state appropriately. Degrade SLP this time, going from
+ multi-lane to single-lane to disabled. */
+ --slp;
/* Restore vectorization factor as it were without SLP. */
LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
/* Free the SLP instances. */
@@ -3420,7 +3425,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
machine_mode vector_mode = vector_modes[mode_i];
loop_vinfo->vector_mode = vector_mode;
unsigned int suggested_unroll_factor = 1;
- bool slp_done_for_suggested_uf = false;
+ unsigned slp_done_for_suggested_uf = 0;
/* Run the main analysis. */
opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
@@ -3488,7 +3488,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
scalar_stmts_to_slp_tree_map_t *bst_map,
stmt_vec_info stmt_info, slp_instance_kind kind,
unsigned max_tree_size, unsigned *limit,
- bool force_single_lane = false);
+ bool force_single_lane);
/* Build an interleaving scheme for the store sources RHS_NODES from
SCALAR_STMTS. */
@@ -3684,7 +3684,7 @@ vect_build_slp_instance (vec_info *vinfo,
scalar_stmts_to_slp_tree_map_t *bst_map,
/* ??? We need stmt_info for group splitting. */
stmt_vec_info stmt_info_,
- bool force_single_lane = false)
+ bool force_single_lane)
{
/* If there's no budget left bail out early. */
if (*limit == 0)
@@ -3891,7 +3891,7 @@ vect_build_slp_instance (vec_info *vinfo,
group1_size);
bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
kind, max_tree_size,
- limit);
+ limit, false);
/* Split the rest at the failure point and possibly
re-analyze the remaining matching part if it has
at least two lanes. */
@@ -3904,14 +3904,14 @@ vect_build_slp_instance (vec_info *vinfo,
if (i - group1_size > 1)
res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
kind, max_tree_size,
- limit);
+ limit, false);
}
/* Re-analyze the non-matching tail if it has at least
two lanes. */
if (i + 1 < group_size)
res |= vect_analyze_slp_instance (vinfo, bst_map,
rest, kind, max_tree_size,
- limit);
+ limit, false);
return res;
}
}
@@ -4544,7 +4544,8 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo,
trees of packed scalar stmts if SLP is possible. */
opt_result
-vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
+vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size,
+ bool force_single_lane)
{
loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
unsigned int i;
@@ -4561,7 +4562,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
/* Find SLP sequences starting from groups of grouped stores. */
FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
vect_analyze_slp_instance (vinfo, bst_map, first_element,
- slp_inst_kind_store, max_tree_size, &limit);
+ slp_inst_kind_store, max_tree_size, &limit,
+ force_single_lane);
/* For loops also start SLP discovery from non-grouped stores. */
if (loop_vinfo)
@@ -4581,7 +4583,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
stmts.quick_push (stmt_info);
vect_build_slp_instance (vinfo, slp_inst_kind_store,
stmts, roots, remain, max_tree_size,
- &limit, bst_map, NULL);
+ &limit, bst_map, NULL, force_single_lane);
}
}
@@ -4598,7 +4600,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
bb_vinfo->roots[i].stmts,
bb_vinfo->roots[i].roots,
bb_vinfo->roots[i].remain,
- max_tree_size, &limit, bst_map, NULL))
+ max_tree_size, &limit, bst_map, NULL,
+ false))
{
bb_vinfo->roots[i].stmts = vNULL;
bb_vinfo->roots[i].roots = vNULL;
@@ -4614,9 +4617,11 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
if (! STMT_VINFO_RELEVANT_P (first_element)
&& ! STMT_VINFO_LIVE_P (first_element))
;
- else if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
- slp_inst_kind_reduc_chain,
- max_tree_size, &limit))
+ else if (force_single_lane
+ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element,
+ slp_inst_kind_reduc_chain,
+ max_tree_size, &limit,
+ force_single_lane))
{
/* Dissolve reduction chain group. */
stmt_vec_info vinfo = first_element;
@@ -4656,7 +4661,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
/* Do not discover SLP reductions combining lane-reducing
ops, that will fail later. */
- if (!lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
+ if (!force_single_lane
+ && !lane_reducing_stmt_p (STMT_VINFO_STMT (next_info)))
scalar_stmts.quick_push (next_info);
else
{
@@ -4670,7 +4676,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
slp_inst_kind_reduc_group,
stmts, roots, remain,
max_tree_size, &limit,
- bst_map, NULL);
+ bst_map, NULL,
+ force_single_lane);
}
}
}
@@ -4683,7 +4690,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
slp_inst_kind_reduc_group,
scalar_stmts, roots, remain,
max_tree_size, &limit, bst_map,
- NULL))
+ NULL, force_single_lane))
{
if (scalar_stmts.length () <= 1)
scalar_stmts.release ();
@@ -4699,7 +4706,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
slp_inst_kind_reduc_group,
stmts, roots, remain,
max_tree_size, &limit,
- bst_map, NULL);
+ bst_map, NULL, force_single_lane);
}
saved_stmts.release ();
}
@@ -4731,7 +4738,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
slp_inst_kind_reduc_group,
stmts, roots, remain,
max_tree_size, &limit,
- bst_map, NULL);
+ bst_map, NULL, force_single_lane);
}
}
}
@@ -8934,7 +8941,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
/* Check the SLP opportunities in the basic block, analyze and build SLP
trees. */
- if (!vect_analyze_slp (bb_vinfo, n_stmts))
+ if (!vect_analyze_slp (bb_vinfo, n_stmts, false))
{
if (dump_enabled_p ())
{
@@ -2538,7 +2538,7 @@ extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree>
unsigned * = nullptr, bool = false);
extern bool vect_slp_analyze_operations (vec_info *);
extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
-extern opt_result vect_analyze_slp (vec_info *, unsigned);
+extern opt_result vect_analyze_slp (vec_info *, unsigned, bool);
extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_optimize_slp (vec_info *);