@@ -53,4 +53,4 @@ main (void)
}
/* { dg-final { scan-tree-dump "optimizing condition reduction with FOLD_EXTRACT_LAST" "vect" { target vect_fold_extract_last } } } */
-/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target aarch64*-*-* } } } */
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* || vect_multiple_sizes } } } } */
new file mode 100644
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=znver4 -fdump-tree-vect" } */
+
+double x[1024];
+char y[1024];
+void foo ()
+{
+ for (int i = 0 ; i < 16; ++i)
+ {
+ x[i] = i;
+ y[i] = i;
+ }
+}
+
+/* We expect to see AVX512 vectors for x[] and a SSE vector for y[]. */
+/* { dg-final { scan-tree-dump-times "MEM <vector\\\(8\\\) double>" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "MEM <vector\\\(16\\\) char>" 1 "vect" } } */
@@ -4716,18 +4716,19 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
vf = TYPE_VECTOR_SUBPARTS (vectype);
*min_vf = upper_bound (*min_vf, vf);
- /* Leave the BB vectorizer to pick the vector type later, based on
- the final dataref group size and SLP node size. */
- if (is_a <loop_vec_info> (vinfo))
- STMT_VINFO_VECTYPE (stmt_info) = vectype;
-
if (gatherscatter != SG_NONE)
{
+ /* ??? We should perform a coarser check here, or none at all.
+ We're checking this again later, in particular during
+ relevancy analysis where we hook on the discovered offset
+ operand. */
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
gather_scatter_info gs_info;
if (!vect_check_gather_scatter (stmt_info,
as_a <loop_vec_info> (vinfo),
&gs_info))
{
+ STMT_VINFO_VECTYPE (stmt_info) = NULL_TREE;
if (fatal)
*fatal = false;
return opt_result::failure_at
@@ -189,22 +189,19 @@ vect_determine_vf_for_stmt_1 (vec_info *vinfo, stmt_vec_info stmt_info,
if (!res)
return res;
- if (stmt_vectype)
+ if (nunits_vectype)
{
- if (STMT_VINFO_VECTYPE (stmt_info))
- /* The only case when a vectype had been already set is for stmts
- that contain a data ref, or for "pattern-stmts" (stmts generated
- by the vectorizer to represent/replace a certain idiom). */
- gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
- || vectype_maybe_set_p)
- && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
- else
- STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+ poly_uint64 saved_vf = *vf;
+ vect_update_max_nunits (vf, nunits_vectype);
+ if (maybe_ne (*vf, saved_vf) && dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location, "updated "
+ "vectorization factor to ");
+ dump_dec (MSG_NOTE, *vf);
+ dump_printf (MSG_NOTE, "\n");
+ }
}
- if (nunits_vectype)
- vect_update_max_nunits (vf, nunits_vectype);
-
return opt_result::success ();
}
@@ -330,20 +327,17 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo,
"not vectorized: unsupported "
"data-type %T\n",
scalar_type);
- STMT_VINFO_VECTYPE (stmt_info) = vectype;
-
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
- vectype);
- if (dump_enabled_p ())
+ poly_uint64 saved_vectorization_factor = vectorization_factor;
+ vect_update_max_nunits (&vectorization_factor, vectype);
+ if (maybe_ne (vectorization_factor, saved_vectorization_factor)
+ && dump_enabled_p ())
{
- dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
- dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
+ dump_printf_loc (MSG_NOTE, vect_location, "updated "
+ "vectorization factor to ");
+ dump_dec (MSG_NOTE, vectorization_factor);
dump_printf (MSG_NOTE, "\n");
}
-
- vect_update_max_nunits (&vectorization_factor, vectype);
}
}
@@ -2864,6 +2858,114 @@ start_over:
gcc_assert (known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 0U));
loop_vinfo->vectorization_factor = vectorization_factor;
+ /* At this point we have the vectorization factor that should determine
+ the vector types to use decided. The unrolling factor should not
+ influence that since otherwise we'd eventually use larger vectors
+ rather than doing actual effective unrolling.
+
+ Note that with re-starting without SLP we actually will have the
+ original loop VF so we're off here - but then non-SLP should go
+ away ... */
+ /* Check that nothing set STMT_VINFO_VECTYPE so nothing could have
+ relied on it. ??? Same for SLP. ??? That also catches pattern
+ stmts which might be more difficult to "fix". */
+ for (stmt_vec_info stmt_info : loop_vinfo->stmt_vec_infos)
+ {
+ if (!stmt_info
+ || gimple_clobber_p (stmt_info->stmt))
+ continue;
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ continue;
+
+ if (STMT_VINFO_VECTYPE (stmt_info))
+ {
+ /* Pattern stmts and gather/scatter may have a precomputed
+ vector type. */
+ gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info)
+ || STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+ continue;
+ }
+
+ /* ??? This is still a coarse vector type decision. Multiple
+ up/down passes over use-def chains should be used to set
+ vector types from within vectorizable_* itself, in a new
+ special mode. Possibly identifying the responsible worker early.
+ Not worth spending much time on this in the non-SLP path. */
+ tree stmt_vectype, nunits_vectype;
+ opt_result res
+ = vect_get_vector_types_for_stmt (loop_vinfo, stmt_info, &stmt_vectype,
+ &nunits_vectype);
+ gcc_assert (res);
+ if (!stmt_vectype)
+ /* OMP SIMD calls without LHS. */
+ continue;
+
+ tree scalar_type = NULL_TREE;
+ if (vect_use_mask_type_p (stmt_info))
+ {
+ if (is_a <gphi *> (stmt_info->stmt))
+ {
+ /* Only with BB vectorization or as PHI in a nested cycle. */
+ gcc_assert (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
+ gimple_bb (stmt_info->stmt)));
+ /* ??? vectorizable_* should set the vector type. */
+ continue;
+ }
+ else
+ {
+ if (is_gimple_assign (stmt_info->stmt)
+ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt_info->stmt))
+ == tcc_comparison))
+ scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
+ else
+ /* ??? vectorizable_* should set the vector type. */
+ continue;
+ }
+ }
+ else
+ scalar_type = TREE_TYPE (stmt_vectype);
+
+ /* Try to use a larger vector type when the above one has less lanes
+ than the chosen VF, up to the one recommended by the preferred vector
+ mode hook. This keeps ncopies down, generating more efficient code
+ and in some cases enables vectorizing in the first place. */
+ tree preferred_vectype = get_related_vectype_for_scalar_type (VOIDmode,
+ scalar_type,
+ 0);
+ if (known_lt (TYPE_VECTOR_SUBPARTS (stmt_vectype),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo))
+ && known_lt (TYPE_VECTOR_SUBPARTS (stmt_vectype),
+ TYPE_VECTOR_SUBPARTS (preferred_vectype))
+ && ordered_p (TYPE_VECTOR_SUBPARTS (preferred_vectype),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
+ {
+ /* ??? Could try all nunits between stmt_vectype and MIN. */
+ poly_uint64 nunits
+ = ordered_min (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+ TYPE_VECTOR_SUBPARTS (preferred_vectype));
+ tree cand = get_related_vectype_for_scalar_type
+ (TYPE_MODE (preferred_vectype), scalar_type, nunits);
+ if (cand)
+ {
+ if (VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
+ cand = truth_type_for (cand);
+ stmt_vectype = cand;
+ }
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "==> examining statement: %G", stmt_info->stmt);
+ dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
+ stmt_vectype);
+ }
+
+ STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+ }
+
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
@@ -6490,7 +6490,6 @@ vectorizable_operation (vec_info *vinfo,
int ndts = 3;
poly_uint64 nunits_in;
poly_uint64 nunits_out;
- tree vectype_out;
int ncopies, vec_num;
int i;
vec<tree> vec_oprnds0 = vNULL;
@@ -6550,25 +6549,6 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
- scalar_dest = gimple_assign_lhs (stmt);
- vectype_out = STMT_VINFO_VECTYPE (stmt_info);
-
- /* Most operations cannot handle bit-precision types without extra
- truncations. */
- bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
- if (!mask_op_p
- && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
- /* Exception are bitwise binary operations. */
- && code != BIT_IOR_EXPR
- && code != BIT_XOR_EXPR
- && code != BIT_AND_EXPR)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "bit-precision arithmetic not supported.\n");
- return false;
- }
-
slp_tree slp_op0;
if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
0, &op0, &slp_op0, &dt[0], &vectype))
@@ -6580,47 +6560,6 @@ vectorizable_operation (vec_info *vinfo,
}
bool is_invariant = (dt[0] == vect_external_def
|| dt[0] == vect_constant_def);
- /* If op0 is an external or constant def, infer the vector type
- from the scalar type. */
- if (!vectype)
- {
- /* For boolean type we cannot determine vectype by
- invariant value (don't know whether it is a vector
- of booleans or vector of integers). We use output
- vectype because operations on boolean don't change
- type. */
- if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
- {
- if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not supported operation on bool value.\n");
- return false;
- }
- vectype = vectype_out;
- }
- else
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
- slp_node);
- }
- if (vec_stmt)
- gcc_assert (vectype);
- if (!vectype)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "no vectype for scalar type %T\n",
- TREE_TYPE (op0));
-
- return false;
- }
-
- nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
- if (maybe_ne (nunits_out, nunits_in))
- return false;
-
tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
slp_tree slp_op1 = NULL, slp_op2 = NULL;
if (op_type == binary_op || op_type == ternary_op)
@@ -6635,9 +6574,8 @@ vectorizable_operation (vec_info *vinfo,
}
is_invariant &= (dt[1] == vect_external_def
|| dt[1] == vect_constant_def);
- if (vectype2
- && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
- return false;
+ if (!vectype)
+ vectype = vectype2;
}
if (op_type == ternary_op)
{
@@ -6651,9 +6589,52 @@ vectorizable_operation (vec_info *vinfo,
}
is_invariant &= (dt[2] == vect_external_def
|| dt[2] == vect_constant_def);
- if (vectype3
- && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
- return false;
+ if (!vectype)
+ vectype = vectype3;
+ }
+
+ if (!vectype)
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (!vectype)
+ {
+ /* We want to pre-assign sth here. */
+ gcc_assert (!vec_stmt
+ && is_invariant
+ && !vect_use_mask_type_p (stmt_info));
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
+ }
+
+ tree vectype_out = vectype;
+ nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+ nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
+ if (maybe_ne (nunits_out, nunits_in))
+ return false;
+ /* ??? Isn't the constraint the types are the same apart from
+ signedness (ABSU_EXPR for example)? The rest suggests this as
+ we are using 'vectype' for constants/invariants. */
+ if (vectype2
+ && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
+ return false;
+ if (vectype3
+ && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
+ return false;
+
+ scalar_dest = gimple_assign_lhs (stmt);
+
+ /* Most operations cannot handle bit-precision types without extra
+ truncations. */
+ bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
+ if (!mask_op_p
+ && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
+ /* Exception are bitwise binary operations. */
+ && code != BIT_IOR_EXPR
+ && code != BIT_XOR_EXPR
+ && code != BIT_AND_EXPR)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "bit-precision arithmetic not supported.\n");
+ return false;
}
/* Multiple types in SLP are handled by creating the appropriate number of
@@ -6788,6 +6769,8 @@ vectorizable_operation (vec_info *vinfo,
return false;
}
+ if (!STMT_VINFO_VECTYPE (stmt_info))
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
DUMP_VECT_SCOPE ("vectorizable_operation");
vect_model_simple_cost (vinfo, stmt_info,
@@ -12890,7 +12873,9 @@ vect_analyze_stmt (vec_info *vinfo,
{
gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
- || (call && gimple_call_lhs (call) == NULL_TREE));
+ || (call && gimple_call_lhs (call) == NULL_TREE)
+ /* ??? Inconsistently so. */
+ || vect_use_mask_type_p (stmt_info));
*need_to_vectorize = true;
}