diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4ab23b0ab33..e08b94c0447 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5454,17 +5454,23 @@ rs6000_update_target_cost_per_stmt (rs6000_cost_data *data,
{
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
unsigned int nunits = vect_nunits_for_cost (vectype);
- unsigned int extra_cost = nunits * stmt_cost;
- /* As function rs6000_builtin_vectorization_cost shows, we have
- priced much on V16QI/V8HI vector construction as their units,
- if we penalize them with nunits * stmt_cost, it can result in
- an unreliable body cost, eg: for V16QI on Power8, stmt_cost
- is 20 and nunits is 16, the extra cost is 320 which looks
- much exaggerated. So let's use one maximum bound for the
- extra penalized cost for vector construction here. */
- const unsigned int MAX_PENALIZED_COST_FOR_CTOR = 12;
- if (extra_cost > MAX_PENALIZED_COST_FOR_CTOR)
- extra_cost = MAX_PENALIZED_COST_FOR_CTOR;
+ /* As rs6000_builtin_vectorization_cost shows, the cost of
+ V16QI/V8HI vector construction already takes the number of
+ units into account.  Penalizing it again with nunits *
+ stmt_cost here would make the body cost unreliable, e.g.
+ for V16QI on Power8, stmt_cost is 20 and nunits is 16, so
+ the penalty would be 320, which looks much exaggerated.  But
+ there are actually nunits scalar loads, so we adopt a
+ reasonable penalized cost for each load rather than the full
+ stmt_cost.  Dividing stmt_cost by log2(nunits)^2 retains
+ the necessary penalty for small nunits while keeping the
+ penalty stable for big nunits.  */
+ int nunits_log2 = exact_log2 (nunits);
+ gcc_assert (nunits_log2 > 0);
+ unsigned int nunits_sq = nunits_log2 * nunits_log2;
+ unsigned int adjusted_cost = stmt_cost / nunits_sq;
+ gcc_assert (adjusted_cost > 0);
+ unsigned int extra_cost = nunits * adjusted_cost;
data->extra_ctor_cost += extra_cost;
}
}