tree-optimization/102318 - reduction epilogue re-use
Commit Message
This refines the fix for PR102226 to do the mode conversion
from V2DI to VNx2DI separately from the sign-conversion, retaining
the signedness of the saved accumulator as before the original fix.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
2021-09-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/102318
* tree-vect-loop.c (vect_transform_cycle_phi): Revert
previous change and do the mode conversion separately from
the sign conversion.
* gcc.dg/vect/pr102318.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/pr102318.c | 21 +++++++++++++++++++++
gcc/tree-vect-loop.c | 13 +++++++++++--
2 files changed, 32 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr102318.c
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+void
+vec_slp_int16_t (short int *restrict a, short int *restrict b, int n)
+{ /* Four interleaved short sums over a[], seeded from and stored to b[0..3].  */
+ short int x0 = b[0];
+ short int x1 = b[1];
+ short int x2 = b[2];
+ short int x3 = b[3];
+ for (int i = 0; i < n; ++i) /* Iteration i consumes a[4*i] .. a[4*i + 3].  */
+ {
+ x0 += a[i * 4];
+ x1 += a[i * 4 + 1];
+ x2 += a[i * 4 + 2];
+ x3 += a[i * 4 + 3];
+ }
+ b[0] = x0; /* Write the four accumulated sums back to b[0..3].  */
+ b[1] = x1;
+ b[2] = x2;
+ b[3] = x3;
+}
@@ -7755,11 +7755,20 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
(reduc_info),
&stmts);
}
- if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
- def = gimple_convert (&stmts, vectype_out, def);
+ /* The epilogue loop might use a different vector mode, like
+ VNx2DI vs. V2DI. */
+ if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def)))
+ {
+ tree reduc_type = build_vector_type_for_mode
+ (TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out));
+ def = gimple_convert (&stmts, reduc_type, def);
+ }
/* Adjust the input so we pick up the partially reduced value
for the skip edge in vect_create_epilog_for_reduction. */
accumulator->reduc_input = def;
+ /* And the reduction could be carried out using a different sign. */
+ if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
+ def = gimple_convert (&stmts, vectype_out, def);
if (loop_vinfo->main_loop_edge)
{
/* While we'd like to insert on the edge this will split