tree-optimization/124743 - SLP scheduling of invariant internal ops
Commit Message
The following makes us avoid scheduling invariant internal operations
to random points in the CFG when doing loop vectorization, since
in that case the stmt UIDs we use for dominance checks are not
initialized outside of loop bodies. Instead, schedule such
operations by appending to the loop preheader, which is where
invariants generally end up for loop vectorization.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
PR tree-optimization/124743
* tree-vect-slp.cc (vect_schedule_slp_node): Schedule
loop invariant operations in the loop preheader.
* gcc.dg/vect/vect-pr124743.c: New testcase.
---
gcc/testsuite/gcc.dg/vect/vect-pr124743.c | 12 ++++++
gcc/tree-vect-slp.cc | 49 ++++++++++++++---------
2 files changed, 43 insertions(+), 18 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-pr124743.c
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 -fno-tree-loop-distribute-patterns -fno-tree-loop-distribution -fno-tree-ccp -fno-tree-copy-prop -fno-tree-dse" } */
+
+int a, b, c, d;
+int e(int f, int g) { return g < 0 || g > 1 ? 0 : f >> g; }
+int h(int i) { return a > 1 ? 0 : i << a; }
+int main()
+{
+ for (; b; b++)
+ c = e(h(1), d);
+ return 0;
+}
@@ -12081,25 +12081,38 @@ vect_schedule_slp_node (vec_info *vinfo,
si = gsi_for_stmt (last_stmt);
gsi_next (&si);
- /* Avoid scheduling internal defs outside of the loop when
- we might have only implicitly tracked loop mask/len defs. */
if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
- || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
- {
- gimple_stmt_iterator si2
- = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
- if ((gsi_end_p (si2)
- && (LOOP_VINFO_LOOP (loop_vinfo)->header
- != gimple_bb (last_stmt))
- && dominated_by_p (CDI_DOMINATORS,
- LOOP_VINFO_LOOP (loop_vinfo)->header,
- gimple_bb (last_stmt)))
- || (!gsi_end_p (si2)
- && last_stmt != *si2
- && vect_stmt_dominates_stmt_p (last_stmt, *si2)))
- si = si2;
- }
+ {
+ /* Avoid scheduling stmts to random places in the CFG; any
+ stmt dominance check we performed is possibly wrong as UIDs
+ are not initialized for the whole function during loop
+ vectorization. Instead append to the loop preheader. */
+ if ((LOOP_VINFO_LOOP (loop_vinfo)->header
+ != gimple_bb (last_stmt))
+ && dominated_by_p (CDI_DOMINATORS,
+ LOOP_VINFO_LOOP (loop_vinfo)->header,
+ gimple_bb (last_stmt)))
+ si = gsi_end_bb (loop_preheader_edge
+ (LOOP_VINFO_LOOP (loop_vinfo))->src);
+ /* Avoid scheduling internal defs outside of the loop when
+ we might have only implicitly tracked loop mask/len defs. */
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ {
+ gimple_stmt_iterator si2
+ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
+ if ((gsi_end_p (si2)
+ && (LOOP_VINFO_LOOP (loop_vinfo)->header
+ != gimple_bb (last_stmt))
+ && dominated_by_p (CDI_DOMINATORS,
+ LOOP_VINFO_LOOP (loop_vinfo)->header,
+ gimple_bb (last_stmt)))
+ || (!gsi_end_p (si2)
+ && last_stmt != *si2
+ && vect_stmt_dominates_stmt_p (last_stmt, *si2)))
+ si = si2;
+ }
+ }
}
}