tree-optimization/116842 - vectorizer load hosting breaks UID order

Message ID 20240928125718.7404B13A6E@imap1.dmz-prg2.suse.org
State Committed
Commit 71bf3daa8dabe45aa14e7315195a70ad0d883337
Headers
Series tree-optimization/116842 - vectorizer load hosting breaks UID order |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed

Commit Message

Richard Biener Sept. 28, 2024, 12:57 p.m. UTC
  The following fixes the case when vectorizing a load hoists an invariant
load and dependent stmts, thereby breaking UID order of said stmts.
While we duplicate the load we just move the dependences.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

	PR tree-optimization/116842
	* tree-vect-stmts.cc (hoist_defs_of_uses): Sort stmts to hoist
	after UID to avoid breaking vect_stmt_dominates_stmt_p.

	* g++.dg/torture/pr116842.C: New testcase.
---
 gcc/testsuite/g++.dg/torture/pr116842.C | 16 +++++++++++
 gcc/tree-vect-stmts.cc                  | 36 ++++++++++++++++---------
 2 files changed, 40 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr116842.C
  

Patch

diff --git a/gcc/testsuite/g++.dg/torture/pr116842.C b/gcc/testsuite/g++.dg/torture/pr116842.C
new file mode 100644
index 00000000000..39821390046
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr116842.C
@@ -0,0 +1,16 @@ 
+// { dg-do compile }
+
+short a, b, c;
+unsigned d(unsigned, int e) { return e; }
+void f(bool g, short e[][3][3][3][3], unsigned h[][3][3], char i[][8],
+       short j[][18][18][18], short k[][18][18][18], short l[][8][8][8][8])
+{
+  for (char m;;)
+    {
+      for (short n = 0; n < 8; n += 5)
+	a = j[m][6][2][m];
+      for (short o(l[m][m][m][m][m] / i[m][m] ?: e[m][m][4][m][2]); o; o = g)
+	for (char p; p < (c && i[g]) + 7; p += 2)
+	  b = d(h[6][g][2], k[m][5][g][2] != m);
+    }
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 0e75e3b4956..540a9b73308 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9788,6 +9788,20 @@  permute_vec_elements (vec_info *vinfo,
   return data_ref;
 }
 
+/* Comparator for qsort, sorting after GIMPLE UID.  */
+
+static int
+sort_after_uid (const void *a_, const void *b_)
+{
+  const gimple *a = *(const gimple * const *)a_;
+  const gimple *b = *(const gimple * const *)b_;
+  if (gimple_uid (a) < gimple_uid (b))
+    return -1;
+  else if (gimple_uid (a) > gimple_uid (b))
+    return 1;
+  return 0;
+}
+
 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
    inserting them on the loops preheader edge.  Returns true if we
    were successful in doing so (and thus STMT_INFO can be moved then),
@@ -9799,7 +9813,7 @@  hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
 {
   ssa_op_iter i;
   tree op;
-  bool any = false;
+  auto_vec<gimple *, 8> to_hoist;
 
   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
     {
@@ -9822,26 +9836,24 @@  hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
 		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
 		return false;
 	    }
-	  any = true;
+	  to_hoist.safe_push (def_stmt);
 	}
     }
 
-  if (!any)
+  if (to_hoist.is_empty ())
     return true;
 
   if (!hoist_p)
     return true;
 
-  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
+  /* Preserve UID order, otherwise we break dominance checks.  */
+  to_hoist.qsort (sort_after_uid);
+
+  for (gimple *def_stmt : to_hoist)
     {
-      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
-      if (!gimple_nop_p (def_stmt)
-	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
-	{
-	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
-	  gsi_remove (&gsi, false);
-	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
-	}
+      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
+      gsi_remove (&gsi, false);
+      gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
     }
 
   return true;