tree-optimization/114081 - dominator update for prologue peeling

Message ID 20240226144014.9D4683858C62@sourceware.org
State Committed
Commit 8a5d9409584aeb777b06f9c19c7d1a3552d496ad
Headers
Series tree-optimization/114081 - dominator update for prologue peeling |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed

Commit Message

Richard Biener Feb. 26, 2024, 2:39 p.m. UTC
  The following implements a manual dominator update for multi-exit loop
prologue peeling during vectorization.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

I think the amount of coverage for prologue peeling with early exits
is very low, so my testing success might not mean much.

Richard.

	PR tree-optimization/114081
	* tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
	Perform manual dominator update for prologue peeling.
	(vect_do_peeling): Properly update dominators after adding the
	prologue-around guard.

	* gcc.dg/vect/vect-early-break_121-pr114081.c: New testcase.
---
 .../vect/vect-early-break_121-pr114081.c      | 39 ++++++++++
 gcc/tree-vect-loop-manip.cc                   | 78 +++++++++++++------
 2 files changed, 95 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
new file mode 100644
index 00000000000..423ff0b566b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-O3" } */
+/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+typedef struct filter_list_entry {
+  const char *name;
+  int id;
+  void (*function)();
+} filter_list_entry;
+
+static const filter_list_entry filter_list[9] = {0};
+
+void php_zval_filter(int filter, int id1) {
+  filter_list_entry filter_func;
+
+  int size = 9;
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == filter) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+
+#pragma GCC novector
+  for (int i = 0; i < size; ++i) {
+    if (filter_list[i].id == 0x0204) {
+      filter_func = filter_list[i];
+      goto done;
+    }
+  }
+done:
+  if (!filter_func.id)
+    filter_func.function();
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 137b053ac35..f72da915103 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -1594,7 +1594,6 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
   auto loop_exits = get_loop_exit_edges (loop);
   bool multiple_exits_p = loop_exits.length () > 1;
   auto_vec<basic_block> doms;
-  class loop *update_loop = NULL;
 
   if (at_exit) /* Add the loop copy at exit.  */
     {
@@ -1856,11 +1855,33 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
 	 correct.  */
       if (multiple_exits_p)
 	{
-	  update_loop = new_loop;
+	  class loop *update_loop = new_loop;
 	  doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header);
 	  for (unsigned i = 0; i < doms.length (); ++i)
 	    if (flow_bb_inside_loop_p (loop, doms[i]))
 	      doms.unordered_remove (i);
+
+	  for (edge e : get_loop_exit_edges (update_loop))
+	    {
+	      edge ex;
+	      edge_iterator ei;
+	      FOR_EACH_EDGE (ex, ei, e->dest->succs)
+		{
+		  /* Find the first non-fallthrough block as fall-throughs can't
+		     dominate other blocks.  */
+		  if (single_succ_p (ex->dest))
+		    {
+		      doms.safe_push (ex->dest);
+		      ex = single_succ_edge (ex->dest);
+		    }
+		  doms.safe_push (ex->dest);
+		}
+	      doms.safe_push (e->dest);
+	    }
+
+	  iterate_fix_dominators (CDI_DOMINATORS, doms, false);
+	  if (updated_doms)
+	    updated_doms->safe_splice (doms);
 	}
     }
   else /* Add the copy at entry.  */
@@ -1910,33 +1931,28 @@  slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit,
       set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
 			       loop_preheader_edge (new_loop)->src);
 
+      /* Update dominators for multiple exits.  */
       if (multiple_exits_p)
-	update_loop = loop;
-    }
-
-  if (multiple_exits_p)
-    {
-      for (edge e : get_loop_exit_edges (update_loop))
 	{
-	  edge ex;
-	  edge_iterator ei;
-	  FOR_EACH_EDGE (ex, ei, e->dest->succs)
+	  for (edge alt_e : loop_exits)
 	    {
-	      /* Find the first non-fallthrough block as fall-throughs can't
-		 dominate other blocks.  */
-	      if (single_succ_p (ex->dest))
+	      if (alt_e == loop_exit)
+		continue;
+	      basic_block old_dom
+		= get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
+	      if (flow_bb_inside_loop_p (loop, old_dom))
 		{
-		  doms.safe_push (ex->dest);
-		  ex = single_succ_edge (ex->dest);
+		  auto_vec<basic_block, 8> queue;
+		  for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
+		       son; son = next_dom_son (CDI_DOMINATORS, son))
+		    if (!flow_bb_inside_loop_p (loop, son))
+		      queue.safe_push (son);
+		  for (auto son : queue)
+		    set_immediate_dominator (CDI_DOMINATORS,
+					     son, get_bb_copy (old_dom));
 		}
-	      doms.safe_push (ex->dest);
 	    }
-	  doms.safe_push (e->dest);
 	}
-
-      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
-      if (updated_doms)
-	updated_doms->safe_splice (doms);
     }
 
   free (new_bbs);
@@ -3368,6 +3384,24 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 					   guard_to, guard_bb,
 					   prob_prolog.invert (),
 					   irred_flag);
+	  for (edge alt_e : get_loop_exit_edges (prolog))
+	    {
+	      if (alt_e == prolog_e)
+		continue;
+	      basic_block old_dom
+		= get_immediate_dominator (CDI_DOMINATORS, alt_e->dest);
+	      if (flow_bb_inside_loop_p (prolog, old_dom))
+		{
+		  auto_vec<basic_block, 8> queue;
+		  for (auto son = first_dom_son (CDI_DOMINATORS, old_dom);
+		       son; son = next_dom_son (CDI_DOMINATORS, son))
+		    if (!flow_bb_inside_loop_p (prolog, son))
+		      queue.safe_push (son);
+		  for (auto son : queue)
+		    set_immediate_dominator (CDI_DOMINATORS, son, guard_bb);
+		}
+	    }
+
 	  e = EDGE_PRED (guard_to, 0);
 	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
 	  slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);