unswitching of outer loops

Message ID 20221107142620.72D1F13AC7@imap2.suse-dmz.suse.de
State New
Headers
Series unswitching of outer loops |

Commit Message

Richard Biener Nov. 7, 2022, 2:26 p.m. UTC
  This allows loop unswitching to unswitch outer loops conditions are
invariant in.  We restrict ourselves to unswitch conditions in innermost
loops and will only unswitch loop nests that do not contain any sibling loops.
To simplify the implementation the loop nest unswitched is the deepest all
unswitching candidates are invariant in.

For 507.cactuBSSN_r it can be observed we unswitch the outer loops
of the compute kernels for the fdOrder parameter.  It seems to be within
the existing growth limitations to perform the unswitchings, a performance
benefit is not seen.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

If there are no comments I plan to push this later this week.

Richard.

	* tree-ssa-loop-unswitch.cc (init_loop_unswitch_info): First collect
	candidates and determine the outermost loop to unswitch.
	(tree_ssa_unswitch_loops): First perform all guard hoisting,
	then perform unswitching on innermost loop predicates.
	(find_unswitching_predicates_for_bb): Keep track of the
	outermost loop to unswitch.
	(evaluate_bbs): Adjust exit test.
	(tree_unswitch_single_loop): Dump whether we unswitched an outer
	loop.
	(tree_unswitch_loop): Remove assert we unswitch only innermost
	loops.

	* gcc.dg/loop-unswitch-18.c: New testcase.
	* gcc.dg/tree-ssa/loopclosedphi.c: Disable unswitching,
	adjust expected counts.
	* gcc.dg/torture/pr71462.c: Add -w to ignore undefined
	behavior diagnostics after now unswitching outer loops.
---
 gcc/testsuite/gcc.dg/loop-unswitch-18.c       |  13 ++
 gcc/testsuite/gcc.dg/torture/pr71462.c        |   1 +
 gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c |   4 +-
 gcc/tree-ssa-loop-unswitch.cc                 | 203 +++++++++++-------
 4 files changed, 140 insertions(+), 81 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-unswitch-18.c
  

Patch

diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-18.c b/gcc/testsuite/gcc.dg/loop-unswitch-18.c
new file mode 100644
index 00000000000..91dc2014922
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-18.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-unswitch-optimized" } */
+
+void bar();
+void foo (int x, int n, int m)
+{
+  for (int i = 0; i < n; ++i)
+    for (int j = 0; j < m; ++j)
+      if (x)
+        bar ();
+}
+
+/* { dg-final { scan-tree-dump "unswitching outer loop" "unswitch" } } */
diff --git a/gcc/testsuite/gcc.dg/torture/pr71462.c b/gcc/testsuite/gcc.dg/torture/pr71462.c
index 390b88673e3..996596321ca 100644
--- a/gcc/testsuite/gcc.dg/torture/pr71462.c
+++ b/gcc/testsuite/gcc.dg/torture/pr71462.c
@@ -1,4 +1,5 @@ 
 /* { dg-do compile } */
+/* { dg-additional-options "-w" } */
 
 short a;
 long b;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
index d71b757fbca..0a8a4174f5f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loopclosedphi.c
@@ -1,5 +1,5 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O3 -fno-tree-ch -w -fdump-tree-loopdone-details" } */
+/* { dg-options "-O3 -fno-tree-ch -fno-unswitch-loops -w -fdump-tree-loopdone-details" } */
 
 void
 t6 (int qz, int wh)
@@ -18,4 +18,4 @@  t6 (int qz, int wh)
     qz = jl * wh;
 }
 
-/* { dg-final { scan-tree-dump-times "Replacing" 2 "loopdone"} } */
+/* { dg-final { scan-tree-dump-times "Replacing" 3 "loopdone"} } */
diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc
index dfe75f52f12..186ae953f04 100644
--- a/gcc/tree-ssa-loop-unswitch.cc
+++ b/gcc/tree-ssa-loop-unswitch.cc
@@ -217,6 +217,7 @@  static bool tree_unswitch_single_loop (class loop *, dump_user_location_t,
 				       basic_block = NULL);
 static void
 find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+				    class loop *&outer_loop,
 				    vec<unswitch_predicate *> &candidates,
 				    unswitch_predicate *&hottest,
 				    basic_block &hottest_bb);
@@ -249,36 +250,31 @@  set_predicates_for_bb (basic_block bb, vec<unswitch_predicate *> predicates)
 }
 
 /* Initialize LOOP information reused during the unswitching pass.
-   Return total number of instructions in the loop.  */
+   Return total number of instructions in the loop.  Adjusts LOOP to
+   the outermost loop all candidates are invariant in.  */
 
 static unsigned
-init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
+init_loop_unswitch_info (class loop *&loop, unswitch_predicate *&hottest,
 			 basic_block &hottest_bb)
 {
   unsigned total_insns = 0;
 
-  /* Calculate instruction count.  */
   basic_block *bbs = get_loop_body (loop);
-  for (unsigned i = 0; i < loop->num_nodes; i++)
-    {
-      unsigned insns = 0;
-      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
-	   gsi_next (&gsi))
-	insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
-
-      bbs[i]->aux = (void *)(uintptr_t)insns;
-      total_insns += insns;
-    }
 
+  /* Unswitch only nests with no sibling loops.  */
+  class loop *outer_loop = loop;
+  while (loop_outer (outer_loop)->num != 0
+	 && !loop_outer (outer_loop)->inner->next)
+    outer_loop = loop_outer (outer_loop);
   hottest = NULL;
   hottest_bb = NULL;
-  /* Find all unswitching candidates.  */
+  /* Find all unswitching candidates in the innermost loop.  */
   for (unsigned i = 0; i != loop->num_nodes; i++)
     {
       /* Find a bb to unswitch on.  */
       vec<unswitch_predicate *> candidates;
       candidates.create (1);
-      find_unswitching_predicates_for_bb (bbs[i], loop, candidates,
+      find_unswitching_predicates_for_bb (bbs[i], loop, outer_loop, candidates,
 					  hottest, hottest_bb);
       if (!candidates.is_empty ())
 	set_predicates_for_bb (bbs[i], candidates);
@@ -291,8 +287,34 @@  init_loop_unswitch_info (class loop *loop, unswitch_predicate *&hottest,
 	}
     }
 
+  if (outer_loop != loop)
+    {
+      free (bbs);
+      bbs = get_loop_body (outer_loop);
+    }
+
+  /* Calculate instruction count.  */
+  for (unsigned i = 0; i < outer_loop->num_nodes; i++)
+    {
+      unsigned insns = 0;
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
+	   gsi_next (&gsi))
+	insns += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights);
+      /* No predicates to unswitch on in the outer loops.  */
+      if (!flow_bb_inside_loop_p (loop, bbs[i]))
+	{
+	  gimple *last = last_stmt (bbs[i]);
+	  if (last != NULL)
+	    gimple_set_uid (last, 0);
+	}
+
+      bbs[i]->aux = (void *)(uintptr_t)insns;
+      total_insns += insns;
+    }
+
   free (bbs);
 
+  loop = outer_loop;
   return total_insns;
 }
 
@@ -307,72 +329,74 @@  tree_ssa_unswitch_loops (function *fun)
 
   ranger = enable_ranger (fun);
 
-  /* Go through all loops starting from innermost.  */
+  /* Go through all loops starting from innermost, hoisting guards.  */
   for (auto loop : loops_list (fun, LI_FROM_INNERMOST))
     {
-      if (!loop->inner)
-	{
-	  /* Perform initial tests if unswitch is eligible.  */
-	  dump_user_location_t loc = find_loop_location (loop);
+      if (loop->inner)
+	changed_hoist |= tree_unswitch_outer_loop (loop);
+    }
 
-	  /* Do not unswitch in cold regions. */
-	  if (optimize_loop_for_size_p (loop))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_NOTE, loc,
-				 "Not unswitching cold loops\n");
-	      continue;
-	    }
+  /* Go through innermost loops, unswitching on invariant predicates
+     within those.  */
+  for (auto loop : loops_list (fun, LI_ONLY_INNERMOST))
+    {
+      /* Perform initial tests if unswitch is eligible.  */
+      dump_user_location_t loc = find_loop_location (loop);
 
-	  /* If the loop is not expected to iterate, there is no need
-	     for unswitching.  */
-	  HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
-	  if (iterations < 0)
-	    iterations = likely_max_loop_iterations_int (loop);
-	  if (iterations >= 0 && iterations <= 1)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_NOTE, loc,
-				 "Not unswitching, loop is not expected"
-				 " to iterate\n");
-	      continue;
-	    }
+      /* Do not unswitch in cold regions. */
+      if (optimize_loop_for_size_p (loop))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, loc,
+			     "Not unswitching cold loops\n");
+	  continue;
+	}
 
-	  bb_predicates = new vec<vec<unswitch_predicate *>> ();
-	  bb_predicates->safe_push (vec<unswitch_predicate *> ());
-	  unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
-
-	  /* Unswitch innermost loop.  */
-	  unswitch_predicate *hottest;
-	  basic_block hottest_bb;
-	  unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
-							    hottest_bb);
-	  unsigned int budget = loop_size + param_max_unswitch_insns;
-
-	  predicate_vector predicate_path;
-	  predicate_path.create (8);
-	  auto_bitmap handled;
-	  changed_unswitch
-	    |= tree_unswitch_single_loop (loop, loc, predicate_path,
-					  loop_size, budget,
-					  ignored_edge_flag, handled,
-					  hottest, hottest_bb);
-	  predicate_path.release ();
-
-	  for (auto predlist : bb_predicates)
-	    predlist.release ();
-	  bb_predicates->release ();
-	  delete bb_predicates;
-	  bb_predicates = NULL;
-
-	  for (auto pred : unswitch_predicate::predicates)
-	    delete pred;
-	  unswitch_predicate::predicates->release ();
-	  delete unswitch_predicate::predicates;
-	  unswitch_predicate::predicates = NULL;
+      /* If the loop is not expected to iterate, there is no need
+	 for unswitching.  */
+      HOST_WIDE_INT iterations = estimated_loop_iterations_int (loop);
+      if (iterations < 0)
+	iterations = likely_max_loop_iterations_int (loop);
+      if (iterations >= 0 && iterations <= 1)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, loc,
+			     "Not unswitching, loop is not expected"
+			     " to iterate\n");
+	  continue;
 	}
-      else
-	changed_hoist |= tree_unswitch_outer_loop (loop);
+
+      bb_predicates = new vec<vec<unswitch_predicate *>> ();
+      bb_predicates->safe_push (vec<unswitch_predicate *> ());
+      unswitch_predicate::predicates = new vec<unswitch_predicate *> ();
+
+      /* Unswitch loop.  */
+      unswitch_predicate *hottest;
+      basic_block hottest_bb;
+      unsigned int loop_size = init_loop_unswitch_info (loop, hottest,
+							hottest_bb);
+      unsigned int budget = loop_size + param_max_unswitch_insns;
+
+      predicate_vector predicate_path;
+      predicate_path.create (8);
+      auto_bitmap handled;
+      changed_unswitch |= tree_unswitch_single_loop (loop, loc, predicate_path,
+						     loop_size, budget,
+						     ignored_edge_flag, handled,
+						     hottest, hottest_bb);
+      predicate_path.release ();
+
+      for (auto predlist : bb_predicates)
+	predlist.release ();
+      bb_predicates->release ();
+      delete bb_predicates;
+      bb_predicates = NULL;
+
+      for (auto pred : unswitch_predicate::predicates)
+	delete pred;
+      unswitch_predicate::predicates->release ();
+      delete unswitch_predicate::predicates;
+      unswitch_predicate::predicates = NULL;
     }
 
   disable_ranger (fun);
@@ -463,10 +487,13 @@  is_maybe_undefined (const tree name, gimple *stmt, class loop *loop)
 
 /* Checks whether we can unswitch LOOP on condition at end of BB -- one of its
    basic blocks (for what it means see comments below).
-   All candidates all filled to the provided vector CANDIDATES.  */
+   All candidates all filled to the provided vector CANDIDATES.
+   OUTER_LOOP is updated to the innermost loop all found candidates are
+   invariant in.  */
 
 static void
 find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
+				    class loop *&outer_loop,
 				    vec<unswitch_predicate *> &candidates,
 				    unswitch_predicate *&hottest,
 				    basic_block &hottest_bb)
@@ -506,6 +533,18 @@  find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
 	  if (is_maybe_undefined (use, stmt, loop))
 	    return;
 	}
+      /* Narrow OUTER_LOOP.  */
+      if (outer_loop != loop)
+	FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
+	  {
+	    def = SSA_NAME_DEF_STMT (use);
+	    def_bb = gimple_bb (def);
+	    while (outer_loop != loop
+		   && ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
+		       || is_maybe_undefined (use, stmt, outer_loop)))
+	      outer_loop = superloop_at_depth (loop,
+					       loop_depth (outer_loop) + 1);
+	  }
 
       unswitch_predicate *predicate = new unswitch_predicate (stmt);
       candidates.safe_push (predicate);
@@ -535,6 +574,12 @@  find_unswitching_predicates_for_bb (basic_block bb, class loop *loop,
 	 behavior that the original program might never exercise.  */
       if (is_maybe_undefined (idx, stmt, loop))
 	return;
+      /* Narrow OUTER_LOOP.  */
+      while (outer_loop != loop
+	     && ((def_bb && flow_bb_inside_loop_p (outer_loop, def_bb))
+		 || is_maybe_undefined (idx, stmt, outer_loop)))
+	outer_loop = superloop_at_depth (loop,
+					 loop_depth (outer_loop) + 1);
 
       /* Build compound expression for all outgoing edges of the switch.  */
       auto_vec<tree, 16> preds;
@@ -872,7 +917,7 @@  evaluate_bbs (class loop *loop, predicate_vector *predicate_path,
 	{
 	  basic_block dest = e->dest;
 
-	  if (dest->loop_father == loop
+	  if (flow_bb_inside_loop_p (loop, dest)
 	      && !(dest->flags & reachable_flag)
 	      && !(e->flags & flags)
 	      && !ignored_edges.contains (e))
@@ -992,7 +1037,8 @@  tree_unswitch_single_loop (class loop *loop, dump_user_location_t loc,
       if (dump_enabled_p ())
 	{
 	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
-			   "unswitching loop %d on %qs with condition: %T\n",
+			   "unswitching %sloop %d on %qs with condition: %T\n",
+			   loop->inner ? "outer " : "",
 			   loop->num, predicate->switch_p ? "switch" : "if",
 			   predicate->condition);
 	  dump_printf_loc (MSG_NOTE, loc,
@@ -1064,7 +1110,6 @@  tree_unswitch_loop (class loop *loop, edge edge_true, tree cond)
   /* Some sanity checking.  */
   gcc_assert (flow_bb_inside_loop_p (loop, edge_true->src));
   gcc_assert (EDGE_COUNT (edge_true->src->succs) >= 2);
-  gcc_assert (loop->inner == NULL);
 
   profile_probability prob_true = edge_true->probability;
   return loop_version (loop, unshare_expr (cond),