tree-optimization: Add register pressure in LICM at tree-ssa level optimization

Message ID 1894603a-deee-4ef4-8bea-9c0dcd672336@linux.ibm.com
State New
Headers
Series tree-optimization: Add register pressure in LICM at tree-ssa level optimization |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed

Commit Message

Ajit Agarwal Nov. 18, 2023, 8:36 a.m. UTC
  Hello All:

Loop invariant code motion can increase register pressure if the
live ranges increases and there could be spill happening.

In tree-ssa representaion, we have the proof tha livein at
the loop header will be live-in across all the basic blocks
inside the loop.

We want to evaluate that how register pressure behaves with
moving outside loop.

If we have live-in at the basic block of the statement moved
to outside the loop is greater than livein at the outer loop
header where it is moved. Such cases we marked at LIM_EXPENSIVE.

Moving the statement at the outerloop we measure register pressure
of stmt with respect to livein at outer loop header.

That evaluates to adding to cost and the cost increases with
above cases and there is less chance to do loop invariant
code motion.

Bootstrapped and regtested on powerpc64-linux-gnu and no issues.

SPEC 2017 benchmarks numbers are better with this optimization
for INT and FP benchmarks.

Ok for trunk?

Thanks & Regards
Ajit


tree-optimization: Add register pressure in LICM at tree-ssa level optimization

Loop invariant code motion can increase register pressure if the
live ranges increases and there could be spill happening.

In tree-ssa representaion, we have the proof tha livein at
the loop header will be live-in across all the basic blocks
inside the loop.

We want to evaluate that how register pressure behaves with
moving outside loop.

If we have live-in at the basic block of the statement moved
to outside the loop is greater than livein at the outer loop
header where it is moved. Such cases we marked at LIM_EXPENSIVE.

That evaluates to adding to cost and the cost increases with
above cases and there is less chance to do loop invariant
code motion.

2023-11-18  Ajit Kumar Agarwal  <aagarwa1@linux.ibm.com>

gcc/ChangeLog:

        * tree-ssa-loop-im.cc (has_stmt_increasing_reg_pressure):
	New function that evaluates register pressure for LICM.
        (stmt_cost): Return LIM_EXPENSIVE if register pressure
	increases.
	(lim_aux_data): Add Liveness field live in this data
	structure.
        (execute): Add live range analysis.
        * tree-ssa-live.cc (set_var_live_on_entry): Add virtual operand
        tests on ssa_names.
        (verify_live_on_entry): Ditto.
	(additional_var_map): New function.
---
 gcc/tree-ssa-live.cc    | 25 +++++++++++++++++++----
 gcc/tree-ssa-live.h     |  1 +
 gcc/tree-ssa-loop-im.cc | 45 ++++++++++++++++++++++++++++++++++++++---
 3 files changed, 64 insertions(+), 7 deletions(-)
  

Patch

diff --git a/gcc/tree-ssa-live.cc b/gcc/tree-ssa-live.cc
index f06daf23035..052e690d09f 100644
--- a/gcc/tree-ssa-live.cc
+++ b/gcc/tree-ssa-live.cc
@@ -75,6 +75,18 @@  var_map_base_fini (var_map map)
       map->num_basevars = 0;
     }
 }
+
+/* Add additional var_map data.  */
+
+void additional_var_map (var_map &map)
+{
+  map->bmp_bbs = BITMAP_ALLOC (NULL);
+  map->outofssa_p = false;
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    bitmap_set_bit (map->bmp_bbs, bb->index);
+}
+
 /* Create a variable partition map of SIZE for region, initialize and return
    it.  Region is a loop if LOOP is non-NULL, otherwise is the current
    function.  If BITINT is non-NULL, only SSA_NAMEs from that bitmap
@@ -1141,7 +1153,8 @@  set_var_live_on_entry (tree ssa_name, tree_live_info_p live)
     def_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
 
   /* An undefined local variable does not need to be very alive.  */
-  if (ssa_undefined_value_p (ssa_name, false))
+  if (virtual_operand_p (ssa_name)
+      || ssa_undefined_value_p (ssa_name, false))
     return;
 
   /* Visit each use of SSA_NAME and if it isn't in the same block as the def,
@@ -1540,7 +1553,6 @@  debug (tree_live_info_d *ptr)
 
 
 /* Verify that the info in LIVE matches the current cfg.  */
-
 static void
 verify_live_on_entry (tree_live_info_p live)
 {
@@ -1569,11 +1581,15 @@  verify_live_on_entry (tree_live_info_p live)
 	  tree d = NULL_TREE;
 	  bitmap loe;
 	  var = partition_to_var (map, i);
+	  if (var == NULL_TREE)
+	    continue;
 	  stmt = SSA_NAME_DEF_STMT (var);
 	  tmp = gimple_bb (stmt);
+
 	  if (SSA_NAME_VAR (var))
 	    d = ssa_default_def (cfun, SSA_NAME_VAR (var));
-
+	  if (d == var)
+	    continue;
 	  loe = live_on_entry (live, e->dest);
 	  if (loe && bitmap_bit_p (loe, i))
 	    {
@@ -1614,7 +1630,8 @@  verify_live_on_entry (tree_live_info_p live)
 	      {
 		/* An undefined local variable does not need to be very
 		   alive.  */
-		if (ssa_undefined_value_p (var, false))
+		if (virtual_operand_p (var)
+		    || ssa_undefined_value_p (var, false))
 		  continue;
 
 		/* The only way this var shouldn't be marked live on entry is
diff --git a/gcc/tree-ssa-live.h b/gcc/tree-ssa-live.h
index 73191dc434d..2d7bbf20128 100644
--- a/gcc/tree-ssa-live.h
+++ b/gcc/tree-ssa-live.h
@@ -85,6 +85,7 @@  typedef struct _var_map
 #define NO_PARTITION		-1
 
 extern var_map init_var_map (int, class loop * = NULL, bitmap = NULL);
+extern void additional_var_map (var_map &);
 extern void delete_var_map (var_map);
 extern int var_union (var_map, tree, tree);
 extern void partition_view_normal (var_map);
diff --git a/gcc/tree-ssa-loop-im.cc b/gcc/tree-ssa-loop-im.cc
index 396963b6754..192694c11b3 100644
--- a/gcc/tree-ssa-loop-im.cc
+++ b/gcc/tree-ssa-loop-im.cc
@@ -48,7 +48,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-dfa.h"
 #include "tree-ssa.h"
 #include "dbgcnt.h"
-
+#include "tree-ssa-live.h"
 /* TODO:  Support for predicated code motion.  I.e.
 
    while (1)
@@ -96,6 +96,7 @@  struct lim_aux_data
 				   is hoisted; i.e. those that define the
 				   operands of the statement and are inside of
 				   the MAX_LOOP loop.  */
+  tree_live_info_p live;	/* liveness info.  */
 };
 
 /* Maps statements to their lim_aux_data.  */
@@ -603,6 +604,30 @@  add_dependency (tree def, struct lim_aux_data *data, class loop *loop,
   return true;
 }
 
+/* Return TRUE if livein (basic_block (stmt) is greater than
+ * livein (outer loop header where stmt is moved to).  */
+static bool
+has_stmt_increasing_reg_pressure (gimple *stmt)
+{
+  struct lim_aux_data *lim_data = get_lim_data (stmt);
+  tree_live_info_p live = lim_data->live;
+
+  if (lim_data && live &&  gimple_bb (stmt)->loop_father)
+    {
+      unsigned bb_live_cnt
+	= bitmap_count_bits (&live->livein[gimple_bb (stmt)->index]);
+
+      basic_block outer_loop_hdr = gimple_bb (stmt)->loop_father->header;
+      unsigned outer_live_cnt
+	= bitmap_count_bits (&live->livein[outer_loop_hdr->index]);
+
+      if (bb_live_cnt > outer_live_cnt)
+	return true;
+    }
+
+  return false;
+}
+
 /* Returns an estimate for a cost of statement STMT.  The values here
    are just ad-hoc constants, similar to costs for inlining.  */
 
@@ -685,6 +710,10 @@  stmt_cost (gimple *stmt)
       /* Comparisons are usually expensive.  */
       if (TREE_CODE_CLASS (code) == tcc_comparison)
 	return LIM_EXPENSIVE;
+
+      if (has_stmt_increasing_reg_pressure (stmt))
+	return LIM_EXPENSIVE;
+
       return 1;
     }
 }
@@ -1095,7 +1124,7 @@  rewrite_bittest (gimple_stmt_iterator *bsi)
    statements.  */
 
 static void
-compute_invariantness (basic_block bb)
+compute_invariantness (basic_block bb, tree_live_info_p &live)
 {
   enum move_pos pos;
   gimple_stmt_iterator bsi;
@@ -1131,6 +1160,7 @@  compute_invariantness (basic_block bb)
 	if (! lim_data)
 	  lim_data = init_lim_data (stmt);
 	lim_data->always_executed_in = outermost;
+	lim_data->live = live;
 
 	if (!determine_max_movement (stmt, false))
 	  {
@@ -1170,6 +1200,7 @@  compute_invariantness (basic_block bb)
 	      if (! lim_data)
 		lim_data = init_lim_data (stmt);
 	      lim_data->always_executed_in = outermost;
+	      lim_data->live = live;
 	    }
 	  continue;
 	}
@@ -1208,6 +1239,7 @@  compute_invariantness (basic_block bb)
       if (! lim_data)
 	lim_data = init_lim_data (stmt);
       lim_data->always_executed_in = outermost;
+      lim_data->live = live;
 
       if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
 	continue;
@@ -3613,10 +3645,15 @@  loop_invariant_motion_in_fun (function *fun, bool store_motion)
   int *rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
   int n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);
 
+  var_map map = init_var_map (num_ssa_names);
+  additional_var_map (map);
+
+  tree_live_info_p live = calculate_live_ranges (map, true);
+
   /* For each statement determine the outermost loop in that it is
      invariant and cost for computing the invariant.  */
   for (int i = 0; i < n; ++i)
-    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]));
+    compute_invariantness (BASIC_BLOCK_FOR_FN (fun, rpo[i]), live);
 
   /* Execute store motion.  Force the necessary invariants to be moved
      out of the loops as well.  */
@@ -3624,6 +3661,8 @@  loop_invariant_motion_in_fun (function *fun, bool store_motion)
     do_store_motion ();
 
   free (rpo);
+  delete_tree_live_info (live);
+  delete_var_map (map);
   rpo = XNEWVEC (int, last_basic_block_for_fn (fun));
   n = pre_and_rev_post_order_compute_fn (fun, NULL, rpo, false);