[v2] tree-optimization/86270 - improve SSA coalescing for loop exit test

Message ID 20250213113942.CC1D23858C35@sourceware.org
State New
Headers
Series [v2] tree-optimization/86270 - improve SSA coalescing for loop exit test |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap fail Build failed

Commit Message

Richard Biener Feb. 13, 2025, 11:38 a.m. UTC
  The PR indicates a very specific issue with regard to SSA coalescing
failures because there's a pre-IV-increment loop exit test.  While
IVOPTs created the desired IL, we later simplify the exit test into
the undesirable form again.  The following fixes this up during RTL
expansion where we try to improve coalescing of IVs.  That seems
easier than trying to avoid the simplification with some weird
heuristics (it could also have been written this way).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

OK?

Thanks,
Richard.

	PR tree-optimization/86270
	* tree-outof-ssa.cc (insert_backedge_copies): Pattern
	match a single conflict in a loop condition and adjust
	that avoiding the conflict if possible.

	* gcc.target/i386/pr86270.c: Adjust to check for no reg-reg
	copies as well.
---
 gcc/testsuite/gcc.target/i386/pr86270.c |  3 ++
 gcc/tree-outof-ssa.cc                   | 51 ++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 5 deletions(-)
  

Patch

diff --git a/gcc/testsuite/gcc.target/i386/pr86270.c b/gcc/testsuite/gcc.target/i386/pr86270.c
index 68562446fa4..89b9aeb317a 100644
--- a/gcc/testsuite/gcc.target/i386/pr86270.c
+++ b/gcc/testsuite/gcc.target/i386/pr86270.c
@@ -13,3 +13,6 @@  test ()
 
 /* Check we do not split the backedge but keep nice loop form.  */
 /* { dg-final { scan-assembler-times "L\[0-9\]+:" 2 } } */
+/* Check we do not end up with reg-reg moves from a pre-increment IV
+   exit test.  */
+/* { dg-final { scan-assembler-not "mov\[lq\]\?\t%\?\[er\].x, %\?\[er\].x" } } */
diff --git a/gcc/tree-outof-ssa.cc b/gcc/tree-outof-ssa.cc
index d340d4ba529..1b5b67c2e2b 100644
--- a/gcc/tree-outof-ssa.cc
+++ b/gcc/tree-outof-ssa.cc
@@ -46,6 +46,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "tree-outof-ssa.h"
 #include "dojump.h"
 #include "internal-fn.h"
+#include "gimple-fold.h"
 
 /* FIXME: A lot of code here deals with expanding to RTL.  All that code
    should be in cfgexpand.cc.  */
@@ -1259,10 +1260,9 @@  insert_backedge_copies (void)
 		  if (gimple_nop_p (def)
 		      || gimple_code (def) == GIMPLE_PHI)
 		    continue;
-		  tree name = copy_ssa_name (result);
-		  gimple *stmt = gimple_build_assign (name, result);
 		  imm_use_iterator imm_iter;
 		  gimple *use_stmt;
+		  auto_vec<use_operand_p, 8> uses;
 		  /* The following matches trivially_conflicts_p.  */
 		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, result)
 		    {
@@ -1273,11 +1273,52 @@  insert_backedge_copies (void)
 			{
 			  use_operand_p use;
 			  FOR_EACH_IMM_USE_ON_STMT (use, imm_iter)
-			    SET_USE (use, name);
+			    uses.safe_push (use);
 			}
 		    }
-		  gimple_stmt_iterator gsi = gsi_for_stmt (def);
-		  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+		  /* When there is just a conflicting statement try to
+		     adjust that to refer to the new definition.
+		     In particular for now handle a conflict with the
+		     use in a (exit) condition with a NE compare,
+		     replacing a pre-IV-increment compare with a
+		     post-IV-increment one.  */
+		  if (uses.length () == 1
+		      && is_a <gcond *> (USE_STMT (uses[0]))
+		      && (gimple_cond_code (USE_STMT (uses[0])) == NE_EXPR
+			  || gimple_cond_code (USE_STMT (uses[0])) == EQ_EXPR)
+		      && is_gimple_assign (def)
+		      && gimple_assign_rhs1 (def) == result
+		      && (gimple_assign_rhs_code (def) == PLUS_EXPR
+			  || gimple_assign_rhs_code (def) == MINUS_EXPR
+			  || gimple_assign_rhs_code (def) == POINTER_PLUS_EXPR)
+		      && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST)
+		    {
+		      gcond *cond = as_a <gcond *> (USE_STMT (uses[0]));
+		      tree *adj;
+		      if (gimple_cond_lhs (cond) == result)
+			adj = gimple_cond_rhs_ptr (cond);
+		      else
+			adj = gimple_cond_lhs_ptr (cond);
+		      gimple_stmt_iterator gsi = gsi_for_stmt (cond);
+		      tree newval
+			= gimple_build (&gsi, true, GSI_SAME_STMT,
+					UNKNOWN_LOCATION,
+					gimple_assign_rhs_code (def),
+					TREE_TYPE (*adj),
+					*adj, gimple_assign_rhs2 (def));
+		      *adj = newval;
+		      SET_USE (uses[0], arg);
+		      update_stmt (cond);
+		    }
+		  else
+		    {
+		      tree name = copy_ssa_name (result);
+		      gimple *stmt = gimple_build_assign (name, result);
+		      gimple_stmt_iterator gsi = gsi_for_stmt (def);
+		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
+		      for (auto use : uses)
+			SET_USE (use, name);
+		    }
 		}
 	    }
 	}