[committed,PR102842] LRA: Consider all outputs in generation of matching reloads

Message ID 1f9a895e-3557-e859-de29-0b7d922592fa@redhat.com
State Committed
Headers
Series [committed,PR102842] LRA: Consider all outputs in generation of matching reloads |

Commit Message

Vladimir Makarov Oct. 26, 2021, 7:19 p.m. UTC
  The following patch fixes

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102842

As the patch touches sensitive LRA code, it was bootstrapped and
tested on x86-64, aarch64, and ppc64.

I've committed the patch only to the master branch.  Later (after some
observation), I'll commit it to the gcc-10 and gcc-11 branches.
  

Patch

commit 8c59f4118357789cfa8df2cf0d3ecb61be7e9041
Author: Vladimir N. Makarov <vmakarov@redhat.com>
Date:   Tue Oct 26 14:03:42 2021 -0400

    [PR102842] Consider all outputs in generation of matching reloads
    
    Without considering all output insn operands (not only those processed
    before), in rare cases LRA can use the same hard register for
    different outputs of the insn on different assignment subpasses.  The
    patch fixes the problem.
    
    gcc/ChangeLog:
    
            PR rtl-optimization/102842
            * lra-constraints.c (match_reload): Ignore out in checking values
            of outs.
            (curr_insn_transform): Collect outputs before doing reloads of operands.
    
    gcc/testsuite/ChangeLog:
    
            PR rtl-optimization/102842
            * g++.target/arm/pr102842.C: New test.

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 8f75125fc2e..0195b4fb9c3 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1102,7 +1102,7 @@  match_reload (signed char out, signed char *ins, signed char *outs,
 	  for (i = 0; outs[i] >= 0; i++)
 	    {
 	      rtx other_out_rtx = *curr_id->operand_loc[outs[i]];
-	      if (REG_P (other_out_rtx)
+	      if (outs[i] != out && REG_P (other_out_rtx)
 		  && (regno_val_use_in (REGNO (in_rtx), other_out_rtx)
 		      != NULL_RTX))
 		{
@@ -4382,7 +4382,10 @@  curr_insn_transform (bool check_only_p)
       }
 
   n_outputs = 0;
-  outputs[0] = -1;
+  for (i = 0; i < n_operands; i++)
+    if (curr_static_id->operand[i].type == OP_OUT)
+      outputs[n_outputs++] = i;
+  outputs[n_outputs] = -1;
   for (i = 0; i < n_operands; i++)
     {
       int regno;
@@ -4457,8 +4460,6 @@  curr_insn_transform (bool check_only_p)
 		     lra-lives.c.  */
 		  match_reload (i, goal_alt_matched[i], outputs, goal_alt[i], &before,
 				&after, TRUE);
-		  outputs[n_outputs++] = i;
-		  outputs[n_outputs] = -1;
 		}
 	      continue;
 	    }
@@ -4636,14 +4637,6 @@  curr_insn_transform (bool check_only_p)
 	   process_alt_operands decides that it is possible.  */
 	gcc_unreachable ();
 
-      /* Memorise processed outputs so that output remaining to be processed
-	 can avoid using the same register value (see match_reload).  */
-      if (curr_static_id->operand[i].type == OP_OUT)
-	{
-	  outputs[n_outputs++] = i;
-	  outputs[n_outputs] = -1;
-	}
-
       if (optional_p)
 	{
 	  rtx reg = op;
diff --git a/gcc/testsuite/g++.target/arm/pr102842.C b/gcc/testsuite/g++.target/arm/pr102842.C
new file mode 100644
index 00000000000..a2bac66091a
--- /dev/null
+++ b/gcc/testsuite/g++.target/arm/pr102842.C
@@ -0,0 +1,30 @@ 
+/* PR rtl-optimization/102842 */
+/* { dg-do compile } */
+/* { dg-options "-fPIC  -O2 -fno-omit-frame-pointer -mthumb -march=armv7-a+fp" } */
+
+struct Plane {
+  using T = float;
+  T *Row();
+};
+using ImageF = Plane;
+long long Mirror_x;
+struct EnsurePaddingInPlaceRowByRow {
+  void Process() {
+    switch (strategy_) {
+    case kSlow:
+      float *row = img_.Row();
+      long long xsize = x1_;
+      while (Mirror_x >= xsize)
+        if (Mirror_x)
+          Mirror_x = 2 * xsize - 1;
+      *row = Mirror_x;
+    }
+  }
+  ImageF img_;
+  unsigned x1_;
+  enum { kSlow } strategy_;
+};
+void FinalizeImageRect() {
+  EnsurePaddingInPlaceRowByRow ensure_padding;
+  ensure_padding.Process();
+}