[pushed] aarch64: Add missing early-ra bookkeeping [PR113295]

Message ID mpt8r3bjmiu.fsf@arm.com
State Committed
Commit 8a16e06da97f51574cfad17e2cece2e58571305d
Headers
Series [pushed] aarch64: Add missing early-ra bookkeeping [PR113295] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm warning Patch is already merged
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 warning Patch is already merged

Commit Message

Richard Sandiford Feb. 23, 2024, 2:14 p.m. UTC
  416.gamess showed up two wrong-code bugs in early-ra.  This patch
fixes the first of them.  It was difficult to reduce the source code
to something that would meaningfully show the situation, so the
testcase uses a direct RTL sequence instead.

In the sequence:

(a) register <2> is set more than once
(b) register <2> is copied to a temporary (<4>)
(c) register <2> is the destination of an FCSEL between <4> and
    another value (<5>)
(d) <4> and <2> are equivalent for <4>'s live range
(e) <5>'s and <2>'s live ranges do not intersect, and there is
    a pseudo-copy between <5> and <2>

On its own, (d) implies that <4> can be treated as equivalent to <2>.
And on its own, (e) implies that <5> can share <2>'s register.  But
<4>'s and <5>'s live ranges conflict, meaning that they cannot both
share the register together.  A bit of missing bookkeeping meant that
the mechanism for detecting this didn't fire.  We therefore ended up
with an FCSEL in which both inputs were the same register.

Tested on aarch64-linux-gnu & pushed.

Richard


gcc/
	PR target/113295
	* config/aarch64/aarch64-early-ra.cc
	(early_ra::find_related_start): Account for definitions by shared
	registers when testing for a single register definition.
	(early_ra::accumulate_defs): New function.
	(early_ra::record_copy): If A shares B's register, fold A's
	definition information into B's.  Fold A's use information into B's.

gcc/testsuite/
	PR target/113295
	* gcc.dg/rtl/aarch64/pr113295-1.c: New test.
---
 gcc/config/aarch64/aarch64-early-ra.cc        | 27 ++++++++++
 gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c | 53 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 1a03d86e94b..58ae5a49913 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -440,6 +440,7 @@  private:
   void record_allocno_use (allocno_info *);
   void record_allocno_def (allocno_info *);
   allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
+  void accumulate_defs (allocno_info *, allocno_info *);
   void record_copy (rtx, rtx, bool = false);
   void record_constraints (rtx_insn *);
   void record_artificial_refs (unsigned int);
@@ -1569,6 +1570,8 @@  early_ra::find_related_start (allocno_info *dest_allocno,
 	}
 
       if (dest_allocno->group_size != 1
+	  // Account for definitions by shared registers.
+	  || dest_allocno->num_defs > 1
 	  || DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
 	// Currently only single allocnos that are defined once can
 	// share registers with non-equivalent allocnos.  This could be
@@ -1593,6 +1596,20 @@  early_ra::find_related_start (allocno_info *dest_allocno,
     }
 }
 
+// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
+void
+early_ra::accumulate_defs (allocno_info *to_allocno,
+			   allocno_info *from_allocno)
+{
+  if (from_allocno->num_defs > 0)
+    {
+      to_allocno->num_defs = MIN (from_allocno->num_defs
+				  + to_allocno->num_defs, 2);
+      to_allocno->last_def_point = MAX (to_allocno->last_def_point,
+					from_allocno->last_def_point);
+    }
+}
+
 // Record any relevant allocno-related information for an actual or imagined
 // copy from SRC to DEST.  FROM_MOVE_P is true if the copy was an explicit
 // move instruction, false if it represents one way of satisfying the previous
@@ -1687,6 +1704,16 @@  early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
 		  next_allocno->related_allocno = src_allocno->id;
 		  next_allocno->is_equiv = (start_allocno == dest_allocno
 					    && from_move_p);
+		  // If we're sharing two allocnos that are not equivalent,
+		  // carry across the definition information.  This is needed
+		  // to prevent multiple incompatible attempts to share with
+		  // the same register.
+		  if (next_allocno->is_shared ())
+		    accumulate_defs (src_allocno, next_allocno);
+		  src_allocno->last_use_point
+		    = MAX (src_allocno->last_use_point,
+			   next_allocno->last_use_point);
+
 		  if (next_allocno == start_allocno)
 		    break;
 		  next_allocno = m_allocnos[next_allocno->copy_dest];
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c b/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
new file mode 100644
index 00000000000..481fb813f61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/pr113295-1.c
@@ -0,0 +1,53 @@ 
+// { dg-options "-O2" }
+// { dg-do run }
+
+struct data {
+  double x;
+  double y;
+  long long cond;
+  double res;
+};
+
+void __RTL (startwith ("early_ra")) foo (struct data *ptr)
+{
+  (function "foo"
+    (param "ptr"
+      (DECL_RTL (reg/v:DI <0> [ ptr ]))
+      (DECL_RTL_INCOMING (reg/v:DI x0 [ ptr ]))
+    ) ;; param "ptr"
+    (insn-chain
+      (block 2
+	(edge-from entry (flags "FALLTHRU"))
+	(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+	(insn 4 (set (reg:DI <0>) (reg:DI x0)))
+	(insn 5 (set (reg:DF <1>) (mem:DF (reg:DI <0>) [1 S8 A8])))
+        (insn 6 (set (reg:DF <2>) (mem:DF (plus:DI (reg:DI <0>)
+						   (const_int 8)) [1 S8 A8])))
+        (insn 7 (set (reg:DI <3>) (mem:DI (plus:DI (reg:DI <0>)
+						   (const_int 16)) [1 S8 A8])))
+        (insn 8 (set (reg:CC cc) (compare:CC (reg:DI <3>) (const_int 0))))
+        (insn 9 (set (reg:DF <4>) (reg:DF <2>)))
+	(insn 10 (set (reg:DF <5>) (plus:DF (reg:DF <1>) (reg:DF <2>))))
+	(insn 11 (set (reg:DF <2>) (if_then_else:DF
+				     (ge (reg:CC cc) (const_int 0))
+				     (reg:DF <4>)
+				     (reg:DF <5>))))
+        (insn 12 (set (mem:DF (plus:DI (reg:DI <0>)
+				       (const_int 24)) [1 S8 A8])
+		      (reg:DF <2>)))
+	(edge-to exit (flags "FALLTHRU"))
+      ) ;; block 2
+    ) ;; insn-chain
+  ) ;; function
+}
+
+int
+main (void)
+{
+  struct data d1 = { 1, 2, -1, 0 };
+  struct data d2 = { 3, 4, 1, 0 };
+  foo (&d1);
+  foo (&d2);
+  if (d1.res != 3 || d2.res != 4)
+    __builtin_abort ();
+}