aarch64: Fix normal returns inside functions which use eh_returns [PR114843]

Message ID 20240426170740.3001529-1-quic_apinski@quicinc.com
State Superseded, archived
Headers
Series aarch64: Fix normal returns inside functions which use eh_returns [PR114843] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Testing failed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed

Commit Message

Andrew Pinski April 26, 2024, 5:07 p.m. UTC
  The problem here is that on a normal return path, we still restore the
eh data return when we should not.
Instead of one return path in the case of eh_return, this changes over
to use multiple returns pathes just like a normal function.
On the normal path (non-eh return), we need to skip restoring of the eh
return data registers.

This fixes the code generation of _Unwind_RaiseException where the return value
was currupted.

Note this adds some testcases which might fail on some targets.
I know of the following targets will fail also:
arm is recorded as PR 114847.
powerpc is recorded as PR 114846.

Build and tested for aarch64-linux-gnu with no regressions.

	PR target/114843

gcc/ChangeLog:

	* config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): New
	prototype.
	* config/aarch64/aarch64.cc (aarch64_pop_regs): Skip over the
	eh return data if not eh return. Generate an add if both registers
	was skipped and return true. Return false if we generated a pop.
	(aarch64_restore_callee_saves): Skip over the eh return data regs
	if not eh return.
	(aarch64_expand_epilogue): New function, pass false.
	(aarch64_expand_epilogue): Add was_eh_return argument.
	Update calls to aarch64_restore_callee_saves and aarch64_pop_regs.
	If aarch64_pop_regs return true, make sure we add the CFA note.
	For eh_returns, update the sp and do an indirect jump.
	Don't check EH_RETURN_TAKEN_RTX any more.
	* config/aarch64/aarch64.h (EH_RETURN_TAKEN_RTX): Delete.
	* config/aarch64/aarch64.md (eh_return): New define_expand.
	(eh_return_internal): New pattern for eh_returns.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/eh_return-3.c: Update testcase.
	* gcc.c-torture/execute/eh_return-1.c: New test.
	* gcc.c-torture/execute/eh_return-2.c: New test.
	* gcc.c-torture/execute/eh_return-3.c: New test.
	* gcc.c-torture/execute/eh_return-4.c: New test.
	* gcc.target/aarch64/eh_return-4.c: New test.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/config/aarch64/aarch64-protos.h           |  1 +
 gcc/config/aarch64/aarch64.cc                 | 69 ++++++++++++-------
 gcc/config/aarch64/aarch64.h                  |  4 +-
 gcc/config/aarch64/aarch64.md                 | 24 +++++++
 .../gcc.c-torture/execute/eh_return-1.c       | 20 ++++++
 .../gcc.c-torture/execute/eh_return-2.c       | 23 +++++++
 .../gcc.c-torture/execute/eh_return-3.c       | 25 +++++++
 .../gcc.c-torture/execute/eh_return-4.c       | 25 +++++++
 .../gcc.target/aarch64/eh_return-3.c          | 11 ++-
 .../gcc.target/aarch64/eh_return-4.c          | 30 ++++++++
 10 files changed, 198 insertions(+), 34 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/eh_return-4.c
  

Patch

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 42639e9efcf..efe86d52873 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -904,6 +904,7 @@  const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
 const char * aarch64_output_probe_stack_range (rtx, rtx);
 const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
 void aarch64_err_no_fpadvsimd (machine_mode);
+void aarch64_expand_epilogue (rtx_call_insn *, bool);
 void aarch64_expand_epilogue (rtx_call_insn *);
 rtx aarch64_ptrue_all (unsigned int);
 opt_machine_mode aarch64_ptrue_all_mode (rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1beec94629d..99dc6f7f1c0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8270,10 +8270,28 @@  aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
    afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
    into CFI_OPS.  */
 
-static void
+static bool
 aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
-		  rtx *cfi_ops)
+		  rtx *cfi_ops, bool was_eh_return)
 {
+  /* Skip over eh return data if not eh_return. */
+  if (!was_eh_return
+      && EH_RETURN_DATA_REGNO (regno1) != INVALID_REGNUM
+      && (!df_regs_ever_live_p (regno1)
+	  || crtl->abi->clobbers_full_reg_p (regno1)))
+    {
+      if (regno2 == INVALID_REGNUM
+	  || (EH_RETURN_DATA_REGNO (regno2) != INVALID_REGNUM
+	      && (!df_regs_ever_live_p (regno2)
+		  || crtl->abi->clobbers_full_reg_p (regno2))))
+	{
+	  rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
+	  emit_move_insn (stack_pointer_rtx, mem);
+	  return true;
+	}
+      regno1 = regno2;
+      regno2 = INVALID_REGNUM;
+    }
   machine_mode mode = aarch64_reg_save_mode (regno1);
   rtx reg1 = gen_rtx_REG (mode, regno1);
 
@@ -8292,6 +8310,7 @@  aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
       emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
 					  reg2, adjustment));
     }
+  return true;
 }
 
 /* Given an ldp/stp register operand mode MODE, return a suitable mode to use
@@ -8658,7 +8677,8 @@  aarch64_save_callee_saves (poly_int64 bytes_below_sp,
 
 static void
 aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
-			      array_slice<unsigned int> regs, rtx *cfi_ops)
+			      array_slice<unsigned int> regs, rtx *cfi_ops,
+			      bool was_eh_return = false)
 {
   aarch64_frame &frame = cfun->machine->frame;
   poly_int64 offset;
@@ -8677,6 +8697,14 @@  aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
       if (frame.is_scs_enabled && regno == LR_REGNUM)
 	return true;
 
+      /* Skip the eh return data registers if we are
+	 returning normally rather than via eh_return. */
+      if (!was_eh_return
+	  && EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM
+	  && (!df_regs_ever_live_p (regno)
+	      || crtl->abi->clobbers_full_reg_p (regno)))
+	return true;
+
       return false;
     };
 
@@ -9801,6 +9829,11 @@  aarch64_use_return_insn_p (void)
 
   return known_eq (cfun->machine->frame.frame_size, 0);
 }
+void
+aarch64_expand_epilogue (rtx_call_insn *sibcall)
+{
+  aarch64_expand_epilogue (sibcall, false);
+}
 
 /* Generate the epilogue instructions for returning from a function.
    This is almost exactly the reverse of the prolog sequence, except
@@ -9808,7 +9841,7 @@  aarch64_use_return_insn_p (void)
    from a deallocated stack, and we optimize the unwind records by
    emitting them all together if possible.  */
 void
-aarch64_expand_epilogue (rtx_call_insn *sibcall)
+aarch64_expand_epilogue (rtx_call_insn *sibcall, bool was_eh_return)
 {
   aarch64_frame &frame = cfun->machine->frame;
   poly_int64 initial_adjust = frame.initial_adjust;
@@ -9915,19 +9948,21 @@  aarch64_expand_epilogue (rtx_call_insn *sibcall)
      restore x30, we don't need to restore x30 again in the traditional
      way.  */
   aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
-				frame.saved_gprs, &cfi_ops);
+				frame.saved_gprs, &cfi_ops, was_eh_return);
 
   if (need_barrier_p)
     aarch64_emit_stack_tie (stack_pointer_rtx);
 
+  bool has_adjustment = false;
   if (callee_adjust != 0)
-    aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
+    has_adjustment = aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops, was_eh_return);
 
   /* If we have no register restore information, the CFA must have been
      defined in terms of the stack pointer since the end of the prologue.  */
   gcc_assert (cfi_ops || !frame_pointer_needed);
 
-  if (cfi_ops && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)))
+  if ((has_adjustment || cfi_ops)
+      && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)))
     {
       /* Emit delayed restores and set the CFA to be SP + initial_adjust.  */
       insn = get_last_insn ();
@@ -9964,26 +9999,12 @@  aarch64_expand_epilogue (rtx_call_insn *sibcall)
     }
 
   /* Stack adjustment for exception handler.  */
-  if (crtl->calls_eh_return && !sibcall)
-    {
-      /* If the EH_RETURN_TAKEN_RTX flag is set then we need
-	 to unwind the stack and jump to the handler, otherwise
-	 skip this eh_return logic and continue with normal
-	 return after the label.  We have already reset the CFA
-	 to be SP; letting the CFA move during this adjustment
-	 is just as correct as retaining the CFA from the body
-	 of the function.  Therefore, do nothing special.  */
-      rtx_code_label *label = gen_label_rtx ();
-      rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
-						   label);
-      rtx jump = emit_jump_insn (x);
-      JUMP_LABEL (jump) = label;
-      LABEL_NUSES (label)++;
+  if (was_eh_return && !sibcall)
+    {
       emit_insn (gen_add2_insn (stack_pointer_rtx,
 				EH_RETURN_STACKADJ_RTX));
       emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX));
-      emit_barrier ();
-      emit_label (label);
+      return;
     }
 
   /* We prefer to emit the combined return/authenticate instruction RETAA,
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 4fa1dfc7906..83a1c6e5fc1 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -723,10 +723,8 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
 /* Output assembly strings after .cfi_startproc is emitted.  */
 #define ASM_POST_CFI_STARTPROC  aarch64_post_cfi_startproc
 
-/* For EH returns X4 is a flag that is set in the EH return
-   code paths and then X5 and X6 contain the stack adjustment
+/* For EH returns X5 and X6 contain the stack adjustment
    and return address respectively.  */
-#define EH_RETURN_TAKEN_RTX	gen_rtx_REG (Pmode, R4_REGNUM)
 #define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, R5_REGNUM)
 #define EH_RETURN_HANDLER_RTX	gen_rtx_REG (Pmode, R6_REGNUM)
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 046a249475d..a6cedd0f1b8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1058,6 +1058,30 @@  (define_insn "simple_return"
    (set_attr "sls_length" "retbr")]
 )
 
+;; This is used in compiling the unwind routines.
+(define_expand "eh_return"
+  [(use (match_operand 0 "general_operand"))]
+  ""
+{
+  emit_move_insn (EH_RETURN_HANDLER_RTX, operands[0]);
+
+  emit_jump_insn (gen_eh_return_internal ());
+  emit_barrier ();
+  DONE;
+})
+
+(define_insn_and_split "eh_return_internal"
+  [(eh_return)]
+  ""
+  "#"
+  "epilogue_completed"
+  [(const_int 0)]
+{
+  aarch64_expand_epilogue (nullptr, true);
+  DONE;
+})
+
+
 (define_insn "aarch64_cb<optab><mode>1"
   [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
 				(const_int 0))
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
new file mode 100644
index 00000000000..3928a58d841
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-1.c
@@ -0,0 +1,20 @@ 
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered. */
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 0, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
new file mode 100644
index 00000000000..41fb48c99f9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-2.c
@@ -0,0 +1,23 @@ 
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, also use alloca. */
+__attribute__((noipa))
+void g(void*) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  g(__builtin_alloca(offset));
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
new file mode 100644
index 00000000000..dfd73d35a43
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-3.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, also use large stack. */
+__attribute__((noipa))
+void g(void *a) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  int h[10245];
+  g(h);
+  if (*a == 5)
+    return 5;
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
new file mode 100644
index 00000000000..2f1126ba7c5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/eh_return-4.c
@@ -0,0 +1,25 @@ 
+/* { dg-do run }  */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, using sibcall. */
+__attribute__((noipa))
+int g()
+{
+   return 5;
+}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+  if (*a == 5)
+    return g();
+  __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+  int t = 5;
+  if (f(&t, 67, 0) != 5)
+    __builtin_abort();
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
index d180fa7c455..16fcf41ebee 100644
--- a/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return-3.c
@@ -7,11 +7,7 @@ 
 **	hint	25 // paciasp
 **	...
 **	cbz	w2, .*
-**	mov	x4, 0
-**	...
-**	cbz	x4, .*
-**	add	sp, sp, x5
-**	br	x6
+**	add	sp, sp, 32
 ** (
 **	hint	29 // autiasp
 **	ret
@@ -19,9 +15,10 @@ 
 **	retaa
 ** )
 **	mov	x5, x0
-**	mov	x4, 1
 **	mov	x6, x1
-**	b	.*
+**	...
+**	add	sp, sp, x5
+**	br	x6
 */
 void
 foo (unsigned long off, void *handler, int c)
diff --git a/gcc/testsuite/gcc.target/aarch64/eh_return-4.c b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c
new file mode 100644
index 00000000000..4267d123d5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/eh_return-4.c
@@ -0,0 +1,30 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=pac-ret+leaf -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo:
+**	hint	25 // paciasp
+**	...
+**	mov	x5, x0
+**	cbz	w2, .*
+**	mov	w0, 5
+**	add	sp, sp, 32
+** (
+**	hint	29 // autiasp
+**	ret
+** |
+**	retaa
+** )
+**	mov	x6, x1
+**	...
+**	add	sp, sp, x5
+**	br	x6
+*/
+int
+foo (unsigned long off, void *handler, int c)
+{
+  if (c)
+    return 5;
+  __builtin_eh_return (off, handler);
+}