@@ -904,6 +904,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
const char * aarch64_output_probe_stack_range (rtx, rtx);
const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
void aarch64_err_no_fpadvsimd (machine_mode);
+void aarch64_expand_epilogue (rtx_call_insn *, bool);
void aarch64_expand_epilogue (rtx_call_insn *);
rtx aarch64_ptrue_all (unsigned int);
opt_machine_mode aarch64_ptrue_all_mode (rtx);
@@ -8270,10 +8270,28 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
into CFI_OPS. */
-static void
+static bool
aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
- rtx *cfi_ops)
+ rtx *cfi_ops, bool was_eh_return)
{
+  /* Skip over the eh return data registers when returning normally rather than via eh_return.  */
+ if (!was_eh_return
+ && EH_RETURN_DATA_REGNO (regno1) != INVALID_REGNUM
+ && (!df_regs_ever_live_p (regno1)
+ || crtl->abi->clobbers_full_reg_p (regno1)))
+ {
+ if (regno2 == INVALID_REGNUM
+ || (EH_RETURN_DATA_REGNO (regno2) != INVALID_REGNUM
+ && (!df_regs_ever_live_p (regno2)
+ || crtl->abi->clobbers_full_reg_p (regno2))))
+ {
+ rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
+ emit_move_insn (stack_pointer_rtx, mem);
+ return true;
+ }
+ regno1 = regno2;
+ regno2 = INVALID_REGNUM;
+ }
machine_mode mode = aarch64_reg_save_mode (regno1);
rtx reg1 = gen_rtx_REG (mode, regno1);
@@ -8292,6 +8310,7 @@ aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
reg2, adjustment));
}
+ return true;
}
/* Given an ldp/stp register operand mode MODE, return a suitable mode to use
@@ -8658,7 +8677,8 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
static void
aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
- array_slice<unsigned int> regs, rtx *cfi_ops)
+ array_slice<unsigned int> regs, rtx *cfi_ops,
+ bool was_eh_return = false)
{
aarch64_frame &frame = cfun->machine->frame;
poly_int64 offset;
@@ -8677,6 +8697,14 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
if (frame.is_scs_enabled && regno == LR_REGNUM)
return true;
+  /* Skip the eh return data registers when returning normally
+     rather than via eh_return.  */
+ if (!was_eh_return
+ && EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM
+ && (!df_regs_ever_live_p (regno)
+ || crtl->abi->clobbers_full_reg_p (regno)))
+ return true;
+
return false;
};
@@ -9801,6 +9829,11 @@ aarch64_use_return_insn_p (void)
return known_eq (cfun->machine->frame.frame_size, 0);
}
+void
+aarch64_expand_epilogue (rtx_call_insn *sibcall)
+{
+ aarch64_expand_epilogue (sibcall, false);
+}
/* Generate the epilogue instructions for returning from a function.
This is almost exactly the reverse of the prolog sequence, except
@@ -9808,7 +9841,7 @@ aarch64_use_return_insn_p (void)
from a deallocated stack, and we optimize the unwind records by
emitting them all together if possible. */
void
-aarch64_expand_epilogue (rtx_call_insn *sibcall)
+aarch64_expand_epilogue (rtx_call_insn *sibcall, bool was_eh_return)
{
aarch64_frame &frame = cfun->machine->frame;
poly_int64 initial_adjust = frame.initial_adjust;
@@ -9915,19 +9948,21 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
restore x30, we don't need to restore x30 again in the traditional
way. */
aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
- frame.saved_gprs, &cfi_ops);
+ frame.saved_gprs, &cfi_ops, was_eh_return);
if (need_barrier_p)
aarch64_emit_stack_tie (stack_pointer_rtx);
+ bool has_adjustment = false;
if (callee_adjust != 0)
- aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
+ has_adjustment = aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops, was_eh_return);
/* If we have no register restore information, the CFA must have been
defined in terms of the stack pointer since the end of the prologue. */
gcc_assert (cfi_ops || !frame_pointer_needed);
- if (cfi_ops && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)))
+ if ((has_adjustment || cfi_ops)
+ && (callee_adjust != 0 || maybe_gt (initial_adjust, 65536)))
{
/* Emit delayed restores and set the CFA to be SP + initial_adjust. */
insn = get_last_insn ();
@@ -9964,26 +9999,12 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
}
/* Stack adjustment for exception handler. */
- if (crtl->calls_eh_return && !sibcall)
- {
- /* If the EH_RETURN_TAKEN_RTX flag is set then we need
- to unwind the stack and jump to the handler, otherwise
- skip this eh_return logic and continue with normal
- return after the label. We have already reset the CFA
- to be SP; letting the CFA move during this adjustment
- is just as correct as retaining the CFA from the body
- of the function. Therefore, do nothing special. */
- rtx_code_label *label = gen_label_rtx ();
- rtx x = aarch64_gen_compare_zero_and_branch (EQ, EH_RETURN_TAKEN_RTX,
- label);
- rtx jump = emit_jump_insn (x);
- JUMP_LABEL (jump) = label;
- LABEL_NUSES (label)++;
+ if (was_eh_return && !sibcall)
+ {
emit_insn (gen_add2_insn (stack_pointer_rtx,
EH_RETURN_STACKADJ_RTX));
emit_jump_insn (gen_indirect_jump (EH_RETURN_HANDLER_RTX));
- emit_barrier ();
- emit_label (label);
+ return;
}
/* We prefer to emit the combined return/authenticate instruction RETAA,
@@ -723,10 +723,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
/* Output assembly strings after .cfi_startproc is emitted. */
#define ASM_POST_CFI_STARTPROC aarch64_post_cfi_startproc
-/* For EH returns X4 is a flag that is set in the EH return
- code paths and then X5 and X6 contain the stack adjustment
+/* For EH returns, X5 and X6 contain the stack adjustment
and return address respectively. */
-#define EH_RETURN_TAKEN_RTX gen_rtx_REG (Pmode, R4_REGNUM)
#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, R5_REGNUM)
#define EH_RETURN_HANDLER_RTX gen_rtx_REG (Pmode, R6_REGNUM)
@@ -1058,6 +1058,30 @@ (define_insn "simple_return"
(set_attr "sls_length" "retbr")]
)
+;; This is used in compiling the unwind routines.
+(define_expand "eh_return"
+ [(use (match_operand 0 "general_operand"))]
+ ""
+{
+ emit_move_insn (EH_RETURN_HANDLER_RTX, operands[0]);
+
+ emit_jump_insn (gen_eh_return_internal ());
+ emit_barrier ();
+ DONE;
+})
+
+(define_insn_and_split "eh_return_internal"
+ [(eh_return)]
+ ""
+ "#"
+ "epilogue_completed"
+ [(const_int 0)]
+{
+ aarch64_expand_epilogue (nullptr, true);
+ DONE;
+})
+
+
(define_insn "aarch64_cb<optab><mode>1"
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
(const_int 0))
new file mode 100644
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered. */
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+ if (*a == 5)
+ return 5;
+ __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+ int t = 5;
+ if (f(&t, 0, 0) != 5)
+ __builtin_abort();
+}
+
new file mode 100644
@@ -0,0 +1,23 @@
+/* { dg-do run } */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered; also uses alloca.  */
+__attribute__((noipa))
+void g(void*) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+ g(__builtin_alloca(offset));
+ if (*a == 5)
+ return 5;
+ __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+ int t = 5;
+ if (f(&t, 67, 0) != 5)
+ __builtin_abort();
+}
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target builtin_eh_return } */
+
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered; also uses a large stack frame.  */
+__attribute__((noipa))
+void g(void *a) {}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+ int h[10245];
+ g(h);
+ if (*a == 5)
+ return 5;
+ __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+ int t = 5;
+ if (f(&t, 67, 0) != 5)
+ __builtin_abort();
+}
+
new file mode 100644
@@ -0,0 +1,25 @@
+/* { dg-do run } */
+/* { dg-require-effective-target builtin_eh_return } */
+
+/* PR target/114843 */
+/* Test to make sure the return value does not get clobbered, using sibcall. */
+__attribute__((noipa))
+int g()
+{
+ return 5;
+}
+__attribute__((noipa))
+int f(int *a, long offset, void *handler)
+{
+ if (*a == 5)
+ return g();
+ __builtin_eh_return (offset, handler);
+}
+
+int main()
+{
+ int t = 5;
+ if (f(&t, 67, 0) != 5)
+ __builtin_abort();
+}
+
@@ -7,11 +7,7 @@
** hint 25 // paciasp
** ...
** cbz w2, .*
-** mov x4, 0
-** ...
-** cbz x4, .*
-** add sp, sp, x5
-** br x6
+** add sp, sp, 32
** (
** hint 29 // autiasp
** ret
@@ -19,9 +15,10 @@
** retaa
** )
** mov x5, x0
-** mov x4, 1
** mov x6, x1
-** b .*
+** ...
+** add sp, sp, x5
+** br x6
*/
void
foo (unsigned long off, void *handler, int c)
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbranch-protection=pac-ret+leaf -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+**foo:
+** hint 25 // paciasp
+** ...
+** mov x5, x0
+** cbz w2, .*
+** mov w0, 5
+** add sp, sp, 32
+** (
+** hint 29 // autiasp
+** ret
+** |
+** retaa
+** )
+** mov x6, x1
+** ...
+** add sp, sp, x5
+** br x6
+*/
+int
+foo (unsigned long off, void *handler, int c)
+{
+ if (c)
+ return 5;
+ __builtin_eh_return (off, handler);
+}