On the LoongArch (LA) architecture, when the stack has to be dropped by a
large amount, the stack drop is divided into two steps.
Step 1: Drop the stack, then save the callee-saved registers on the stack.
Step 2: Drop the rest of the stack.
The stack drop operation is optimized when frame->total_size minus
frame->fp_sp_offset is an integer multiple of 4096, which reduces the number
of instructions required to drop the stack. However, this optimization does
not take effect because of the way the offsets were originally calculated.
Consider the following case:
int main()
{
char buf[1024 * 12];
printf ("%p\n", buf);
return 0;
}
As the generated assembly shows, the old GCC emits two more instructions
than the new GCC: lines 14 and 24 of the old output.
new old
10 main: │ 11 main:
11 addi.d $r3,$r3,-16 │ 12 lu12i.w $r13,-12288>>12
12 lu12i.w $r13,-12288>>12 │ 13 addi.d $r3,$r3,-2032
13 lu12i.w $r5,-12288>>12 │ 14 ori $r13,$r13,2016
14 lu12i.w $r12,12288>>12 │ 15 lu12i.w $r5,-12288>>12
15 st.d $r1,$r3,8 │ 16 lu12i.w $r12,12288>>12
16 add.d $r12,$r12,$r5 │ 17 st.d $r1,$r3,2024
17 add.d $r3,$r3,$r13 │ 18 add.d $r12,$r12,$r5
18 add.d $r5,$r12,$r3 │ 19 add.d $r3,$r3,$r13
19 la.local $r4,.LC0 │ 20 add.d $r5,$r12,$r3
20 bl %plt(printf) │ 21 la.local $r4,.LC0
21 lu12i.w $r13,12288>>12 │ 22 bl %plt(printf)
22 add.d $r3,$r3,$r13 │ 23 lu12i.w $r13,8192>>12
23 ld.d $r1,$r3,8 │ 24 ori $r13,$r13,2080
24 or $r4,$r0,$r0 │ 25 add.d $r3,$r3,$r13
25 addi.d $r3,$r3,16 │ 26 ld.d $r1,$r3,2024
26 jr $r1 │ 27 or $r4,$r0,$r0
│ 28 addi.d $r3,$r3,2032
│ 29 jr $r1
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_compute_frame_info):
Modify the calculation of fp_sp_offset and gp_sp_offset:
when frame->mask or frame->fmask is zero, don't subtract UNITS_PER_WORD
or UNITS_PER_FP_REG.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/prolog-opt.c: New test.
---
gcc/config/loongarch/loongarch.cc | 12 ++++++--
.../gcc.target/loongarch/prolog-opt.c | 29 +++++++++++++++++++
2 files changed, 38 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/prolog-opt.c
@@ -917,8 +917,12 @@ loongarch_compute_frame_info (void)
frame->frame_pointer_offset = offset;
/* Next are the callee-saved FPRs. */
if (frame->fmask)
- offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
- frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
+ {
+ offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
+ frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
+ }
+ else
+ frame->fp_sp_offset = offset;
/* Next are the callee-saved GPRs. */
if (frame->mask)
{
@@ -931,8 +935,10 @@ loongarch_compute_frame_info (void)
frame->save_libcall_adjustment = x_save_size;
offset += x_save_size;
+ frame->gp_sp_offset = offset - UNITS_PER_WORD;
}
- frame->gp_sp_offset = offset - UNITS_PER_WORD;
+ else
+ frame->gp_sp_offset = offset;
/* The hard frame pointer points above the callee-saved GPRs. */
frame->hard_frame_pointer_offset = offset;
/* Above the hard frame pointer is the callee-allocated varags save area. */
new file mode 100644
@@ -0,0 +1,29 @@
+/* Test that the LoongArch backend optimizes the stack drop operation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "addi.d\t\\\$r3,\\\$r3,-16" } } */
+
+struct test
+{
+ int empty1[0];
+ double empty2[0];
+ int : 0;
+ float x;
+ long empty3[0];
+ long : 0;
+ float y;
+ unsigned : 0;
+ char empty4[0];
+};
+
+extern void callee (struct test);
+
+void
+caller (void)
+{
+ struct test test;
+ test.x = 114;
+ test.y = 514;
+ callee (test);
+}