[2/3] RISC-V: optimize stack manipulation in save-restore
Commit Message
The stack space reserved by save-restore is not properly accounted for in stack allocation and deallocation.
This patch allows fewer instructions to be used in stack allocation and deallocation when save-restore is enabled.
before patch:
bar:
call t0,__riscv_save_4
addi sp,sp,-64
...
li t0,-12288
addi t0,t0,-1968 # optimized out after patch
add sp,sp,t0 # prologue
...
li t0,12288 # epilogue
addi t0,t0,2000 # optimized out after patch
add sp,sp,t0
...
addi sp,sp,32
tail __riscv_restore_4
after patch:
bar:
call t0,__riscv_save_4
addi sp,sp,-2032
...
li t0,-12288
add sp,sp,t0 # prologue
...
li t0,12288 # epilogue
add sp,sp,t0
...
addi sp,sp,2032
tail __riscv_restore_4
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_expand_prologue): Consider save-restore in stack allocation.
(riscv_expand_epilogue): Consider save-restore in stack deallocation.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/stack_save_restore.c: New test.
---
gcc/config/riscv/riscv.cc | 50 ++++++++++---------
.../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++++
2 files changed, 66 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
Comments
Gentle ping.
The patch I previously submitted:
| Date: Wed, 30 Nov 2022 00:38:08 -0800
| Subject: [PATCH] RISC-V: optimize stack manipulation in save-restore
| Message-ID: <gaofei@eswincomputing.com>
I split the patches as per Palmer's review comment.
BR
Fei
>The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
>This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled.
>
>before patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-64
> ...
> li t0,-12288
> addi t0,t0,-1968 # optimized out after patch
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> addi t0,t0,2000 # optimized out after patch
> add sp,sp,t0
> ...
> addi sp,sp,32
> tail __riscv_restore_4
>
>after patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-2032
> ...
> li t0,-12288
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> add sp,sp,t0
> ...
> addi sp,sp,2032
> tail __riscv_restore_4
>
>gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_expand_prologue): consider save-restore in stack allocation.
> (riscv_expand_epilogue): consider save-restore in stack deallocation.
>
>gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/stack_save_restore.c: New test.
>---
> gcc/config/riscv/riscv.cc | 50 ++++++++++---------
> .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++++
> 2 files changed, 66 insertions(+), 24 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>
>diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>index f0bbcd6d6be..a50f2303032 100644
>--- a/gcc/config/riscv/riscv.cc
>+++ b/gcc/config/riscv/riscv.cc
>@@ -5010,12 +5010,12 @@ void
> riscv_expand_prologue (void)
> {
> struct riscv_frame_info *frame = &cfun->machine->frame;
>- poly_int64 size = frame->total_size;
>+ poly_int64 remaining_size = frame->total_size;
> unsigned mask = frame->mask;
> rtx insn;
>
> if (flag_stack_usage_info)
>- current_function_static_stack_size = constant_lower_bound (size);
>+ current_function_static_stack_size = constant_lower_bound (remaining_size);
>
> if (cfun->machine->naked_p)
> return;
>@@ -5026,7 +5026,7 @@ riscv_expand_prologue (void)
> rtx dwarf = NULL_RTX;
> dwarf = riscv_adjust_libcall_cfi_prologue ();
>
>- size -= frame->save_libcall_adjustment;
>+ remaining_size -= frame->save_libcall_adjustment;
> insn = emit_insn (riscv_gen_gpr_save_insn (frame));
> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
>
>@@ -5037,16 +5037,14 @@ riscv_expand_prologue (void)
> /* Save the registers. */
> if ((frame->mask | frame->fmask) != 0)
> {
>- HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>- if (size.is_constant ())
>- step1 = MIN (size.to_constant(), step1);
>+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
>
> insn = gen_add3_insn (stack_pointer_rtx,
> stack_pointer_rtx,
> GEN_INT (-step1));
> RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
>- size -= step1;
>- riscv_for_each_saved_reg (size, riscv_save_reg, false, false);
>+ remaining_size -= step1;
>+ riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
> }
>
> frame->mask = mask; /* Undo the above fib. */
>@@ -5055,29 +5053,29 @@ riscv_expand_prologue (void)
> if (frame_pointer_needed)
> {
> insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
>- GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ()));
>+ GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
> RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
>
> riscv_emit_stack_tie ();
> }
>
> /* Allocate the rest of the frame. */
>- if (known_gt (size, 0))
>+ if (known_gt (remaining_size, 0))
> {
> /* Two step adjustment:
> 1.scalable frame. 2.constant frame. */
> poly_int64 scalable_frame (0, 0);
>- if (!size.is_constant ())
>+ if (!remaining_size.is_constant ())
> {
> /* First for scalable frame. */
>- poly_int64 scalable_frame = size;
>- scalable_frame.coeffs[0] = size.coeffs[1];
>+ poly_int64 scalable_frame = remaining_size;
>+ scalable_frame.coeffs[0] = remaining_size.coeffs[1];
> riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
>- size -= scalable_frame;
>+ remaining_size -= scalable_frame;
> }
>
> /* Second step for constant frame. */
>- HOST_WIDE_INT constant_frame = size.to_constant ();
>+ HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
> if (constant_frame == 0)
> return;
>
>@@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
> HOST_WIDE_INT step2 = 0;
> bool use_restore_libcall = ((style == NORMAL_RETURN)
> && riscv_use_save_libcall (frame));
>+ unsigned libcall_size = use_restore_libcall ?
>+ frame->save_libcall_adjustment : 0;
> rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
> rtx insn;
>
>@@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
> REG_NOTES (insn) = dwarf;
> }
>
>+ if (use_restore_libcall)
>+ frame->mask = 0; /* Temporarily fib for GPRs. */
>+
> /* If we need to restore registers, deallocate as much stack as
> possible in the second step without going out of range. */
> if ((frame->mask | frame->fmask) != 0)
>- {
>- step2 = riscv_first_stack_step (frame, frame->total_size);
>- step1 -= step2;
>- }
>+ step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
>+
>+ if (use_restore_libcall)
>+ frame->mask = mask; /* Undo the above fib. */
>+
>+ step1 -= step2 + libcall_size;
>
> /* Set TARGET to BASE + STEP1. */
> if (known_gt (step1, 0))
>@@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
> frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
>
> /* Restore the registers. */
>- riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
>+ riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
>+ riscv_restore_reg,
> true, style == EXCEPTION_RETURN);
>
> if (use_restore_libcall)
>- {
> frame->mask = mask; /* Undo the above fib. */
>- gcc_assert (step2 >= frame->save_libcall_adjustment);
>- step2 -= frame->save_libcall_adjustment;
>- }
>
> if (need_barrier_p)
> riscv_emit_stack_tie ();
>diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>new file mode 100644
>index 00000000000..522e706cfbf
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c
>@@ -0,0 +1,40 @@
>+/* { dg-do compile } */
>+/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops -fno-lto" } */
>+/* { dg-final { check-function-bodies "**" "" } } */
>+
>+char my_getchar();
>+float getf();
>+
>+/*
>+**bar:
>+** call t0,__riscv_save_4
>+** addi sp,sp,-2032
>+** ...
>+** li t0,-12288
>+** add sp,sp,t0
>+** ...
>+** li t0,12288
>+** add sp,sp,t0
>+** ...
>+** addi sp,sp,2032
>+** tail __riscv_restore_4
>+*/
>+int bar()
>+{
>+ float volatile farray[3568];
>+
>+ float sum = 0;
>+ float f1 = getf();
>+ float f2 = getf();
>+ float f3 = getf();
>+ float f4 = getf();
>+
>+ for (int i = 0; i < 3568; i++)
>+ {
>+ farray[i] = my_getchar() * 1.2;
>+ sum += farray[i];
>+ }
>+
>+ return sum + f1 + f2 + f3 + f4;
>+}
>+
>--
>2.17.1
On 12/1/22 03:03, Fei Gao wrote:
> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled.
Haha! I should have read the whole series before commenting on the
first patch. I think this addresses the precise issue I was asking
about in my prior message.
Jeff
On 12/1/22 03:03, Fei Gao wrote:
> The stack that save-restore reserves is not well accumulated in stack allocation and deallocation.
> This patch allows less instructions to be used in stack allocation and deallocation if save-restore enabled.
>
> before patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-64
> ...
> li t0,-12288
> addi t0,t0,-1968 # optimized out after patch
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> addi t0,t0,2000 # optimized out after patch
> add sp,sp,t0
> ...
> addi sp,sp,32
> tail __riscv_restore_4
>
> after patch:
> bar:
> call t0,__riscv_save_4
> addi sp,sp,-2032
> ...
> li t0,-12288
> add sp,sp,t0 # prologue
> ...
> li t0,12288 # epilogue
> add sp,sp,t0
> ...
> addi sp,sp,2032
> tail __riscv_restore_4
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_expand_prologue): consider save-restore in stack allocation.
> (riscv_expand_epilogue): consider save-restore in stack deallocation.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/stack_save_restore.c: New test.
I made a couple of whitespace fixes and pushed this to the trunk after
running it through a cross testing cycle.
Thanks!
jeff
@@ -5010,12 +5010,12 @@ void
riscv_expand_prologue (void)
{
struct riscv_frame_info *frame = &cfun->machine->frame;
- poly_int64 size = frame->total_size;
+ poly_int64 remaining_size = frame->total_size;
unsigned mask = frame->mask;
rtx insn;
if (flag_stack_usage_info)
- current_function_static_stack_size = constant_lower_bound (size);
+ current_function_static_stack_size = constant_lower_bound (remaining_size);
if (cfun->machine->naked_p)
return;
@@ -5026,7 +5026,7 @@ riscv_expand_prologue (void)
rtx dwarf = NULL_RTX;
dwarf = riscv_adjust_libcall_cfi_prologue ();
- size -= frame->save_libcall_adjustment;
+ remaining_size -= frame->save_libcall_adjustment;
insn = emit_insn (riscv_gen_gpr_save_insn (frame));
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
@@ -5037,16 +5037,14 @@ riscv_expand_prologue (void)
/* Save the registers. */
if ((frame->mask | frame->fmask) != 0)
{
- HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
- if (size.is_constant ())
- step1 = MIN (size.to_constant(), step1);
+ HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size);
insn = gen_add3_insn (stack_pointer_rtx,
stack_pointer_rtx,
GEN_INT (-step1));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
- size -= step1;
- riscv_for_each_saved_reg (size, riscv_save_reg, false, false);
+ remaining_size -= step1;
+ riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false);
}
frame->mask = mask; /* Undo the above fib. */
@@ -5055,29 +5053,29 @@ riscv_expand_prologue (void)
if (frame_pointer_needed)
{
insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
- GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ()));
+ GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ()));
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
riscv_emit_stack_tie ();
}
/* Allocate the rest of the frame. */
- if (known_gt (size, 0))
+ if (known_gt (remaining_size, 0))
{
/* Two step adjustment:
1.scalable frame. 2.constant frame. */
poly_int64 scalable_frame (0, 0);
- if (!size.is_constant ())
+ if (!remaining_size.is_constant ())
{
/* First for scalable frame. */
- poly_int64 scalable_frame = size;
- scalable_frame.coeffs[0] = size.coeffs[1];
+ poly_int64 scalable_frame = remaining_size;
+ scalable_frame.coeffs[0] = remaining_size.coeffs[1];
riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false);
- size -= scalable_frame;
+ remaining_size -= scalable_frame;
}
/* Second step for constant frame. */
- HOST_WIDE_INT constant_frame = size.to_constant ();
+ HOST_WIDE_INT constant_frame = remaining_size.to_constant ();
if (constant_frame == 0)
return;
@@ -5142,6 +5140,8 @@ riscv_expand_epilogue (int style)
HOST_WIDE_INT step2 = 0;
bool use_restore_libcall = ((style == NORMAL_RETURN)
&& riscv_use_save_libcall (frame));
+ unsigned libcall_size = use_restore_libcall ?
+ frame->save_libcall_adjustment : 0;
rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
rtx insn;
@@ -5212,13 +5212,18 @@ riscv_expand_epilogue (int style)
REG_NOTES (insn) = dwarf;
}
+ if (use_restore_libcall)
+ frame->mask = 0; /* Temporarily fib for GPRs. */
+
/* If we need to restore registers, deallocate as much stack as
possible in the second step without going out of range. */
if ((frame->mask | frame->fmask) != 0)
- {
- step2 = riscv_first_stack_step (frame, frame->total_size);
- step1 -= step2;
- }
+ step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size);
+
+ if (use_restore_libcall)
+ frame->mask = mask; /* Undo the above fib. */
+
+ step1 -= step2 + libcall_size;
/* Set TARGET to BASE + STEP1. */
if (known_gt (step1, 0))
@@ -5272,15 +5277,12 @@ riscv_expand_epilogue (int style)
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
/* Restore the registers. */
- riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg,
+ riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size,
+ riscv_restore_reg,
true, style == EXCEPTION_RETURN);
if (use_restore_libcall)
- {
frame->mask = mask; /* Undo the above fib. */
- gcc_assert (step2 >= frame->save_libcall_adjustment);
- step2 -= frame->save_libcall_adjustment;
- }
if (need_barrier_p)
riscv_emit_stack_tie ();
new file mode 100644
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops -fno-lto" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+char my_getchar();
+float getf();
+
+/*
+**bar:
+** call t0,__riscv_save_4
+** addi sp,sp,-2032
+** ...
+** li t0,-12288
+** add sp,sp,t0
+** ...
+** li t0,12288
+** add sp,sp,t0
+** ...
+** addi sp,sp,2032
+** tail __riscv_restore_4
+*/
+int bar()
+{
+ float volatile farray[3568];
+
+ float sum = 0;
+ float f1 = getf();
+ float f2 = getf();
+ float f3 = getf();
+ float f4 = getf();
+
+ for (int i = 0; i < 3568; i++)
+ {
+ farray[i] = my_getchar() * 1.2;
+ sum += farray[i];
+ }
+
+ return sum + f1 + f2 + f3 + f4;
+}
+