[1/3] RISC-V: add a new parameter in riscv_first_stack_step.

Message ID 20221201100332.22226-2-gaofei@eswincomputing.com
State Deferred, archived
Headers
Series RISC-V: optimize stack manipulation in save-restore |

Commit Message

Fei Gao Dec. 1, 2022, 10:03 a.m. UTC
  frame->total_size to remaining_size conversion is done as an independent patch without
functionality change as per review comment.

gcc/ChangeLog:

        * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
        (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
        (riscv_expand_prologue): likewise.
        (riscv_expand_epilogue): likewise.
---
 gcc/config/riscv/riscv.cc | 48 +++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 24 deletions(-)
  

Comments

Fei Gao Feb. 3, 2023, 8:52 a.m. UTC | #1
Gentle ping.

The patch I previously submitted:
| Date: Wed, 30 Nov 2022 00:38:08 -0800
| Subject: [PATCH] RISC-V: optimize stack manipulation in save-restore
| Message-ID: <gaofei@eswincomputing.com>

I split the patches as per Palmer's review comment.

BR
Fei

>frame->total_size to remaining_size conversion is done as an independent patch without
>functionality change as per review comment.
>
>gcc/ChangeLog:
>
>        * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
>        (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
>        (riscv_expand_prologue): likewise.
>        (riscv_expand_epilogue): likewise.
>---
> gcc/config/riscv/riscv.cc | 48 +++++++++++++++++++--------------------
> 1 file changed, 24 insertions(+), 24 deletions(-)
>
>diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
>index 05bdba5ab4d..f0bbcd6d6be 100644
>--- a/gcc/config/riscv/riscv.cc
>+++ b/gcc/config/riscv/riscv.cc
>@@ -4634,7 +4634,7 @@ riscv_save_libcall_count (unsigned mask)
>    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
>    hard_frame_pointer_rtx unchanged.  */
>
>-static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
>+static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
>
> /* Handle stack align for poly_int.  */
> static poly_int64
>@@ -4663,7 +4663,7 @@ riscv_compute_frame_info (void)
>      save/restore t0.  We check for this before clearing the frame struct.  */
>   if (cfun->machine->interrupt_handler_p)
>     {
>-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>       if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
> interrupt_save_prologue_temp = true;
>     }
>@@ -4913,45 +4913,45 @@ riscv_restore_reg (rtx reg, rtx mem)
>    without adding extra instructions.  */
>
> static HOST_WIDE_INT
>-riscv_first_stack_step (struct riscv_frame_info *frame)
>+riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
> {
>-  HOST_WIDE_INT frame_total_constant_size;
>-  if (!frame->total_size.is_constant ())
>-    frame_total_constant_size
>-      = riscv_stack_align (frame->total_size.coeffs[0])
>-	- riscv_stack_align (frame->total_size.coeffs[1]);
>+  HOST_WIDE_INT remaining_const_size;
>+  if (!remaining_size.is_constant ())
>+    remaining_const_size
>+      = riscv_stack_align (remaining_size.coeffs[0])
>+        - riscv_stack_align (remaining_size.coeffs[1]);
>   else
>-    frame_total_constant_size = frame->total_size.to_constant ();
>+    remaining_const_size = remaining_size.to_constant ();
>
>-  if (SMALL_OPERAND (frame_total_constant_size))
>-    return frame_total_constant_size;
>+  if (SMALL_OPERAND (remaining_const_size))
>+    return remaining_const_size;
>
>   HOST_WIDE_INT min_first_step =
>-    RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
>+    riscv_stack_align ((remaining_size - frame->frame_pointer_offset).to_constant());
>   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
>-  HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
>+  HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
>   gcc_assert (min_first_step <= max_first_step);
>
>   /* As an optimization, use the least-significant bits of the total frame
>      size, so that the second adjustment step is just LUI + ADD.  */
>   if (!SMALL_OPERAND (min_second_step)
>-      && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
>-      && frame_total_constant_size % IMM_REACH >= min_first_step)
>-    return frame_total_constant_size % IMM_REACH;
>+      && remaining_const_size % IMM_REACH < IMM_REACH / 2
>+      && remaining_const_size % IMM_REACH >= min_first_step)
>+    return remaining_const_size % IMM_REACH;
>
>   if (TARGET_RVC)
>     {
>       /* If we need two subtracts, and one is small enough to allow compressed
>-	loads and stores, then put that one first.  */
>+         loads and stores, then put that one first.  */
>       if (IN_RANGE (min_second_step, 0,
>-	    (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
>-	return MAX (min_second_step, min_first_step);
>+                    (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
>+       return MAX (min_second_step, min_first_step);
>
>       /* If we need LUI + ADDI + ADD for the second adjustment step, then start
>-	with the minimum first step, so that we can get compressed loads and
>-	stores.  */
>+         with the minimum first step, so that we can get compressed loads and
>+         stores.  */
>       else if (!SMALL_OPERAND (min_second_step))
>-	return min_first_step;
>+       return min_first_step;
>     }
>
>   return max_first_step;
>@@ -5037,7 +5037,7 @@ riscv_expand_prologue (void)
>   /* Save the registers.  */
>   if ((frame->mask | frame->fmask) != 0)
>     {
>-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
>+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>       if (size.is_constant ())
> step1 = MIN (size.to_constant(), step1);
>
>@@ -5216,7 +5216,7 @@ riscv_expand_epilogue (int style)
>      possible in the second step without going out of range.  */
>   if ((frame->mask | frame->fmask) != 0)
>     {
>-      step2 = riscv_first_stack_step (frame);
>+      step2 = riscv_first_stack_step (frame, frame->total_size);
>       step1 -= step2;
>     }
>
>--
>2.17.1
  
Jeff Law April 16, 2023, 4:40 p.m. UTC | #2
On 12/1/22 03:03, Fei Gao wrote:
> frame->total_size to remaining_size conversion is done as an independent patch without
> functionality change as per review comment.
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
>          (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
>          (riscv_expand_prologue): likewise.
>          (riscv_expand_epilogue): likewise.
> ---
>   gcc/config/riscv/riscv.cc | 48 +++++++++++++++++++--------------------
>   1 file changed, 24 insertions(+), 24 deletions(-)
> 




> @@ -5037,7 +5037,7 @@ riscv_expand_prologue (void)
>     /* Save the registers.  */
>     if ((frame->mask | frame->fmask) != 0)
>       {
> -      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
> +      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
>         if (size.is_constant ())
>   	step1 = MIN (size.to_constant(), step1);
Hmm.  I generally agree that this patch has no functional changes in 
behavior.   But I wonder if there's a latent bug in the prologue code.

It seems to me that if we are optimizing for size and need to save both 
GPRs and FPRs that we don't want to be using frame->total_size as the 
libcall path to save the GPRs will have done a partial allocation, thus 
reducing the amount of stack still to allocate.  Or am I missing 
something here?



Jeff
  
Jeff Law April 17, 2023, 6:09 p.m. UTC | #3
On 12/1/22 03:03, Fei Gao wrote:
> frame->total_size to remaining_size conversion is done as an independent patch without
> functionality change as per review comment.
> 
> gcc/ChangeLog:
> 
>          * config/riscv/riscv.cc (riscv_first_stack_step): add a new function parameter remaining_size.
>          (riscv_compute_frame_info): adapt new riscv_first_stack_step interface.
>          (riscv_expand_prologue): likewise.
>          (riscv_expand_epilogue): likewise.
I fixed the capitalization issues in the ChangeLog and a few places 
where tabs should have been used instead of spaces and pushed this to 
the trunk.

Thanks for your patience,

Jeff
  

Patch

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 05bdba5ab4d..f0bbcd6d6be 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4634,7 +4634,7 @@  riscv_save_libcall_count (unsigned mask)
    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
    hard_frame_pointer_rtx unchanged.  */
 
-static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame);
+static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size);
 
 /* Handle stack align for poly_int.  */
 static poly_int64
@@ -4663,7 +4663,7 @@  riscv_compute_frame_info (void)
      save/restore t0.  We check for this before clearing the frame struct.  */
   if (cfun->machine->interrupt_handler_p)
     {
-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
       if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1)))
 	interrupt_save_prologue_temp = true;
     }
@@ -4913,45 +4913,45 @@  riscv_restore_reg (rtx reg, rtx mem)
    without adding extra instructions.  */
 
 static HOST_WIDE_INT
-riscv_first_stack_step (struct riscv_frame_info *frame)
+riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size)
 {
-  HOST_WIDE_INT frame_total_constant_size;
-  if (!frame->total_size.is_constant ())
-    frame_total_constant_size
-      = riscv_stack_align (frame->total_size.coeffs[0])
-	- riscv_stack_align (frame->total_size.coeffs[1]);
+  HOST_WIDE_INT remaining_const_size;
+  if (!remaining_size.is_constant ())
+    remaining_const_size
+      = riscv_stack_align (remaining_size.coeffs[0])
+        - riscv_stack_align (remaining_size.coeffs[1]);
   else
-    frame_total_constant_size = frame->total_size.to_constant ();
+    remaining_const_size = remaining_size.to_constant ();
 
-  if (SMALL_OPERAND (frame_total_constant_size))
-    return frame_total_constant_size;
+  if (SMALL_OPERAND (remaining_const_size))
+    return remaining_const_size;
 
   HOST_WIDE_INT min_first_step =
-    RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant());
+    riscv_stack_align ((remaining_size - frame->frame_pointer_offset).to_constant());
   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
-  HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step;
+  HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step;
   gcc_assert (min_first_step <= max_first_step);
 
   /* As an optimization, use the least-significant bits of the total frame
      size, so that the second adjustment step is just LUI + ADD.  */
   if (!SMALL_OPERAND (min_second_step)
-      && frame_total_constant_size % IMM_REACH < IMM_REACH / 2
-      && frame_total_constant_size % IMM_REACH >= min_first_step)
-    return frame_total_constant_size % IMM_REACH;
+      && remaining_const_size % IMM_REACH < IMM_REACH / 2
+      && remaining_const_size % IMM_REACH >= min_first_step)
+    return remaining_const_size % IMM_REACH;
 
   if (TARGET_RVC)
     {
       /* If we need two subtracts, and one is small enough to allow compressed
-	 loads and stores, then put that one first.  */
+         loads and stores, then put that one first.  */
       if (IN_RANGE (min_second_step, 0,
-		    (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
-	return MAX (min_second_step, min_first_step);
+                    (TARGET_64BIT ? SDSP_REACH : SWSP_REACH)))
+       return MAX (min_second_step, min_first_step);
 
       /* If we need LUI + ADDI + ADD for the second adjustment step, then start
-	 with the minimum first step, so that we can get compressed loads and
-	 stores.  */
+         with the minimum first step, so that we can get compressed loads and
+         stores.  */
       else if (!SMALL_OPERAND (min_second_step))
-	return min_first_step;
+       return min_first_step;
     }
 
   return max_first_step;
@@ -5037,7 +5037,7 @@  riscv_expand_prologue (void)
   /* Save the registers.  */
   if ((frame->mask | frame->fmask) != 0)
     {
-      HOST_WIDE_INT step1 = riscv_first_stack_step (frame);
+      HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size);
       if (size.is_constant ())
 	step1 = MIN (size.to_constant(), step1);
 
@@ -5216,7 +5216,7 @@  riscv_expand_epilogue (int style)
      possible in the second step without going out of range.  */
   if ((frame->mask | frame->fmask) != 0)
     {
-      step2 = riscv_first_stack_step (frame);
+      step2 = riscv_first_stack_step (frame, frame->total_size);
       step1 -= step2;
     }