[v2,4/4] gdb/arm: Use new dwarf2 function cache

Message ID 20221118155252.113476-5-torbjorn.svensson@foss.st.com
State New
Headers
Series v2 gdb/arm: Fixes for Cortex-M stack unwinding

Commit Message

Torbjorn SVENSSON Nov. 18, 2022, 3:52 p.m. UTC
  This patch resolves the performance issue reported in pr/29738 by
caching the values for the stack pointers for the inner frame.  By
doing so, the impact can be reduced to checking the state and
returning the appropriate value.

Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
Signed-off-by: Yvan Roux <yvan.roux@foss.st.com>
---
 gdb/arm-tdep.c | 96 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 64 insertions(+), 32 deletions(-)
  

Comments

Luis Machado Nov. 21, 2022, 9:04 p.m. UTC | #1
Hi,

On 11/18/22 15:52, Torbjörn SVENSSON wrote:
> This patch resolves the performance issue reported in pr/29738 by
> caching the values for the stack pointers for the inner frame.  By
> doing so, the impact can be reduced to checking the state and
> returning the appropriate value.
> 
> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
> Signed-off-by: Yvan Roux <yvan.roux@foss.st.com>
> ---
>   gdb/arm-tdep.c | 96 +++++++++++++++++++++++++++++++++-----------------
>   1 file changed, 64 insertions(+), 32 deletions(-)
> 
> diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
> index c011b2aa973..59cd0964d96 100644
> --- a/gdb/arm-tdep.c
> +++ b/gdb/arm-tdep.c
> @@ -3953,6 +3953,18 @@ struct frame_base arm_normal_base = {
>     arm_normal_frame_base
>   };
>   
> +struct arm_dwarf2_prev_register_cache
> +{
> +  /* Cached value of the coresponding stack pointer for the inner frame.  */

coresponding -> corresponding

> +  CORE_ADDR sp;
> +  CORE_ADDR msp;
> +  CORE_ADDR msp_s;
> +  CORE_ADDR msp_ns;
> +  CORE_ADDR psp;
> +  CORE_ADDR psp_s;
> +  CORE_ADDR psp_ns;
> +};
> +

Given SP is the cfa, do we need to cache it here?

>   static struct value *
>   arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
>   			  int regnum)
> @@ -3961,6 +3973,48 @@ arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
>     arm_gdbarch_tdep *tdep = gdbarch_tdep<arm_gdbarch_tdep> (gdbarch);
>     CORE_ADDR lr;
>     ULONGEST cpsr;
> +  struct arm_dwarf2_prev_register_cache *cache
> +    = (struct arm_dwarf2_prev_register_cache *) dwarf2_frame_get_fn_data (
> +      this_frame, this_cache, arm_dwarf2_prev_register);
> +
> +  if (!cache)
> +    {
> +      const unsigned int size = sizeof (struct arm_dwarf2_prev_register_cache);
> +      cache = (struct arm_dwarf2_prev_register_cache *)
> +	dwarf2_frame_allocate_fn_data (this_frame, this_cache,
> +				       arm_dwarf2_prev_register, size);
> +
> +      if (tdep->have_sec_ext)
> +	{
> +	  cache->sp
> +	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);

We fetch ARM_SP_REGNUM in both legs of the conditional. How about moving it outside of the conditional blocks?

> +
> +	  cache->msp_s
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_msp_s_regnum);
> +	  cache->msp_ns
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_msp_ns_regnum);
> +	  cache->psp_s
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_psp_s_regnum);
> +	  cache->psp_ns
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_psp_ns_regnum);
> +	}
> +      else if (tdep->is_m)
> +	{
> +	  cache->sp
> +	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
> +
> +	  cache->msp
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_msp_regnum);
> +	  cache->psp
> +	    = get_frame_register_unsigned (this_frame,
> +					   tdep->m_profile_psp_regnum);
> +	}
> +    }
>   
>     if (regnum == ARM_PC_REGNUM)
>       {
> @@ -4000,33 +4054,18 @@ arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
>   
>         if (tdep->have_sec_ext)
>   	{
> -	  CORE_ADDR sp
> -	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
> -	  CORE_ADDR msp_s
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_msp_s_regnum);
> -	  CORE_ADDR msp_ns
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_msp_ns_regnum);
> -	  CORE_ADDR psp_s
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_psp_s_regnum);
> -	  CORE_ADDR psp_ns
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_psp_ns_regnum);
> -
>   	  bool is_msp = (regnum == tdep->m_profile_msp_regnum)
> -	    && (msp_s == sp || msp_ns == sp);
> +	    && (cache->msp_s == cache->sp || cache->msp_ns == cache->sp);
>   	  bool is_msp_s = (regnum == tdep->m_profile_msp_s_regnum)
> -	    && (msp_s == sp);
> +	    && (cache->msp_s == cache->sp);
>   	  bool is_msp_ns = (regnum == tdep->m_profile_msp_ns_regnum)
> -	    && (msp_ns == sp);
> +	    && (cache->msp_ns == cache->sp);
>   	  bool is_psp = (regnum == tdep->m_profile_psp_regnum)
> -	    && (psp_s == sp || psp_ns == sp);
> +	    && (cache->psp_s == cache->sp || cache->psp_ns == cache->sp);
>   	  bool is_psp_s = (regnum == tdep->m_profile_psp_s_regnum)
> -	    && (psp_s == sp);
> +	    && (cache->psp_s == cache->sp);
>   	  bool is_psp_ns = (regnum == tdep->m_profile_psp_ns_regnum)
> -	    && (psp_ns == sp);
> +	    && (cache->psp_ns == cache->sp);
>   
>   	  override_with_sp_value = is_msp || is_msp_s || is_msp_ns
>   	    || is_psp || is_psp_s || is_psp_ns;
> @@ -4034,17 +4073,10 @@ arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
>   	}
>         else if (tdep->is_m)
>   	{
> -	  CORE_ADDR sp
> -	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
> -	  CORE_ADDR msp
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_msp_regnum);
> -	  CORE_ADDR psp
> -	    = get_frame_register_unsigned (this_frame,
> -					   tdep->m_profile_psp_regnum);
> -
> -	  bool is_msp = (regnum == tdep->m_profile_msp_regnum) && (sp == msp);
> -	  bool is_psp = (regnum == tdep->m_profile_psp_regnum) && (sp == psp);
> +	  bool is_msp = (regnum == tdep->m_profile_msp_regnum)
> +	    && (cache->sp == cache->msp);
> +	  bool is_psp = (regnum == tdep->m_profile_psp_regnum)
> +	    && (cache->sp == cache->psp);
>   
>   	  override_with_sp_value = is_msp || is_psp;
>   	}

As we've discussed off-list, I think we can reduce the number of get_frame_register_unsigned calls we do for each call to arm_dwarf2_prev_register by using some conditionals.
  
Torbjorn SVENSSON Nov. 29, 2022, 3:19 p.m. UTC | #2
Hi,

I've had a long discussion with Luis on IRC regarding the points 
mentioned here, but I'll reply to the list now in order to get more eyes 
on the topic.

On 2022-11-21 22:04, Luis Machado wrote:
> Hi,
> 
> On 11/18/22 15:52, Torbjörn SVENSSON wrote:
>> This patch resolves the performance issue reported in pr/29738 by
>> caching the values for the stack pointers for the inner frame.  By
>> doing so, the impact can be reduced to checking the state and
>> returning the appropriate value.
>>
>> Signed-off-by: Torbjörn SVENSSON <torbjorn.svensson@foss.st.com>
>> Signed-off-by: Yvan Roux <yvan.roux@foss.st.com>
>> ---
>>   gdb/arm-tdep.c | 96 +++++++++++++++++++++++++++++++++-----------------
>>   1 file changed, 64 insertions(+), 32 deletions(-)
>>
>> diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
>> index c011b2aa973..59cd0964d96 100644
>> --- a/gdb/arm-tdep.c
>> +++ b/gdb/arm-tdep.c
>> @@ -3953,6 +3953,18 @@ struct frame_base arm_normal_base = {
>>     arm_normal_frame_base
>>   };
>> +struct arm_dwarf2_prev_register_cache
>> +{
>> +  /* Cached value of the coresponding stack pointer for the inner 
>> frame.  */
> 
> coresponding -> corresponding
> 
>> +  CORE_ADDR sp;
>> +  CORE_ADDR msp;
>> +  CORE_ADDR msp_s;
>> +  CORE_ADDR msp_ns;
>> +  CORE_ADDR psp;
>> +  CORE_ADDR psp_s;
>> +  CORE_ADDR psp_ns;
>> +};
>> +
> 
> Given SP is the cfa, do we need to cache it here?

As I said off-list, it's not the value of sp, msp, etc.; it's the value of 
the inner frame, so what is actually "cached" here is basically the 
state of the frame, not the values.
The cache could be simplified in a few ways, but before doing the 
polishing, I would like to know if it's an acceptable way to implement 
the fix for the performance issue.

> 
>>   static struct value *
>>   arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
>>                 int regnum)
>> @@ -3961,6 +3973,48 @@ arm_dwarf2_prev_register (frame_info_ptr 
>> this_frame, void **this_cache,
>>     arm_gdbarch_tdep *tdep = gdbarch_tdep<arm_gdbarch_tdep> (gdbarch);
>>     CORE_ADDR lr;
>>     ULONGEST cpsr;
>> +  struct arm_dwarf2_prev_register_cache *cache
>> +    = (struct arm_dwarf2_prev_register_cache *) 
>> dwarf2_frame_get_fn_data (
>> +      this_frame, this_cache, arm_dwarf2_prev_register);
>> +
>> +  if (!cache)
>> +    {
>> +      const unsigned int size = sizeof (struct 
>> arm_dwarf2_prev_register_cache);
>> +      cache = (struct arm_dwarf2_prev_register_cache *)
>> +    dwarf2_frame_allocate_fn_data (this_frame, this_cache,
>> +                       arm_dwarf2_prev_register, size);
>> +
>> +      if (tdep->have_sec_ext)
>> +    {
>> +      cache->sp
>> +        = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
> 
> We fetch ARM_SP_REGNUM in both legs of the conditional. How about moving 
> it outside of the conditional blocks?

Sure. Before doing more here, I would like to get the agreement that 
this is a good approach to the problem.

>> +
>> +      cache->msp_s
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_msp_s_regnum);
>> +      cache->msp_ns
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_msp_ns_regnum);
>> +      cache->psp_s
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_psp_s_regnum);
>> +      cache->psp_ns
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_psp_ns_regnum);
>> +    }
>> +      else if (tdep->is_m)
>> +    {
>> +      cache->sp
>> +        = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
>> +
>> +      cache->msp
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_msp_regnum);
>> +      cache->psp
>> +        = get_frame_register_unsigned (this_frame,
>> +                       tdep->m_profile_psp_regnum);
>> +    }
>> +    }
>>     if (regnum == ARM_PC_REGNUM)
>>       {
>> @@ -4000,33 +4054,18 @@ arm_dwarf2_prev_register (frame_info_ptr 
>> this_frame, void **this_cache,
>>         if (tdep->have_sec_ext)
>>       {
>> -      CORE_ADDR sp
>> -        = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
>> -      CORE_ADDR msp_s
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_msp_s_regnum);
>> -      CORE_ADDR msp_ns
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_msp_ns_regnum);
>> -      CORE_ADDR psp_s
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_psp_s_regnum);
>> -      CORE_ADDR psp_ns
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_psp_ns_regnum);
>> -
>>         bool is_msp = (regnum == tdep->m_profile_msp_regnum)
>> -        && (msp_s == sp || msp_ns == sp);
>> +        && (cache->msp_s == cache->sp || cache->msp_ns == cache->sp);
>>         bool is_msp_s = (regnum == tdep->m_profile_msp_s_regnum)
>> -        && (msp_s == sp);
>> +        && (cache->msp_s == cache->sp);
>>         bool is_msp_ns = (regnum == tdep->m_profile_msp_ns_regnum)
>> -        && (msp_ns == sp);
>> +        && (cache->msp_ns == cache->sp);
>>         bool is_psp = (regnum == tdep->m_profile_psp_regnum)
>> -        && (psp_s == sp || psp_ns == sp);
>> +        && (cache->psp_s == cache->sp || cache->psp_ns == cache->sp);
>>         bool is_psp_s = (regnum == tdep->m_profile_psp_s_regnum)
>> -        && (psp_s == sp);
>> +        && (cache->psp_s == cache->sp);
>>         bool is_psp_ns = (regnum == tdep->m_profile_psp_ns_regnum)
>> -        && (psp_ns == sp);
>> +        && (cache->psp_ns == cache->sp);
>>         override_with_sp_value = is_msp || is_msp_s || is_msp_ns
>>           || is_psp || is_psp_s || is_psp_ns;
>> @@ -4034,17 +4073,10 @@ arm_dwarf2_prev_register (frame_info_ptr 
>> this_frame, void **this_cache,
>>       }
>>         else if (tdep->is_m)
>>       {
>> -      CORE_ADDR sp
>> -        = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
>> -      CORE_ADDR msp
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_msp_regnum);
>> -      CORE_ADDR psp
>> -        = get_frame_register_unsigned (this_frame,
>> -                       tdep->m_profile_psp_regnum);
>> -
>> -      bool is_msp = (regnum == tdep->m_profile_msp_regnum) && (sp == 
>> msp);
>> -      bool is_psp = (regnum == tdep->m_profile_psp_regnum) && (sp == 
>> psp);
>> +      bool is_msp = (regnum == tdep->m_profile_msp_regnum)
>> +        && (cache->sp == cache->msp);
>> +      bool is_psp = (regnum == tdep->m_profile_psp_regnum)
>> +        && (cache->sp == cache->psp);
>>         override_with_sp_value = is_msp || is_psp;
>>       }
> 
> As we've discussed off-list, I think we can reduce the number of 
> get_frame_register_unsigned calls we do for each call to 
> arm_dwarf2_prev_register by using some conditionals.

Likely, but let's focus on the dwarf2 part of the patch first and do the 
polishing after, okay?

Kind regards,
Torbjörn
  

Patch

diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
index c011b2aa973..59cd0964d96 100644
--- a/gdb/arm-tdep.c
+++ b/gdb/arm-tdep.c
@@ -3953,6 +3953,18 @@  struct frame_base arm_normal_base = {
   arm_normal_frame_base
 };
 
+struct arm_dwarf2_prev_register_cache
+{
+  /* Cached value of the corresponding stack pointer for the inner frame.  */
+  CORE_ADDR sp;
+  CORE_ADDR msp;
+  CORE_ADDR msp_s;
+  CORE_ADDR msp_ns;
+  CORE_ADDR psp;
+  CORE_ADDR psp_s;
+  CORE_ADDR psp_ns;
+};
+
 static struct value *
 arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
 			  int regnum)
@@ -3961,6 +3973,48 @@  arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
   arm_gdbarch_tdep *tdep = gdbarch_tdep<arm_gdbarch_tdep> (gdbarch);
   CORE_ADDR lr;
   ULONGEST cpsr;
+  struct arm_dwarf2_prev_register_cache *cache
+    = (struct arm_dwarf2_prev_register_cache *) dwarf2_frame_get_fn_data (
+      this_frame, this_cache, arm_dwarf2_prev_register);
+
+  if (!cache)
+    {
+      const unsigned int size = sizeof (struct arm_dwarf2_prev_register_cache);
+      cache = (struct arm_dwarf2_prev_register_cache *)
+	dwarf2_frame_allocate_fn_data (this_frame, this_cache,
+				       arm_dwarf2_prev_register, size);
+
+      if (tdep->have_sec_ext)
+	{
+	  cache->sp
+	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
+
+	  cache->msp_s
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_msp_s_regnum);
+	  cache->msp_ns
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_msp_ns_regnum);
+	  cache->psp_s
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_psp_s_regnum);
+	  cache->psp_ns
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_psp_ns_regnum);
+	}
+      else if (tdep->is_m)
+	{
+	  cache->sp
+	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
+
+	  cache->msp
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_msp_regnum);
+	  cache->psp
+	    = get_frame_register_unsigned (this_frame,
+					   tdep->m_profile_psp_regnum);
+	}
+    }
 
   if (regnum == ARM_PC_REGNUM)
     {
@@ -4000,33 +4054,18 @@  arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
 
       if (tdep->have_sec_ext)
 	{
-	  CORE_ADDR sp
-	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
-	  CORE_ADDR msp_s
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_msp_s_regnum);
-	  CORE_ADDR msp_ns
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_msp_ns_regnum);
-	  CORE_ADDR psp_s
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_psp_s_regnum);
-	  CORE_ADDR psp_ns
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_psp_ns_regnum);
-
 	  bool is_msp = (regnum == tdep->m_profile_msp_regnum)
-	    && (msp_s == sp || msp_ns == sp);
+	    && (cache->msp_s == cache->sp || cache->msp_ns == cache->sp);
 	  bool is_msp_s = (regnum == tdep->m_profile_msp_s_regnum)
-	    && (msp_s == sp);
+	    && (cache->msp_s == cache->sp);
 	  bool is_msp_ns = (regnum == tdep->m_profile_msp_ns_regnum)
-	    && (msp_ns == sp);
+	    && (cache->msp_ns == cache->sp);
 	  bool is_psp = (regnum == tdep->m_profile_psp_regnum)
-	    && (psp_s == sp || psp_ns == sp);
+	    && (cache->psp_s == cache->sp || cache->psp_ns == cache->sp);
 	  bool is_psp_s = (regnum == tdep->m_profile_psp_s_regnum)
-	    && (psp_s == sp);
+	    && (cache->psp_s == cache->sp);
 	  bool is_psp_ns = (regnum == tdep->m_profile_psp_ns_regnum)
-	    && (psp_ns == sp);
+	    && (cache->psp_ns == cache->sp);
 
 	  override_with_sp_value = is_msp || is_msp_s || is_msp_ns
 	    || is_psp || is_psp_s || is_psp_ns;
@@ -4034,17 +4073,10 @@  arm_dwarf2_prev_register (frame_info_ptr this_frame, void **this_cache,
 	}
       else if (tdep->is_m)
 	{
-	  CORE_ADDR sp
-	    = get_frame_register_unsigned (this_frame, ARM_SP_REGNUM);
-	  CORE_ADDR msp
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_msp_regnum);
-	  CORE_ADDR psp
-	    = get_frame_register_unsigned (this_frame,
-					   tdep->m_profile_psp_regnum);
-
-	  bool is_msp = (regnum == tdep->m_profile_msp_regnum) && (sp == msp);
-	  bool is_psp = (regnum == tdep->m_profile_psp_regnum) && (sp == psp);
+	  bool is_msp = (regnum == tdep->m_profile_msp_regnum)
+	    && (cache->sp == cache->msp);
+	  bool is_psp = (regnum == tdep->m_profile_psp_regnum)
+	    && (cache->sp == cache->psp);
 
 	  override_with_sp_value = is_msp || is_psp;
 	}