(ARM Cortex-M) FPU and PSP aware exception frame unwinder

Message ID 5706DA27.1070308@cimeq.qc.ca
State New, archived
Headers

Commit Message

James-Adam Renquinha Henri April 7, 2016, 10:07 p.m. UTC
  I submitted it as a bug to the GNU ARM Embedded initially, see here for 
details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054

Basically, this patch allows gdb to properly unwind an extended stack 
frame, that is, an exception frame with FPU state stacked. Additionally, 
because all Cortex-M variants have 2 stack pointers, the Main Stack 
Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch 
also checks which stack was in use prior to the exception. That way, 
backtraces work beautifully.

In my original submission, I mentioned a known issue that I didn't try 
to fix *yet*, because that would involve a lot more work, and the impact 
is relatively minor: for a given outer frame, some FPU registers may not 
be reported correctly. I hope you don't mind too much. I consider the 
current patch still useful, because at least backtraces work, and it's 
an annoyance not to be able to get them.
  

Comments

Luis Machado April 11, 2016, 9:03 p.m. UTC | #1
On 04/07/2016 05:07 PM, James-Adam Renquinha Henri wrote:
> I submitted it as a bug to the GNU ARM Embedded initially, see here for
> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
>
> Basically, this patch allow gdb to unwind properly an extended stack
> frame, that is an exception frame with FPU state stacked. Additionally,
> because all Cortex-M variants have 2 stack pointers, the Main Stack
> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
> also check which stack was used prior to the exception. That way,
> backtraces work beautifully.
>
> In my original submission, I mentioned a known issue that I didn't try
> to fix *yet*, because that would involve a lot more work, and the impact
> is relatively minor: for a given outer frame, some FPU registers may not
> be reported correctly. I hope you don't mind too much. I consider the
> current patch still useful, because at least backtraces work, and it's
> an annoyance not to be able to get them.

I have a feeling people will mind. Ideally it should keep the old behavior 
intact if possible. So if you can fall back to the old code, it should be ok.

With that said, I'm not going through the arch-specific details, just 
the more general patch.

>
> --
> James-Adam Renquinha Henri, Ing. jr
> Ingenieur d'application
> CIMEQ INC.
>
>
> patch
>
>
> diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
> index 0412f71..8f342c1 100644
> --- a/gdb/arm-tdep.c
> +++ b/gdb/arm-tdep.c
> @@ -470,8 +470,10 @@ arm_addr_bits_remove (struct gdbarch *gdbarch, CORE_ADDR val)
>   {
>     /* On M-profile devices, do not strip the low bit from EXC_RETURN
>        (the magic exception return address).  */
> + /* NOTE: 0xf0000000 is the EXC_RETURN pattern, according to B1.5.8 of the
> +    ARMv7-M Reference Manual.  */
>     if (gdbarch_tdep (gdbarch)->is_m
> -      && (val & 0xfffffff0) == 0xfffffff0)
> +      && (val & 0xf0000000) == 0xf0000000)
>       return val;
>
>     if (arm_apcs_32)
> @@ -2907,13 +2909,31 @@ arm_m_exception_cache (struct frame_info *this_frame)
>     enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
>     struct arm_prologue_cache *cache;
>     CORE_ADDR unwound_sp;
> +  CORE_ADDR this_lr;
>     LONGEST xpsr;
> +  int main_stack_used;
> +  int extended_frame_type;
> +  int stack_regnum;
>
>     cache = FRAME_OBSTACK_ZALLOC (struct arm_prologue_cache);
>     cache->saved_regs = trad_frame_alloc_saved_regs (this_frame);
>
> -  unwound_sp = get_frame_register_unsigned (this_frame,
> -					    ARM_SP_REGNUM);
> +  /* We need LR to know: 1- if the FPU was used, 2- which stack was used.
> +     "B1.5.6 Exception entry behavior" in ARMv7-M Architecture Reference
> +     Manual Issue D (or the last one) gives the various bits in LR
> +     involved in this. NOTE: this LR is different of the stacked one.  */
> +  this_lr
> +    = get_frame_register_unsigned (this_frame,
> +				   user_reg_map_name_to_regnum (gdbarch,
> +								"lr",
> +								-1));
> +  main_stack_used = (this_lr & 0xf) != 0xd;
> +  extended_frame_type = (this_lr & (1 << 4)) == 0;
> +  stack_regnum = user_reg_map_name_to_regnum (gdbarch,
> +					      main_stack_used ? "sp" : "psp",
> +					      -1);
> +
> +  unwound_sp = get_frame_register_unsigned (this_frame, stack_regnum);
>
>     /* The hardware saves eight 32-bit words, comprising xPSR,
>        ReturnAddress, LR (R14), R12, R3, R2, R1, R0.  See details in
> @@ -2928,10 +2948,47 @@ arm_m_exception_cache (struct frame_info *this_frame)
>     cache->saved_regs[15].addr = unwound_sp + 24;
>     cache->saved_regs[ARM_PS_REGNUM].addr = unwound_sp + 28;
>
> +  if (extended_frame_type)
> +    {
> +      int s0_offset;
> +      int fpscr_offset;
> +
> +      s0_offset = user_reg_map_name_to_regnum (gdbarch, "s0", -1);
> +      fpscr_offset = user_reg_map_name_to_regnum (gdbarch, "fpscr", -1);
> +
> +      if (s0_offset == -1 || fpscr_offset == -1)
> +	{
> +	  /* Ooops. */
> +	  warning (_("can't get register offsets in cache; "
> +		     "fpu info may be wrong"));
> +	}
> +      else
> +	{
> +	  int i;
> +	  int fpu_reg_offset;
> +
> +	  fpu_reg_offset = unwound_sp + 0x20;
> +
> +	  /* XXX: This doesn't take into account the lazy stacking, see "Lazy
> +	     context save of FP state", in B1.5.7.  */

I'm almost sure we don't want to introduce FIXME's in the code. A 
fallback to the correct behavior would be ideal.

> +	  for (i = 0; i < 16; ++i, fpu_reg_offset += 4)
> +	    {
> +	      cache->saved_regs[s0_offset + i].addr = fpu_reg_offset;
> +	    }
> +	  cache->saved_regs[fpscr_offset].addr = unwound_sp + 0x60;
> +	}
> +
> +	/* Offset 0x64 is reserved */
> +	cache->prev_sp = unwound_sp + 0x68;
> +    }
> +  else
> +    {
> +      cache->prev_sp = unwound_sp + 32;
> +    }
> +

No need to have the { }'s for single line conditionals.

>     /* If bit 9 of the saved xPSR is set, then there is a four-byte
>        aligner between the top of the 32-byte stack frame and the
>        previous context's stack pointer.  */
> -  cache->prev_sp = unwound_sp + 32;
>     if (safe_read_memory_integer (unwound_sp + 28, 4, byte_order, &xpsr)
>         && (xpsr & (1 << 9)) != 0)
>       cache->prev_sp += 4;
> @@ -2997,11 +3054,19 @@ arm_m_exception_unwind_sniffer (const struct frame_unwind *self,
>     /* Exception frames return to one of these magic PCs.  Other values
>        are not defined as of v7-M.  See details in "B1.5.8 Exception
>        return behavior" in "ARMv7-M Architecture Reference Manual".  */
> -  if (this_pc == 0xfffffff1 || this_pc == 0xfffffff9
> -      || this_pc == 0xfffffffd)
> -    return 1;
> +  switch (this_pc)
> +    {
> +      case 0xffffffe1:
> +      case 0xffffffe9:
> +      case 0xffffffed:
> +      case 0xfffffff1:
> +      case 0xfffffff9:
> +      case 0xfffffffd:
> +        return 1;
>
> -  return 0;
> +      default:
> +        return 0;
> +    }

The above should probably go to a function that checks for the correct 
patterns. The code should be cleaner that way.

>   }
>
>   /* Frame unwinder for M-profile exceptions.  */
  
Pedro Alves April 11, 2016, 9:56 p.m. UTC | #2
Hi,

[Adding a few folks who either worked on or expressed
interest in this before.]

On 04/07/2016 11:07 PM, James-Adam Renquinha Henri wrote:
> I submitted it as a bug to the GNU ARM Embedded initially, see here for
> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
> 
> Basically, this patch allow gdb to unwind properly an extended stack
> frame, that is an exception frame with FPU state stacked. Additionally,
> because all Cortex-M variants have 2 stack pointers, the Main Stack
> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
> also check which stack was used prior to the exception. That way,
> backtraces work beautifully.
> 
> In my original submission, I mentioned a known issue that I didn't try
> to fix *yet*, because that would involve a lot more work, and the impact
> is relatively minor: for a given outer frame, some FPU registers may not
> be reported correctly. I hope you don't mind too much. I consider the
> current patch still useful, because at least backtraces work, and it's
> an annoyance not to be able to get them.
> 

Thanks for the patch.  However, we should really add new target
descriptions/features that describe these registers to gdb
instead of looking them up by name.  Please see:

 https://sourceware.org/ml/gdb-patches/2015-12/msg00273.html

And see more in this earlier attempt at getting the unwinder working:

 https://sourceware.org/ml/gdb-patches/2014-09/msg00649.html

Tristan also wrote yet another patch for the same, as mentioned at:

  https://sourceware.org/ml/gdb-patches/2015-12/msg00281.html

Tristan, did you ever manage to post that?

Lots of duplicated effort.  :-/  :-(

Thanks,
Pedro Alves
  
Tristan Gingold April 14, 2016, 6:34 a.m. UTC | #3
> On 11 Apr 2016, at 23:56, Pedro Alves <palves@redhat.com> wrote:
> 
> Hi,
> 
> [Adding a few folks who either worked on or expressed
> interest in this before.]
> 
> On 04/07/2016 11:07 PM, James-Adam Renquinha Henri wrote:
>> I submitted it as a bug to the GNU ARM Embedded initially, see here for
>> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
>> 
>> Basically, this patch allow gdb to unwind properly an extended stack
>> frame, that is an exception frame with FPU state stacked. Additionally,
>> because all Cortex-M variants have 2 stack pointers, the Main Stack
>> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
>> also check which stack was used prior to the exception. That way,
>> backtraces work beautifully.
>> 
>> In my original submission, I mentioned a known issue that I didn't try
>> to fix *yet*, because that would involve a lot more work, and the impact
>> is relatively minor: for a given outer frame, some FPU registers may not
>> be reported correctly. I hope you don't mind too much. I consider the
>> current patch still useful, because at least backtraces work, and it's
>> an annoyance not to be able to get them.
>> 
> 
> Thanks for the patch.  However, we should really add new target
> descriptions/features that describe these registers to gdb
> instead of looking them up by name.  Please see:
> 
> https://sourceware.org/ml/gdb-patches/2015-12/msg00273.html
> 
> And see more in this earlier attempt at getting the unwinder working:
> 
> https://sourceware.org/ml/gdb-patches/2014-09/msg00649.html
> 
> Tristan also wrote yet another patch for the same, as mentioned at:
> 
>  https://sourceware.org/ml/gdb-patches/2015-12/msg00281.html
> 
> Tristan, did you ever manage to post that?

Not yet.  But I have tested it with two different probes.

> Lots of duplicated effort.  :-/  :-(

Indeed.  But we know that the common part is correct!

Tristan.
  
James-Adam Renquinha Henri April 20, 2016, 4:22 p.m. UTC | #4
> On 04/07/2016 05:07 PM, James-Adam Renquinha Henri wrote:
>> I submitted it as a bug to the GNU ARM Embedded initially, see here for
>> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
>>
>> Basically, this patch allow gdb to unwind properly an extended stack
>> frame, that is an exception frame with FPU state stacked. Additionally,
>> because all Cortex-M variants have 2 stack pointers, the Main Stack
>> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
>> also check which stack was used prior to the exception. That way,
>> backtraces work beautifully.
>>
>> In my original submission, I mentioned a known issue that I didn't try
>> to fix *yet*, because that would involve a lot more work, and the impact
>> is relatively minor: for a given outer frame, some FPU registers may not
>> be reported correctly. I hope you don't mind too much. I consider the
>> current patch still useful, because at least backtraces work, and it's
>> an annoyance not to be able to get them.
>
> I have feeling people will mind. Ideally it should keep the old behavior
> intact if possible. So if you can fallback to the old code, it should be
> ok.

Sorry I don't get it. The old code didn't work in the cases I'm 
providing a fix for, so falling back to the old behavior means just 
giving wrong results? *scratches head*

As I said, getting the behavior 100% correct would require much more 
work, and I felt that it was better to provide an almost correct 
solution so others would benefit quickly from this fix. It might be more 
honest to report a warning to the user that s0-s15 and fpscr could be 
incorrect upon detection of an extended frame. Mind that the old 
situation was "I can't even backtrace past the (CPU) exception if I 
happen to use the FPU", so IMHO it's less harmful to give inaccurate FPU 
information.

Of course, I or someone else can work to get it 100% right, and we can 
then submit everything together if it's better that way.


James-Adam Renquinha Henri, Ing. jr
Ingénieur d'application
CIMEQ INC.
  
Luis Machado April 20, 2016, 4:27 p.m. UTC | #5
On 04/20/2016 11:22 AM, James-Adam Renquinha Henri wrote:
>> On 04/07/2016 05:07 PM, James-Adam Renquinha Henri wrote:
>>> I submitted it as a bug to the GNU ARM Embedded initially, see here for
>>> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
>>>
>>> Basically, this patch allow gdb to unwind properly an extended stack
>>> frame, that is an exception frame with FPU state stacked. Additionally,
>>> because all Cortex-M variants have 2 stack pointers, the Main Stack
>>> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
>>> also check which stack was used prior to the exception. That way,
>>> backtraces work beautifully.
>>>
>>> In my original submission, I mentioned a known issue that I didn't try
>>> to fix *yet*, because that would involve a lot more work, and the impact
>>> is relatively minor: for a given outer frame, some FPU registers may not
>>> be reported correctly. I hope you don't mind too much. I consider the
>>> current patch still useful, because at least backtraces work, and it's
>>> an annoyance not to be able to get them.
>>
>> I have feeling people will mind. Ideally it should keep the old behavior
>> intact if possible. So if you can fallback to the old code, it should be
>> ok.
>
> Sorry I don't get it. The old code didn't work in the cases I'm
> providing a fix for, so falling back to the old behavior means just
> giving wrong results? *scratches head*
>

I may have misunderstood. Is the known issue something caused by the new 
patch (a regression) or something that is still broken but is not being 
addressed by this patch at this time?

If the latter, it is perfectly fine. I thought it was the former.

> As I said, getting the behavior 100% correct would require much more
> work, and I felt that it was better to provide an almost correct
> solution so others would benefit quickly of this fix. It might be more
> honest to report a warning to the user that s0-s16 and fpscr could be
> incorrect upon detection of an extended frame. Mind that the old
> situation was "I can't even backtrace past the (CPU) exception if I
> happen to use the FPU", so IMHO it's less harmful to give inaccurate FPU
> information.
>
> Of course I or someone else can work to get it 100% right and we can
> throw all that altogether if it's better that way.
  
James-Adam Renquinha Henri April 20, 2016, 11:14 p.m. UTC | #6
Le 2016-04-14 02:34, Tristan Gingold a écrit :
>
>> On 11 Apr 2016, at 23:56, Pedro Alves <palves@redhat.com> wrote:
>>
>> Hi,
>>
>> [Adding a few folks who either worked on or expressed
>> interest in this before.]
>>
>> On 04/07/2016 11:07 PM, James-Adam Renquinha Henri wrote:
>>> I submitted it as a bug to the GNU ARM Embedded initially, see here for
>>> details: https://bugs.launchpad.net/gcc-arm-embedded/+bug/1566054
>>>
>>> Basically, this patch allow gdb to unwind properly an extended stack
>>> frame, that is an exception frame with FPU state stacked. Additionally,
>>> because all Cortex-M variants have 2 stack pointers, the Main Stack
>>> Pointer (MSP) and the Process Stack Pointer (PSP), the code in the patch
>>> also check which stack was used prior to the exception. That way,
>>> backtraces work beautifully.
>>>
>>> In my original submission, I mentioned a known issue that I didn't try
>>> to fix *yet*, because that would involve a lot more work, and the impact
>>> is relatively minor: for a given outer frame, some FPU registers may not
>>> be reported correctly. I hope you don't mind too much. I consider the
>>> current patch still useful, because at least backtraces work, and it's
>>> an annoyance not to be able to get them.
>>>
>>
>> Thanks for the patch.  However, we should really add new target
>> descriptions/features that describe these registers to gdb
>> instead of looking them up by name.  Please see:
>>
>> https://sourceware.org/ml/gdb-patches/2015-12/msg00273.html
>>
Hmmmm, er, I'm pretty sure you're referring to the 
`user_reg_map_name_to_regnum` calls, but I'm not sure what the info in 
the link is supposed to tell me. This is the first time I play into the 
guts of `gdb` and am confronted by its inner complexity, though. I 
wasn't aware of "target features", haha :P

This is what I understand, please correct me if I'm wrong:

- When using OpenOCD, gdb gets a target description while communicating 
with the server, so it knows the other registers like "psp", "control", 
"fpscr" and other system registers exist. When using Qemu, gdb uses the 
"org.gnu.gdb.arm.m-profile" feature from binutils-gdb and the system 
registers are lacking from the XML file. This is the problem Christopher 
Friedt had in the thread;
- If the system registers were in "org.gnu.gdb.arm.m-profile", then we 
could rely on their presence and put them in the `gdb_regnum` enum in 
arch/arm.h and assign them fixed offsets, removing the need to get them 
by name;
- By finding registers by name, we're dependent on the supplied target 
description and even getting PSP (which is always available on any 
Cortex-M core BTW) can fail, and the code I provided will crash.

(I realized LR is accessible via `ARM_LR_REGNUM`. Ooops)

I'm not sure I follow, but it seems the inclusion of the system 
registers into "org.gnu.gdb.arm.m-profile" is still an open question. My 
opinion is to include them, because all Cortex-M have them. This can be 
seen in B1.4 of the ARMv6-M Reference Manual:

     The ARMv6-M profile has the following registers closely coupled to 
the processor:
     - General purpose registers R0-R12.
     - Two Stack Pointer registers, SP_main and SP_process. These are 
banked versions of SP, also described as R13.
     - The Link Register, LR also described as R14.
     - The Program Counter, PC, sometimes described as R15.
     - Status registers for flags, execution state bits, and the current 
exception number.
     - A mask register, PRIMASK, used to manage the prioritization 
scheme for exceptions and interrupts.
     - A control register, CONTROL that identifies the current stack.

(later, they define SP_main as synonymous to MSP, and likewise for 
SP_process with PSP)

Cortex-M0 and Cortex-M0+ are based on the ARMv6-M profile, and that 
profile is upward compatible with the ARMv7-M profile which is used in 
the Cortex-M3 and Cortex-M4(F): code compiled for Cortex-M0 can run on 
any Cortex-M. Meaning, ARMv6-M is the lowest common denominator of all 
Cortex-M. If the ARMv6-M architecture has these system registers, then 
all Cortex-M have them.

>> And see more in this earlier attempt at getting the unwinder working:
>>
>> https://sourceware.org/ml/gdb-patches/2014-09/msg00649.html
>>
>> Tristan also wrote yet another patch for the same, as mentioned at:
>>
>>   https://sourceware.org/ml/gdb-patches/2015-12/msg00281.html
>>
>> Tristan, did you ever manage to post that?
>
> Not yet.  But I have tested it with two different probes.
>
>> Lots of duplicated effort.  :-/  :-(
>
> Indeed.  But we know that the common part is correct!
>
> Tristan.
>
>

Well, technically the efforts are not entirely duplicated, for the scope 
of my patch is a bit broader, it deals with the FPU (found on 
Cortex-M4F), too. :)


James-Adam Renquinha Henri, Ing. jr
Ingénieur d'application
CIMEQ INC.
  
Pedro Alves April 22, 2016, 2:16 p.m. UTC | #7
On 04/21/2016 12:14 AM, James-Adam Renquinha Henri wrote:

> When using Qemu, gdb uses the
> "org.gnu.gdb.arm.m-profile" feature from binutils-gdb and the system
> registers are lacking from the XML file. This is the problem Christopher
> Friedt had in the thread;
> - If the system registers were in "org.gnu.gdb.arm.m-profile", then we
> could rely on their presence and put them in the `gdb_regnum` enum in
> arch/arm.h and assign them fixed offsets, removing the need to get them
> by name;
> - By finding registers by name, we're dependent of the supplied target
> description and even getting PSP (which is always available on any
> Cortex-M core BTW) can fail, and the code I provided will crash.

In general, looking up by name can return an unrelated register
that some stub decided to include as extra register in its target
description, that may not be the register GDB is looking for.
So registers that are necessary for correct operation should be
included in some standard target feature, so that GDB knows for
sure what it's getting.

> 
> (I realized LR is accessible via `ARM_LR_REGNUM`. Ooops)
> 
> I'm not sure to follow, but it seems the inclusion of the system
> registers into "org.gnu.gdb.arm.m-profile" is still an open question. My
> opinion is to include them, because all Cortex-M have them.

It may work to make them optional registers in the feature,
so that older targets that don't report them continue working.
Dunno, a new target feature for system registers may be better.


 This can be
> seen in B1.4 of the ARMv6-M Reference Manual:
> 
>     The ARMv6-M profile has the following registers closely coupled to
> the processor:
>     - General purpose registers R0-R12.
>     - Two Stack Pointer registers, SP_main and SP_process. These are
> banked versions of SP, also described as R13.
>     - The Link Register, LR also described as R14.
>     - The Program Counter, PC, sometimes described as R15.
>     - Status registers for flags, execution state bits, and the current
> exception number.
>     - A mask register, PRIMASK, used to manage the prioritization scheme
> for exceptions and interrupts.
>     - A control register, CONTROL that identifies the current stack.
> 
> (later, they define SP_main as synonymous to MSP, and likewise for
> SP_process with PSP)
> 
> Cortex-M0 and Cortex-M0+ are based on the ARMv6-M profile, and that
> profile is upward compatible with the ARMv7-M profile which is used in
> the Cortex-M3 and Cortex-M4(F): code compiled for Cortex-M0 can run on
> any Cortex-M. Meaning, ARMv6-M is the lowest common denominator of all
> Cortex-M. If the ARMv6-M architecture has these system registers, then
> all Cortex-M have them.

The question is whether on a system with some sort of
userspace / kernel separation, such registers would be exposed to
userspace.  E.g., on x86 Linux, ptrace does not expose privileged
registers, so the core x86 target feature does not include any.

See bottom half of:

 https://sourceware.org/ml/gdb-patches/2015-12/msg00273.html

(The suggestion in the top half was assuming gdb didn't have
to know anything about these registers, which turned out to
be incorrect.)

Thanks,
Pedro Alves
  

Patch

diff --git a/gdb/arm-tdep.c b/gdb/arm-tdep.c
index 0412f71..8f342c1 100644
--- a/gdb/arm-tdep.c
+++ b/gdb/arm-tdep.c
@@ -470,8 +470,10 @@  arm_addr_bits_remove (struct gdbarch *gdbarch, CORE_ADDR val)
 {
   /* On M-profile devices, do not strip the low bit from EXC_RETURN
      (the magic exception return address).  */
+ /* NOTE: 0xf0000000 is the EXC_RETURN pattern, according to B1.5.8 of the
+    ARMv7-M Reference Manual.  */
   if (gdbarch_tdep (gdbarch)->is_m
-      && (val & 0xfffffff0) == 0xfffffff0)
+      && (val & 0xf0000000) == 0xf0000000)
     return val;
 
   if (arm_apcs_32)
@@ -2907,13 +2909,31 @@  arm_m_exception_cache (struct frame_info *this_frame)
   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
   struct arm_prologue_cache *cache;
   CORE_ADDR unwound_sp;
+  CORE_ADDR this_lr;
   LONGEST xpsr;
+  int main_stack_used;
+  int extended_frame_type;
+  int stack_regnum;
 
   cache = FRAME_OBSTACK_ZALLOC (struct arm_prologue_cache);
   cache->saved_regs = trad_frame_alloc_saved_regs (this_frame);
 
-  unwound_sp = get_frame_register_unsigned (this_frame,
-					    ARM_SP_REGNUM);
+  /* We need LR to know: 1- if the FPU was used, 2- which stack was used.
+     "B1.5.6 Exception entry behavior" in ARMv7-M Architecture Reference
+     Manual Issue D (or the last one) gives the various bits in LR
+     involved in this. NOTE: this LR is different of the stacked one.  */
+  this_lr
+    = get_frame_register_unsigned (this_frame,
+				   user_reg_map_name_to_regnum (gdbarch,
+								"lr",
+								-1));
+  main_stack_used = (this_lr & 0xf) != 0xd;
+  extended_frame_type = (this_lr & (1 << 4)) == 0;
+  stack_regnum = user_reg_map_name_to_regnum (gdbarch,
+					      main_stack_used ? "sp" : "psp",
+					      -1);
+
+  unwound_sp = get_frame_register_unsigned (this_frame, stack_regnum);
 
   /* The hardware saves eight 32-bit words, comprising xPSR,
      ReturnAddress, LR (R14), R12, R3, R2, R1, R0.  See details in
@@ -2928,10 +2948,47 @@  arm_m_exception_cache (struct frame_info *this_frame)
   cache->saved_regs[15].addr = unwound_sp + 24;
   cache->saved_regs[ARM_PS_REGNUM].addr = unwound_sp + 28;
 
+  if (extended_frame_type)
+    {
+      int s0_offset;
+      int fpscr_offset;
+
+      s0_offset = user_reg_map_name_to_regnum (gdbarch, "s0", -1);
+      fpscr_offset = user_reg_map_name_to_regnum (gdbarch, "fpscr", -1);
+
+      if (s0_offset == -1 || fpscr_offset == -1)
+	{
+	  /* Ooops. */
+	  warning (_("can't get register offsets in cache; "
+		     "fpu info may be wrong"));
+	}
+      else
+	{
+	  int i;
+	  int fpu_reg_offset;
+
+	  fpu_reg_offset = unwound_sp + 0x20;
+
+	  /* XXX: This doesn't take into account the lazy stacking, see "Lazy
+	     context save of FP state", in B1.5.7.  */
+	  for (i = 0; i < 16; ++i, fpu_reg_offset += 4)
+	    {
+	      cache->saved_regs[s0_offset + i].addr = fpu_reg_offset;
+	    }
+	  cache->saved_regs[fpscr_offset].addr = unwound_sp + 0x60;
+	}
+
+	/* Offset 0x64 is reserved */
+	cache->prev_sp = unwound_sp + 0x68;
+    }
+  else
+    {
+      cache->prev_sp = unwound_sp + 32;
+    }
+
   /* If bit 9 of the saved xPSR is set, then there is a four-byte
      aligner between the top of the 32-byte stack frame and the
      previous context's stack pointer.  */
-  cache->prev_sp = unwound_sp + 32;
   if (safe_read_memory_integer (unwound_sp + 28, 4, byte_order, &xpsr)
       && (xpsr & (1 << 9)) != 0)
     cache->prev_sp += 4;
@@ -2997,11 +3054,19 @@  arm_m_exception_unwind_sniffer (const struct frame_unwind *self,
   /* Exception frames return to one of these magic PCs.  Other values
      are not defined as of v7-M.  See details in "B1.5.8 Exception
      return behavior" in "ARMv7-M Architecture Reference Manual".  */
-  if (this_pc == 0xfffffff1 || this_pc == 0xfffffff9
-      || this_pc == 0xfffffffd)
-    return 1;
+  switch (this_pc)
+    {
+      case 0xffffffe1:
+      case 0xffffffe9:
+      case 0xffffffed:
+      case 0xfffffff1:
+      case 0xfffffff9:
+      case 0xfffffffd:
+        return 1;
 
-  return 0;
+      default:
+        return 0;
+    }
 }
 
 /* Frame unwinder for M-profile exceptions.  */