[2/2] Fix gdb.mi/mi-stack.exp when gcc generates a stack protector

Message ID 20171216145651.13936-2-simon.marchi@polymtl.ca
State New, archived
Headers

Commit Message

Simon Marchi Dec. 16, 2017, 2:56 p.m. UTC
  I see some failures in the gdb.mi/mi-stack.exp test.  The test runs to
the callee4 function:

  int callee4 (void)
  {
    int A=1;
    int B=2;
    int C;
    int D[3] = {0, 1, 2};

    C = A + B;
    return 0;
  }

and expects to be stopped at the A=1 line.  However, when gcc generates
some stack protection code, it will stop at the { instead, as shown by
this disassembly (after I did "break callee4" and "run"):

  (gdb) disassemble /s
  Dump of assembler code for function callee4:
  /home/simark/src/binutils-gdb/gdb/testsuite/gdb.mi/mi-stack.c:
  26	{
     0x00005555555546ca <+0>:	push   %rbp
     0x00005555555546cb <+1>:	mov    %rsp,%rbp
     0x00005555555546ce <+4>:	sub    $0x20,%rsp
  => 0x00005555555546d2 <+8>:	mov    %fs:0x28,%rax
     0x00005555555546db <+17>:	mov    %rax,-0x8(%rbp)
     0x00005555555546df <+21>:	xor    %eax,%eax

  27	  int A=1; /* callee4 begin */
     0x00005555555546e1 <+23>:	movl   $0x1,-0x20(%rbp)

  28	  int B=2;
     0x00005555555546e8 <+30>:	movl   $0x2,-0x1c(%rbp)

The rest of the test relies on execution stopping on the A=1 line, so many
things fail after that.  This patch uses mi_continue_to_line instead, to stop
at the A=1 line precisely.

gdb/testsuite/ChangeLog:

	* gdb.mi/mi-stack.exp (test_stack_frame_listing): Use
	mi_continue_to_line.
	* gdb.mi/mi-stack.c (callee4): Add comment.
---
 gdb/testsuite/gdb.mi/mi-stack.c   | 2 +-
 gdb/testsuite/gdb.mi/mi-stack.exp | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)
  

Comments

Yao Qi Jan. 2, 2018, 10:38 a.m. UTC | #1
On Sat, Dec 16, 2017 at 2:56 PM, Simon Marchi <simon.marchi@polymtl.ca> wrote:
> I see some failures in the gdb.mi/mi-stack.exp test.  The test runs to
> the callee4 function:
>
>   int callee4 (void)
>   {
>     int A=1;
>     int B=2;
>     int C;
>     int D[3] = {0, 1, 2};
>
>     C = A + B;
>     return 0;
>   }
>
> and expects to be stopped at the A=1 line.  However, when gcc generates
> some stack protection code, it will stop at the { instead, as shown by
> this disassembly (after I did "break callee4" and "run"):

Can't we fix GDB to skip this stack protection code?
  
Simon Marchi Jan. 2, 2018, 6:14 p.m. UTC | #2
On 2018-01-02 05:38, Yao Qi wrote:
> On Sat, Dec 16, 2017 at 2:56 PM, Simon Marchi <simon.marchi@polymtl.ca> 
> wrote:
>> I see some failures in the gdb.mi/mi-stack.exp test.  The test runs to
>> the callee4 function:
>> 
>>   int callee4 (void)
>>   {
>>     int A=1;
>>     int B=2;
>>     int C;
>>     int D[3] = {0, 1, 2};
>> 
>>     C = A + B;
>>     return 0;
>>   }
>> 
>> and expects to be stopped at the A=1 line.  However, when gcc 
>> generates
>> some stack protection code, it will stop at the { instead, as shown by
>> this disassembly (after I did "break callee4" and "run"):
> 
> Can't we fix GDB to skip these stack protection code?

I think it would be desirable to consider the stack protection code as 
part of the prologue, since it's compiler-generated and of little 
interest to the user.  But I don't know how to do it without breaking 
existing behavior.

Our heuristic, when using SAL to skip prologue, is to consider the first 
linetable entry to represent the prologue.  If we find a consecutive 
entry with the same line number, we assume it's the prologue -> body 
transition (because otherwise there would be no point in having a 
separate entry).  When adding a stack protector, gcc puts it in a 
separate linetable entry, as if it was user code, so GDB thinks it's the 
beginning of the body.

Let's take this small example:

1  int main()
2  {
3    int n = 0;
4    n++;
5    return n;
6  }

Which compiles to this with -fstack-protector-all:

    0x0000000000400546 <+0>:     push   %rbp
    0x0000000000400547 <+1>:     mov    %rsp,%rbp
    0x000000000040054a <+4>:     sub    $0x10,%rsp
    0x000000000040054e <+8>:     mov    %fs:0x28,%rax
    0x0000000000400557 <+17>:    mov    %rax,-0x8(%rbp)
    0x000000000040055b <+21>:    xor    %eax,%eax
    0x000000000040055d <+23>:    movl   $0x0,-0xc(%rbp)
    0x0000000000400564 <+30>:    addl   $0x1,-0xc(%rbp)
    0x0000000000400568 <+34>:    mov    -0xc(%rbp),%eax
    0x000000000040056b <+37>:    mov    -0x8(%rbp),%rdx
    0x000000000040056f <+41>:    xor    %fs:0x28,%rdx
    0x0000000000400578 <+50>:    je     0x40057f <main+57>
    0x000000000040057a <+52>:    callq  0x400420 <__stack_chk_fail@plt>
    0x000000000040057f <+57>:    leaveq
    0x0000000000400580 <+58>:    retq

test.c                                         2            0x400546
test.c                                         2            0x40054e
test.c                                         3            0x40055d
test.c                                         4            0x400564
test.c                                         5            0x400568
test.c                                         6            0x40056b

GDB currently assumes that the second entry is the beginning of the 
body.  But ideally we would treat the first two entries as the prologue, 
and put our breakpoint on line 3/0x40055d.

And then let's look at this modified example, where the first line of 
code is on the same line as the opening curly bracket, and compiled 
without stack protection (-fno-stack-protector):

1  int main()
2  { int n = 0;
3    n++;
4    return n;
5  }


    0x00000000004004d6 <+0>:     push   %rbp
    0x00000000004004d7 <+1>:     mov    %rsp,%rbp
    0x00000000004004da <+4>:     movl   $0x0,-0x4(%rbp)
    0x00000000004004e1 <+11>:    addl   $0x1,-0x4(%rbp)
    0x00000000004004e5 <+15>:    mov    -0x4(%rbp),%eax
    0x00000000004004e8 <+18>:    pop    %rbp
    0x00000000004004e9 <+19>:    retq

test.c                                         2            0x4004d6
test.c                                         2            0x4004da
test.c                                         3            0x4004e1
test.c                                         4            0x4004e5
test.c                                         5            0x4004e8

We have a similar line table as the previous example (same source line, 
different address), but in this case the second entry at line 2 is 
really the start of user code.  We would want to put our breakpoint at 
line 2/0x4004da.  So, how do we differentiate these two cases?

When skipping prologue without DWARF info, we could always recognize the 
pattern of instructions.  But when skipping the prologue using SAL, we 
don't look at the instructions, we only rely on DWARF, and I think it 
should stay that way.  If we need more information, then the DWARF info 
needs to be improved.  Are you aware of any other information that is 
currently present that could help us?

There exists a DWARF linetable opcode that indicates the end of prologue 
(DW_LNS_set_prologue_end).  Do you know why GCC doesn't use it?

Thanks,

Simon
  
Yao Qi Jan. 3, 2018, 9:53 p.m. UTC | #3
On Tue, Jan 2, 2018 at 6:14 PM, Simon Marchi <simon.marchi@polymtl.ca> wrote:
> On 2018-01-02 05:38, Yao Qi wrote:
>> Can't we fix GDB to skip these stack protection code?
>
>
> I think it would be desirable to consider the stack protection code as part
> of the prologue, since it's compiler-generated and of little interest to the
> user.  But I don't know how to do it without breaking existing behavior.
>

Yes, we can skip them as part of skipping prologue.

> Our heuristic, when using SaL to skip prologue, is to consider the first
> linetable entry to represent the prologue.  If we find a consecutive entry
> with the same line number, we assume it's the prologue -> body transition
> (because otherwise there would be no point in having a separate entry).
> When adding a stack protector, gcc puts it in a separate linetable entry, as
> if it was user code, so GDB thinks it's the beginning of the body.
>
> Let's take this small example:
>
> 1  int main()
> 2  {
> 3    int n = 0;
> 4    n++;
> 5    return n;
> 6  }
>
> Which compiles to this with -fstack-protector-all:
>
>    0x0000000000400546 <+0>:     push   %rbp
>    0x0000000000400547 <+1>:     mov    %rsp,%rbp
>    0x000000000040054a <+4>:     sub    $0x10,%rsp
>    0x000000000040054e <+8>:     mov    %fs:0x28,%rax
>    0x0000000000400557 <+17>:    mov    %rax,-0x8(%rbp)
>    0x000000000040055b <+21>:    xor    %eax,%eax
>    0x000000000040055d <+23>:    movl   $0x0,-0xc(%rbp)
>    0x0000000000400564 <+30>:    addl   $0x1,-0xc(%rbp)
>    0x0000000000400568 <+34>:    mov    -0xc(%rbp),%eax
>    0x000000000040056b <+37>:    mov    -0x8(%rbp),%rdx
>    0x000000000040056f <+41>:    xor    %fs:0x28,%rdx
>    0x0000000000400578 <+50>:    je     0x40057f <main+57>
>    0x000000000040057a <+52>:    callq  0x400420 <__stack_chk_fail@plt>
>    0x000000000040057f <+57>:    leaveq
>    0x0000000000400580 <+58>:    retq
>
> test.c                                         2            0x400546
> test.c                                         2            0x40054e
> test.c                                         3            0x40055d
> test.c                                         4            0x400564
> test.c                                         5            0x400568
> test.c                                         6            0x40056b
>
> GDB currently assumes that the second entry is the beginning of the body.
> But ideally we would treat the first two entries as the prologue, and put
> our breakpoint on line 3/0x40055d.
>
> And then let's look at this modified example, where the first line of code
> is on the same line as the opening curly bracket, and compiled without stack
> protection (-fno-stack-protector):
>
> 1  int main()
> 2  { int n = 0;
> 3    n++;
> 4    return n;
> 5  }
>
>
>    0x00000000004004d6 <+0>:     push   %rbp
>    0x00000000004004d7 <+1>:     mov    %rsp,%rbp
>    0x00000000004004da <+4>:     movl   $0x0,-0x4(%rbp)
>    0x00000000004004e1 <+11>:    addl   $0x1,-0x4(%rbp)
>    0x00000000004004e5 <+15>:    mov    -0x4(%rbp),%eax
>    0x00000000004004e8 <+18>:    pop    %rbp
>    0x00000000004004e9 <+19>:    retq
>
> test.c                                         2            0x4004d6
> test.c                                         2            0x4004da
> test.c                                         3            0x4004e1
> test.c                                         4            0x4004e5
> test.c                                         5            0x4004e8
>
> We have a similar line table as the previous example (same source line,
> different address), but in this case the second entry at line 2 is really
> the start of user code.  We would want to put our breakpoint at line
> 2/0x4004da.  So, how do we differentiate these two cases?
>

When GDB sets breakpoint, it calls gdbarch_skip_prologue_noexcept
to skip prologue, amd64 backend doesn't use SAL to identify the end
of prologue unless compiler is clang (see amd64_skip_prologue).
Instead, GDB scans prologue to find the end of prologue, so we can
extend amd64 prologue analyzer to understand these instructions
for stack protection.

(gdb) b callee4

Thread 1 "gdb" hit Breakpoint 1, amd64_analyze_prologue
(gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734,
current_pc=current_pc@entry=18446744073709551615,
    cache=cache@entry=0x7fffffffd1e0) at
../../binutils-gdb/gdb/amd64-tdep.c:2319
2319    {
(gdb) bt 10
#0  amd64_analyze_prologue (gdbarch=gdbarch@entry=0x154ef60,
pc=pc@entry=4195734, current_pc=current_pc@entry=18446744073709551615,
cache=cache@entry=0x7fffffffd1e0)
    at ../../binutils-gdb/gdb/amd64-tdep.c:2319
#1  0x0000000000428b8c in amd64_skip_prologue (gdbarch=0x154ef60,
start_pc=4195734) at ../../binutils-gdb/gdb/amd64-tdep.c:2488
#2  0x0000000000515363 in gdbarch_skip_prologue_noexcept
(gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734) at
../../binutils-gdb/gdb/arch-utils.c:970
#3  0x0000000000692b03 in skip_prologue_sal
(sal=sal@entry=0x7fffffffd4d0) at ../../binutils-gdb/gdb/symtab.c:3721
#4  0x0000000000692e02 in find_function_start_sal
(sym=sym@entry=0x158e8b0, funfirstline=1) at
../../binutils-gdb/gdb/symtab.c:3594
#5  0x00000000005fe0dd in symbol_to_sal
(result=result@entry=0x7fffffffd6d0, funfirstline=<optimized out>,
sym=sym@entry=0x158e8b0)
    at ../../binutils-gdb/gdb/linespec.c:4611

We did something similar in arm-tdep.c, search "__stack_chk_guard".
However, I am not sure we can find a "fingerprint" of these stack protection
instructions on amd64.
  
Simon Marchi Jan. 3, 2018, 10:39 p.m. UTC | #4
On 2018-01-03 16:53, Yao Qi wrote:
> On Tue, Jan 2, 2018 at 6:14 PM, Simon Marchi <simon.marchi@polymtl.ca> 
> wrote:
>> On 2018-01-02 05:38, Yao Qi wrote:
>>> Can't we fix GDB to skip these stack protection code?
>> 
>> 
>> I think it would be desirable to consider the stack protection code as 
>> part
>> of the prologue, since it's compiler-generated and of little interest 
>> to the
>> user.  But I don't know how to do it without breaking existing 
>> behavior.
>> 
> 
> Yes, we can skip them as part of skipping prologue.
> 
>> Our heuristic, when using SaL to skip prologue, is to consider the 
>> first
>> linetable entry to represent the prologue.  If we find a consecutive 
>> entry
>> with the same line number, we assume it's the prologue -> body 
>> transition
>> (because otherwise there would be no point in having a separate 
>> entry).
>> When adding a stack protector, gcc puts it in a separate linetable 
>> entry, as
>> if it was user code, so GDB thinks it's the beginning of the body.
>> 
>> Let's take this small example:
>> 
>> 1  int main()
>> 2  {
>> 3    int n = 0;
>> 4    n++;
>> 5    return n;
>> 6  }
>> 
>> Which compiles to this with -fstack-protector-all:
>> 
>>    0x0000000000400546 <+0>:     push   %rbp
>>    0x0000000000400547 <+1>:     mov    %rsp,%rbp
>>    0x000000000040054a <+4>:     sub    $0x10,%rsp
>>    0x000000000040054e <+8>:     mov    %fs:0x28,%rax
>>    0x0000000000400557 <+17>:    mov    %rax,-0x8(%rbp)
>>    0x000000000040055b <+21>:    xor    %eax,%eax
>>    0x000000000040055d <+23>:    movl   $0x0,-0xc(%rbp)
>>    0x0000000000400564 <+30>:    addl   $0x1,-0xc(%rbp)
>>    0x0000000000400568 <+34>:    mov    -0xc(%rbp),%eax
>>    0x000000000040056b <+37>:    mov    -0x8(%rbp),%rdx
>>    0x000000000040056f <+41>:    xor    %fs:0x28,%rdx
>>    0x0000000000400578 <+50>:    je     0x40057f <main+57>
>>    0x000000000040057a <+52>:    callq  0x400420 <__stack_chk_fail@plt>
>>    0x000000000040057f <+57>:    leaveq
>>    0x0000000000400580 <+58>:    retq
>> 
>> test.c                                         2            0x400546
>> test.c                                         2            0x40054e
>> test.c                                         3            0x40055d
>> test.c                                         4            0x400564
>> test.c                                         5            0x400568
>> test.c                                         6            0x40056b
>> 
>> GDB currently assumes that the second entry is the beginning of the 
>> body.
>> But ideally we would treat the first two entries as the prologue, and 
>> put
>> our breakpoint on line 3/0x40055d.
>> 
>> And then let's look at this modified example, where the first line of 
>> code
>> is on the same line as the opening curly bracket, and compiled without 
>> stack
>> protection (-fno-stack-protector):
>> 
>> 1  int main()
>> 2  { int n = 0;
>> 3    n++;
>> 4    return n;
>> 5  }
>> 
>> 
>>    0x00000000004004d6 <+0>:     push   %rbp
>>    0x00000000004004d7 <+1>:     mov    %rsp,%rbp
>>    0x00000000004004da <+4>:     movl   $0x0,-0x4(%rbp)
>>    0x00000000004004e1 <+11>:    addl   $0x1,-0x4(%rbp)
>>    0x00000000004004e5 <+15>:    mov    -0x4(%rbp),%eax
>>    0x00000000004004e8 <+18>:    pop    %rbp
>>    0x00000000004004e9 <+19>:    retq
>> 
>> test.c                                         2            0x4004d6
>> test.c                                         2            0x4004da
>> test.c                                         3            0x4004e1
>> test.c                                         4            0x4004e5
>> test.c                                         5            0x4004e8
>> 
>> We have a similar line table as the previous example (same source 
>> line,
>> different address), but in this case the second entry at line 2 is 
>> really
>> the start of user code.  We would want to put our breakpoint at line
>> 2/0x4004da.  So, how do we differentiate these two cases?
>> 
> 
> When GDB sets breakpoint, it calls gdbarch_skip_prologue_noexcept
> to skip prologue, amd64 backend doesn't use SAL to identify the end
> of prologue unless compiler is clang (see amd64_skip_prologue).
> Instead, GDB scans prologue to find the end of prologue, so we can
> extend amd64 prologue analyzer to understand these instructions
> for stack protection.

Ahh ok, amd64_skip_prologue calls skip_prologue_using_sal, but I didn't 
see the result was only used for clang!  So I was stepping in 
skip_prologue_using_sal all this time for nothing :)

> (gdb) b callee4
> 
> Thread 1 "gdb" hit Breakpoint 1, amd64_analyze_prologue
> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734,
> current_pc=current_pc@entry=18446744073709551615,
>     cache=cache@entry=0x7fffffffd1e0) at
> ../../binutils-gdb/gdb/amd64-tdep.c:2319
> 2319    {
> (gdb) bt 10
> #0  amd64_analyze_prologue (gdbarch=gdbarch@entry=0x154ef60,
> pc=pc@entry=4195734, current_pc=current_pc@entry=18446744073709551615,
> cache=cache@entry=0x7fffffffd1e0)
>     at ../../binutils-gdb/gdb/amd64-tdep.c:2319
> #1  0x0000000000428b8c in amd64_skip_prologue (gdbarch=0x154ef60,
> start_pc=4195734) at ../../binutils-gdb/gdb/amd64-tdep.c:2488
> #2  0x0000000000515363 in gdbarch_skip_prologue_noexcept
> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734) at
> ../../binutils-gdb/gdb/arch-utils.c:970
> #3  0x0000000000692b03 in skip_prologue_sal
> (sal=sal@entry=0x7fffffffd4d0) at ../../binutils-gdb/gdb/symtab.c:3721
> #4  0x0000000000692e02 in find_function_start_sal
> (sym=sym@entry=0x158e8b0, funfirstline=1) at
> ../../binutils-gdb/gdb/symtab.c:3594
> #5  0x00000000005fe0dd in symbol_to_sal
> (result=result@entry=0x7fffffffd6d0, funfirstline=<optimized out>,
> sym=sym@entry=0x158e8b0)
>     at ../../binutils-gdb/gdb/linespec.c:4611
> 
> We did something similar in arm-tdep.c, search "__stack_chk_guard".
> However, I am not sure we can find a "fingerprint" of these stack 
> projection
> instructions on amd64.

Thanks for the pointers, I'll take a look.

Simon
  
Simon Marchi Jan. 4, 2018, 8:09 p.m. UTC | #5
On 2018-01-03 17:39, Simon Marchi wrote:
>> Thread 1 "gdb" hit Breakpoint 1, amd64_analyze_prologue
>> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734,
>> current_pc=current_pc@entry=18446744073709551615,
>>     cache=cache@entry=0x7fffffffd1e0) at
>> ../../binutils-gdb/gdb/amd64-tdep.c:2319
>> 2319    {
>> (gdb) bt 10
>> #0  amd64_analyze_prologue (gdbarch=gdbarch@entry=0x154ef60,
>> pc=pc@entry=4195734, current_pc=current_pc@entry=18446744073709551615,
>> cache=cache@entry=0x7fffffffd1e0)
>>     at ../../binutils-gdb/gdb/amd64-tdep.c:2319
>> #1  0x0000000000428b8c in amd64_skip_prologue (gdbarch=0x154ef60,
>> start_pc=4195734) at ../../binutils-gdb/gdb/amd64-tdep.c:2488
>> #2  0x0000000000515363 in gdbarch_skip_prologue_noexcept
>> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734) at
>> ../../binutils-gdb/gdb/arch-utils.c:970
>> #3  0x0000000000692b03 in skip_prologue_sal
>> (sal=sal@entry=0x7fffffffd4d0) at ../../binutils-gdb/gdb/symtab.c:3721
>> #4  0x0000000000692e02 in find_function_start_sal
>> (sym=sym@entry=0x158e8b0, funfirstline=1) at
>> ../../binutils-gdb/gdb/symtab.c:3594
>> #5  0x00000000005fe0dd in symbol_to_sal
>> (result=result@entry=0x7fffffffd6d0, funfirstline=<optimized out>,
>> sym=sym@entry=0x158e8b0)
>>     at ../../binutils-gdb/gdb/linespec.c:4611
>> 
>> We did something similar in arm-tdep.c, search "__stack_chk_guard".
>> However, I am not sure we can find a "fingerprint" of these stack 
>> projection
>> instructions on amd64.
> 
> Thanks for the pointers, I'll take a look.
> 
> Simon

Hi Yao,

The instructions to put the stack check guard seem recognizable enough, 
it's always

    0x000000000040057a <+52>:    mov    %fs:0x28,%rax
    0x0000000000400583 <+61>:    mov    %rax,-0x8(%rbp)
    0x0000000000400587 <+65>:    xor    %eax,%eax

with maybe the offset in the second mov changing.  The problem is that 
there are all kinds of things we currently don't recognize that could be 
in the prologue before that.  For example, a function with many 
parameters will have this (when built with -O0):

    0x0000000000400546 <+0>:     push   %rbp
    0x0000000000400547 <+1>:     mov    %rsp,%rbp
    0x000000000040054a <+4>:     sub    $0x40,%rsp
    0x000000000040054e <+8>:     mov    %edi,-0x14(%rbp)
    0x0000000000400551 <+11>:    mov    %esi,-0x18(%rbp)
    0x0000000000400554 <+14>:    mov    %edx,-0x1c(%rbp)
    0x0000000000400557 <+17>:    mov    %ecx,-0x20(%rbp)
    0x000000000040055a <+20>:    mov    %r8d,-0x24(%rbp)
    0x000000000040055e <+24>:    mov    %r9d,-0x28(%rbp)
    0x0000000000400562 <+28>:    mov    0x10(%rbp),%eax
    0x0000000000400565 <+31>:    mov    %eax,-0x2c(%rbp)
    0x0000000000400568 <+34>:    mov    0x18(%rbp),%eax
    0x000000000040056b <+37>:    mov    %eax,-0x30(%rbp)
    0x000000000040056e <+40>:    mov    0x20(%rbp),%eax
    0x0000000000400571 <+43>:    mov    %eax,-0x34(%rbp)
    0x0000000000400574 <+46>:    mov    0x28(%rbp),%eax
    0x0000000000400577 <+49>:    mov    %eax,-0x38(%rbp)
    --- end of prologue currently ---
    0x000000000040057a <+52>:    mov    %fs:0x28,%rax
    0x0000000000400583 <+61>:    mov    %rax,-0x8(%rbp)
    0x0000000000400587 <+65>:    xor    %eax,%eax
    --- end of prologue ideally ---
    0x0000000000400589 <+67>:    mov    -0x14(%rbp),%edx
    0x000000000040058c <+70>:    mov    -0x18(%rbp),%eax
    0x000000000040058f <+73>:    add    %eax,%edx
    0x0000000000400591 <+75>:    mov    -0x1c(%rbp),%eax
    0x0000000000400594 <+78>:    add    %eax,%edx

Currently, everything in [0x400546,0x40057a[ is covered by a single line 
statement (it's what we consider as the prologue today).  In 
amd64_skip_prologue, we move the PC past the "push %rbp" and "mov 
%rsp,%rbp".  The generic code then notices that we left pc in the middle 
of a line, so it moves it to the start of the next line, at 0x40057a, 
effectively moving past the prologue.  If we wanted to skip the stack 
check as well, we would have to recognize all the movs that copy the 
arguments on the stack, and then recognize the stack check guard setup.  
But even if we do it, we'll have a problem when the first line of code 
is on the same line as the opening curly bracket like this:

int foo(int i, int j, int k, int l, int m, int n, int o, int p, int q, 
int r)
{       int x = i + j + k + l + m + n + o + p + q + r;
         x++;
         return x;
}

The line statements are:

CU: ./test.c:
File name                            Line number    Starting address
test.c                                         2            0x400546
test.c                                         2            0x40057a
test.c                                         3            0x4005bc
test.c                                         4            0x4005c0
test.c                                         5            0x4005c3
test.c                                         8            0x4005d9
test.c                                         8            0x4005e1
test.c                                         9            0x4005f7
test.c                                        10            0x400626

If we manage to recognize instructions and push the PC to after the 
stack check guard setup (0x400589), the generic code will notice that we 
are in the middle of a line, and skip to 0x4005bc, which is then too 
far, as it would skip some user code.  The problem is really that there 
is a single line statement covering both the stack check guard and the 
user code.

So I think that gcc should do one of these:

1. Include the stack check guard setup code in the prologue line 
statement:

test.c                                         2            0x400546
test.c                                         2            0x400589
test.c                                         3            0x4005bc
...

GDB wouldn't have to do anything more than it does today.

2. Have a line statement only for the stack check guard setup, separate 
from the user code:

test.c                                         2            0x400546
test.c                                         2            0x40057a
test.c                                         2            0x400589
test.c                                         3            0x4005bc

In that case, GDB would have to do a bit more than it does today, but at 
least we would be able to find the "stack check guard -> user code" 
transition.

Idea #1 seems better to me, because it's easier for us :) and existing 
GDBs would do the right thing with newer GCCs.

Does that make sense, or am I missing something?

Thanks,

Simon
  
Simon Marchi April 7, 2018, 6:14 p.m. UTC | #6
On 2018-01-03 04:53 PM, Yao Qi wrote:
> On Tue, Jan 2, 2018 at 6:14 PM, Simon Marchi <simon.marchi@polymtl.ca> wrote:
>> On 2018-01-02 05:38, Yao Qi wrote:
>>> Can't we fix GDB to skip these stack protection code?
>>
>>
>> I think it would be desirable to consider the stack protection code as part
>> of the prologue, since it's compiler-generated and of little interest to the
>> user.  But I don't know how to do it without breaking existing behavior.
>>
> 
> Yes, we can skip them as part of skipping prologue.
> 
>> Our heuristic, when using SaL to skip prologue, is to consider the first
>> linetable entry to represent the prologue.  If we find a consecutive entry
>> with the same line number, we assume it's the prologue -> body transition
>> (because otherwise there would be no point in having a separate entry).
>> When adding a stack protector, gcc puts it in a separate linetable entry, as
>> if it was user code, so GDB thinks it's the beginning of the body.
>>
>> Let's take this small example:
>>
>> 1  int main()
>> 2  {
>> 3    int n = 0;
>> 4    n++;
>> 5    return n;
>> 6  }
>>
>> Which compiles to this with -fstack-protector-all:
>>
>>    0x0000000000400546 <+0>:     push   %rbp
>>    0x0000000000400547 <+1>:     mov    %rsp,%rbp
>>    0x000000000040054a <+4>:     sub    $0x10,%rsp
>>    0x000000000040054e <+8>:     mov    %fs:0x28,%rax
>>    0x0000000000400557 <+17>:    mov    %rax,-0x8(%rbp)
>>    0x000000000040055b <+21>:    xor    %eax,%eax
>>    0x000000000040055d <+23>:    movl   $0x0,-0xc(%rbp)
>>    0x0000000000400564 <+30>:    addl   $0x1,-0xc(%rbp)
>>    0x0000000000400568 <+34>:    mov    -0xc(%rbp),%eax
>>    0x000000000040056b <+37>:    mov    -0x8(%rbp),%rdx
>>    0x000000000040056f <+41>:    xor    %fs:0x28,%rdx
>>    0x0000000000400578 <+50>:    je     0x40057f <main+57>
>>    0x000000000040057a <+52>:    callq  0x400420 <__stack_chk_fail@plt>
>>    0x000000000040057f <+57>:    leaveq
>>    0x0000000000400580 <+58>:    retq
>>
>> test.c                                         2            0x400546
>> test.c                                         2            0x40054e
>> test.c                                         3            0x40055d
>> test.c                                         4            0x400564
>> test.c                                         5            0x400568
>> test.c                                         6            0x40056b
>>
>> GDB currently assumes that the second entry is the beginning of the body.
>> But ideally we would treat the first two entries as the prologue, and put
>> our breakpoint on line 3/0x40055d.
>>
>> And then let's look at this modified example, where the first line of code
>> is on the same line as the opening curly bracket, and compiled without stack
>> protection (-fno-stack-protector):
>>
>> 1  int main()
>> 2  { int n = 0;
>> 3    n++;
>> 4    return n;
>> 5  }
>>
>>
>>    0x00000000004004d6 <+0>:     push   %rbp
>>    0x00000000004004d7 <+1>:     mov    %rsp,%rbp
>>    0x00000000004004da <+4>:     movl   $0x0,-0x4(%rbp)
>>    0x00000000004004e1 <+11>:    addl   $0x1,-0x4(%rbp)
>>    0x00000000004004e5 <+15>:    mov    -0x4(%rbp),%eax
>>    0x00000000004004e8 <+18>:    pop    %rbp
>>    0x00000000004004e9 <+19>:    retq
>>
>> test.c                                         2            0x4004d6
>> test.c                                         2            0x4004da
>> test.c                                         3            0x4004e1
>> test.c                                         4            0x4004e5
>> test.c                                         5            0x4004e8
>>
>> We have a similar line table as the previous example (same source line,
>> different address), but in this case the second entry at line 2 is really
>> the start of user code.  We would want to put our breakpoint at line
>> 2/0x4004da.  So, how do we differentiate these two cases?
>>
> 
> When GDB sets breakpoint, it calls gdbarch_skip_prologue_noexcept
> to skip prologue, amd64 backend doesn't use SAL to identify the end
> of prologue unless compiler is clang (see amd64_skip_prologue).
> Instead, GDB scans prologue to find the end of prologue, so we can
> extend amd64 prologue analyzer to understand these instructions
> for stack protection.
> 
> (gdb) b callee4
> 
> Thread 1 "gdb" hit Breakpoint 1, amd64_analyze_prologue
> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734,
> current_pc=current_pc@entry=18446744073709551615,
>     cache=cache@entry=0x7fffffffd1e0) at
> ../../binutils-gdb/gdb/amd64-tdep.c:2319
> 2319    {
> (gdb) bt 10
> #0  amd64_analyze_prologue (gdbarch=gdbarch@entry=0x154ef60,
> pc=pc@entry=4195734, current_pc=current_pc@entry=18446744073709551615,
> cache=cache@entry=0x7fffffffd1e0)
>     at ../../binutils-gdb/gdb/amd64-tdep.c:2319
> #1  0x0000000000428b8c in amd64_skip_prologue (gdbarch=0x154ef60,
> start_pc=4195734) at ../../binutils-gdb/gdb/amd64-tdep.c:2488
> #2  0x0000000000515363 in gdbarch_skip_prologue_noexcept
> (gdbarch=gdbarch@entry=0x154ef60, pc=pc@entry=4195734) at
> ../../binutils-gdb/gdb/arch-utils.c:970
> #3  0x0000000000692b03 in skip_prologue_sal
> (sal=sal@entry=0x7fffffffd4d0) at ../../binutils-gdb/gdb/symtab.c:3721
> #4  0x0000000000692e02 in find_function_start_sal
> (sym=sym@entry=0x158e8b0, funfirstline=1) at
> ../../binutils-gdb/gdb/symtab.c:3594
> #5  0x00000000005fe0dd in symbol_to_sal
> (result=result@entry=0x7fffffffd6d0, funfirstline=<optimized out>,
> sym=sym@entry=0x158e8b0)
>     at ../../binutils-gdb/gdb/linespec.c:4611
> 
> We did something similar in arm-tdep.c, search "__stack_chk_guard".
> However, I am not sure we can find a "fingerprint" of these stack projection
> instructions on amd64.
> 

I ended up pushing these patches.  Since this issue of skipping the stack check
instructions is relatively low priority, I don't think the behavior will change
any time soon, and I'd rather fix that test now.

Simon
  

Patch

diff --git a/gdb/testsuite/gdb.mi/mi-stack.c b/gdb/testsuite/gdb.mi/mi-stack.c
index 30dce8b9f6..dc239f0e7b 100644
--- a/gdb/testsuite/gdb.mi/mi-stack.c
+++ b/gdb/testsuite/gdb.mi/mi-stack.c
@@ -24,7 +24,7 @@ 
 
 int callee4 (void)
 {
-  int A=1;
+  int A=1; /* callee4 begin */
   int B=2;
   int C;
   int D[3] = {0, 1, 2};
diff --git a/gdb/testsuite/gdb.mi/mi-stack.exp b/gdb/testsuite/gdb.mi/mi-stack.exp
index cd7c6d4c38..b88032c73e 100644
--- a/gdb/testsuite/gdb.mi/mi-stack.exp
+++ b/gdb/testsuite/gdb.mi/mi-stack.exp
@@ -46,8 +46,8 @@  proc test_stack_frame_listing {} {
     global mi_gdb_prompt
     global hex fullname_syntax srcfile
 
-    set line_callee4_head [gdb_get_line_number "callee4 ("]
-    set line_callee4_body [expr $line_callee4_head + 2]
+    set callee4_begin [gdb_get_line_number "callee4 begin"]
+    mi_continue_to_line $callee4_begin "continue to callee4 begin"
 
     # Obtain a stack trace
     # Tests:
@@ -56,7 +56,7 @@  proc test_stack_frame_listing {} {
     # -stack-list-frames 1 3
     # -stack-info-frame
     mi_gdb_test "231-stack-list-frames" \
-	"231\\^done,stack=\\\[frame=\{level=\"0\",addr=\"$hex\",func=\"callee4\",file=\".*${srcfile}\",fullname=\"${fullname_syntax}${srcfile}\",line=\"$line_callee4_body\"\},frame=\{level=\"1\",addr=\"$hex\",func=\"callee3\",.*\},frame=\{level=\"2\",addr=\"$hex\",func=\"callee2\",.*\},frame=\{level=\"3\",addr=\"$hex\",func=\"callee1\",.*\},frame=\{level=\"4\",addr=\"$hex\",func=\"main\",.*\}\\\]" \
+	"231\\^done,stack=\\\[frame=\{level=\"0\",addr=\"$hex\",func=\"callee4\",file=\".*${srcfile}\",fullname=\"${fullname_syntax}${srcfile}\",line=\"$callee4_begin\"\},frame=\{level=\"1\",addr=\"$hex\",func=\"callee3\",.*\},frame=\{level=\"2\",addr=\"$hex\",func=\"callee2\",.*\},frame=\{level=\"3\",addr=\"$hex\",func=\"callee1\",.*\},frame=\{level=\"4\",addr=\"$hex\",func=\"main\",.*\}\\\]" \
 	"stack frame listing"
     mi_gdb_test "232-stack-list-frames 1 1" \
 	"232\\^done,stack=\\\[frame=\{level=\"1\",addr=\"$hex\",func=\"callee3\",.*\}\\\]" \
@@ -70,7 +70,7 @@  proc test_stack_frame_listing {} {
 	"stack frame listing wrong"
 
     mi_gdb_test "235-stack-info-frame" \
-	"235\\^done,frame=\{level=\"0\",addr=\"$hex\",func=\"callee4\",file=\".*${srcfile}\",fullname=\"${fullname_syntax}${srcfile}\",line=\"$line_callee4_body\"\}" \
+	"235\\^done,frame=\{level=\"0\",addr=\"$hex\",func=\"callee4\",file=\".*${srcfile}\",fullname=\"${fullname_syntax}${srcfile}\",line=\"$callee4_begin\"\}" \
 	"selected frame listing"
 
     mi_gdb_test "236-stack-list-frames 1 300" \