Fix handling of null stap semaphores

Message ID 5we87igzwt5_kr.5b-38floyexzwmozuj6vb-.hmx8r4u3r41_sy@mail.bob131.so
State New, archived
Headers

Commit Message

George Barrett Dec. 3, 2019, 7:59 p.m. UTC
  According to the SystemTap documentation on user-space probes[0], stap
probe points without semaphores are denoted by setting the semaphore
address in the probe's note to zero. At present the code does do a
comparison of the semaphore address against zero, but only after it's
been relocated; as such it will (almost?) always fail, commonly
resulting in GDB trying to overwrite the ELF magic located at the
image's base address.

This commit tests the address as specified in the SDT note rather than
the relocated value in order to correctly detect absent probe
semaphores.

[0]: https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation

gdb/Changelog:

	* stap-probe.c: Fix handling of null stap semaphores
---
 gdb/stap-probe.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
  

Comments

George Barrett Dec. 18, 2019, 5:20 p.m. UTC | #1
Ping

On Wed, Dec 04, 2019 at 06:59:25AM +1100, George Barrett wrote:
> According to the SystemTap documentation on user-space probes[0], stap
> probe points without semaphores are denoted by setting the semaphore
> address in the probe's note to zero. At present the code does do a
> comparison of the semaphore address against zero, but only after it's
> been relocated; as such it will (almost?) always fail, commonly
> resulting in GDB trying to overwrite the ELF magic located at the
> image's base address.
>
> This commit tests the address as specified in the SDT note rather than
> the relocated value in order to correctly detect absent probe
> semaphores.
>
> [0]: https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
>
> gdb/Changelog:
>
> 	* stap-probe.c: Fix handling of null stap semaphores
> ---
>  gdb/stap-probe.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c
> index ba927790a5..e5e3cceacd 100644
> --- a/gdb/stap-probe.c
> +++ b/gdb/stap-probe.c
> @@ -1425,9 +1425,6 @@ stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
>    struct type *type = builtin_type (gdbarch)->builtin_unsigned_short;
>    ULONGEST value;
>  
> -  if (address == 0)
> -    return;
> -
>    /* Swallow errors.  */
>    if (target_read_memory (address, bytes, TYPE_LENGTH (type)) != 0)
>      {
> @@ -1461,6 +1458,8 @@ stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
>  void
>  stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
>  {
> +  if (m_sem_addr == 0)
> +    return;
>    stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 1, gdbarch);
>  }
>  
> @@ -1469,6 +1468,8 @@ stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
>  void
>  stap_probe::clear_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
>  {
> +  if (m_sem_addr == 0)
> +    return;
>    stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 0, gdbarch);
>  }
>  
> --
> 2.23.0
  
George Barrett Dec. 28, 2019, 5:28 a.m. UTC | #2
On Thu, Dec 19, 2019 at 04:20:50AM +1100, George Barrett wrote:
> Ping
> 
> On Wed, Dec 04, 2019 at 06:59:25AM +1100, George Barrett wrote:
> > According to the SystemTap documentation on user-space probes[0], stap
> > probe points without semaphores are denoted by setting the semaphore
> > address in the probe's note to zero. At present the code does do a
> > comparison of the semaphore address against zero, but only after it's
> > been relocated; as such it will (almost?) always fail, commonly
> > resulting in GDB trying to overwrite the ELF magic located at the
> > image's base address.
> >
> > This commit tests the address as specified in the SDT note rather than
> > the relocated value in order to correctly detect absent probe
> > semaphores.
> >
> > [0]: https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
> >
> > gdb/Changelog:
> >
> > 	* stap-probe.c: Fix handling of null stap semaphores
> > ---
> >  gdb/stap-probe.c | 7 ++++---
> >  1 file changed, 4 insertions(+), 3 deletions(-)
> >
> > diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c
> > index ba927790a5..e5e3cceacd 100644
> > --- a/gdb/stap-probe.c
> > +++ b/gdb/stap-probe.c
> > @@ -1425,9 +1425,6 @@ stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
> >    struct type *type = builtin_type (gdbarch)->builtin_unsigned_short;
> >    ULONGEST value;
> >  
> > -  if (address == 0)
> > -    return;
> > -
> >    /* Swallow errors.  */
> >    if (target_read_memory (address, bytes, TYPE_LENGTH (type)) != 0)
> >      {
> > @@ -1461,6 +1458,8 @@ stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
> >  void
> >  stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
> >  {
> > +  if (m_sem_addr == 0)
> > +    return;
> >    stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 1, gdbarch);
> >  }
> >  
> > @@ -1469,6 +1468,8 @@ stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
> >  void
> >  stap_probe::clear_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
> >  {
> > +  if (m_sem_addr == 0)
> > +    return;
> >    stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 0, gdbarch);
> >  }
> >  
> > --
> > 2.23.0
  
Simon Marchi Dec. 29, 2019, 6:58 p.m. UTC | #3
On 2019-12-03 2:59 p.m., George Barrett wrote:
> According to the SystemTap documentation on user-space probes[0], stap
> probe points without semaphores are denoted by setting the semaphore
> address in the probe's note to zero. At present the code does do a
> comparison of the semaphore address against zero, but only after it's
> been relocated; as such it will (almost?) always fail, commonly
> resulting in GDB trying to overwrite the ELF magic located at the
> image's base address.
> 
> This commit tests the address as specified in the SDT note rather than
> the relocated value in order to correctly detect absent probe
> semaphores.
> 
> [0]: https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
> 
> gdb/Changelog:
> 
> 	* stap-probe.c: Fix handling of null stap semaphores

Hi George,

Thanks, the patch looks good to me.  Though the ChangeLog should mention the
modified functions, I propose to use this:

	* stap-probe.c (stap_modify_semaphore): Don't check for null
	semaphores.
	(stap_probe::set_semaphore, stap_probe::clear_semaphore): Check
	for null semaphores.

I'd really like if we could have a test for this, so that eventual refactors don't
re-introduce this bug.  Perhaps the gdb.base/stap-probe.exp test could be enhanced to
test that the ELF magic number hasn't been changed?

One difficulty is finding out where it is, I don't know if there's a GDB command that
will compute that directly.  One way is to take the address of a global variable before
and after starting the process, and see how it has been relocated, that would be the base
of the image:

(gdb) p &some_global
$1 = (int *) 0x402c <some_global>
(gdb) start
Temporary breakpoint 1 at 0x111d: file test.c, line 9.
Starting program: /home/simark/src/aoc/08/p2/a.out

Temporary breakpoint 1, main () at test.c:9
9	  for (i = 0; i < 1000; i++) {
(gdb) p &some_global
$2 = (int *) 0x55555555802c <some_global>
(gdb) p/x 0x55555555802c - 0x402c
$3 = 0x555555554000
(gdb) p (*(char*) 0x555555554000)@4
$4 = "\177ELF"

Also, the semaphore is set when the breakpoint is inserted and cleared when the breakpoint
is removed.  By default, GDB removes the breakpoints while the inferior is stopped, so if
we inspect the magic number while the inferior is stopped, it will appear OK.

However, we can use "set breakpoints always-inserted on" to make GDB leave the breakpoint
inserted when the inferior is stopped.  When doing this, we can observe the overwritten
magic number:

(gdb) set breakpoint always-inserted 1
(gdb) b -probe provider:name
Breakpoint 1 at 0x1137
(gdb) p &some_global
$1 = (int *) 0x402c <some_global>
(gdb) start
Temporary breakpoint 2 at 0x111d: file test.c, line 9.
Starting program: /home/simark/src/aoc/08/p2/a.out

Temporary breakpoint 2, main () at test.c:9
9	  for (i = 0; i < 1000; i++) {
(gdb) p &some_global
$2 = (int *) 0x55555555802c <some_global>
(gdb) p/x 0x55555555802c - 0x402c
$3 = 0x555555554000
(gdb) p ((unsigned char[4]) *0x555555554000)
$4 = "\200ELF"

I think all this only applies if the main program is a position-independent executable, so
this test should probably be skipped if we detect the relocation is 0.

So with all this it should be pretty straightforward to improve the test to check for this.

Note that I found what looks like to be a GDB bug while doing this:

https://sourceware.org/bugzilla/show_bug.cgi?id=25321

Simon
  
George Barrett Dec. 30, 2019, 3:38 p.m. UTC | #4
On Sun, Dec 29, 2019 at 01:58:46PM -0500, Simon Marchi wrote:
> Though the ChangeLog should mention the modified functions, I propose to use
> this:
>
> 	* stap-probe.c (stap_modify_semaphore): Don't check for null
> 	semaphores.
> 	(stap_probe::set_semaphore, stap_probe::clear_semaphore): Check
> 	for null semaphores.

Ack.

> I'd really like if we could have a test for this, so that eventual refactors
> don't re-introduce this bug.  Perhaps the gdb.base/stap-probe.exp test could
> be enhanced to test that the ELF magic number hasn't been changed?

I was secretly hoping no-one would notice the absence of a test case ;)

> One difficulty is finding out where it is, I don't know if there's a GDB
> command that will compute that directly.  One way is to take the address of
> a global variable before and after starting the process, and see how it has
> been relocated, that would be the base of the image:
>
> (gdb) p &some_global
> $1 = (int *) 0x402c <some_global>
> (gdb) start
> Temporary breakpoint 1 at 0x111d: file test.c, line 9.
> Starting program: /home/simark/src/aoc/08/p2/a.out
>
> Temporary breakpoint 1, main () at test.c:9
> 9	  for (i = 0; i < 1000; i++) {
> (gdb) p &some_global
> $2 = (int *) 0x55555555802c <some_global>
> (gdb) p/x 0x55555555802c - 0x402c
> $3 = 0x555555554000
> (gdb) p (*(char*) 0x555555554000)@4
> $4 = "\177ELF"

Is it too glibc-specific to use _r_debug? Something like the following:

(gdb) start
Temporary breakpoint 1 at 0x4e60: file ../src/ls.c, line 1451.
Starting program: /usr/bin/ls

Temporary breakpoint 1, main (argc=1, argv=0x7fffffffd8d8) at ../src/ls.c:1451
(gdb) p/x _r_debug.r_map->l_addr
$1 = 0x555555554000
(gdb) p (*(char*) _r_debug.r_map->l_addr)@4
$2 = "\177ELF"

> Also, the semaphore is set when the breakpoint is inserted and cleared when
> the breakpoint is removed.  By default, GDB removes the breakpoints while
> the inferior is stopped, so if we inspect the magic number while the
> inferior is stopped, it will appear OK.
>
> However, we can use "set breakpoints always-inserted on" to make GDB leave
> the breakpoint inserted when the inferior is stopped.

Thanks for the tip, that seems like something that could have been pretty
frustrating to find out on my own.

> I think all this only applies if the main program is a position-independent
> executable, so this test should probably be skipped if we detect the
> relocation is 0.

Ack.

> So with all this it should be pretty straightforward to improve the test to
> check for this.

Thanks
  
Simon Marchi Dec. 30, 2019, 5:59 p.m. UTC | #5
On 2019-12-30 10:38 a.m., George Barrett wrote:
> On Sun, Dec 29, 2019 at 01:58:46PM -0500, Simon Marchi wrote:
>> Though the ChangeLog should mention the modified functions, I propose to use
>> this:
>>
>> 	* stap-probe.c (stap_modify_semaphore): Don't check for null
>> 	semaphores.
>> 	(stap_probe::set_semaphore, stap_probe::clear_semaphore): Check
>> 	for null semaphores.
> 
> Ack.
> 
>> I'd really like if we could have a test for this, so that eventual refactors
>> don't re-introduce this bug.  Perhaps the gdb.base/stap-probe.exp test could
>> be enhanced to test that the ELF magic number hasn't been changed?
> 
> I was secretly hoping no-one would notice the absence of a test case ;)

Yeah, I know writing a test case is not the most fun part, but it pays in the
long run.  I can help if you are not familiar enough with tcl/expect/dejagnu.

>> One difficulty is finding out where it is, I don't know if there's a GDB
>> command that will compute that directly.  One way is to take the address of
>> a global variable before and after starting the process, and see how it has
>> been relocated, that would be the base of the image:
>>
>> (gdb) p &some_global
>> $1 = (int *) 0x402c <some_global>
>> (gdb) start
>> Temporary breakpoint 1 at 0x111d: file test.c, line 9.
>> Starting program: /home/simark/src/aoc/08/p2/a.out
>>
>> Temporary breakpoint 1, main () at test.c:9
>> 9	  for (i = 0; i < 1000; i++) {
>> (gdb) p &some_global
>> $2 = (int *) 0x55555555802c <some_global>
>> (gdb) p/x 0x55555555802c - 0x402c
>> $3 = 0x555555554000
>> (gdb) p (*(char*) 0x555555554000)@4
>> $4 = "\177ELF"
> 
> Is it too glibc-specific to use _r_debug? Something like the following:
> 
> (gdb) start
> Temporary breakpoint 1 at 0x4e60: file ../src/ls.c, line 1451.
> Starting program: /usr/bin/ls
> 
> Temporary breakpoint 1, main (argc=1, argv=0x7fffffffd8d8) at ../src/ls.c:1451
> (gdb) p/x _r_debug.r_map->l_addr
> $1 = 0x555555554000
> (gdb) p (*(char*) _r_debug.r_map->l_addr)@4
> $2 = "\177ELF"

Hmm, it is not only glibc-specific (I believe?), but it also requires having glibc
debug symbols installed.  I just tried in an Alpine docker image (which uses musl),
and the address subtraction method works.  So between the two methods, I'd prefer
the address subtraction, since it works on more libc's, and it works without debug
info for the libc.

Simon
  
George Barrett Dec. 31, 2019, 4:39 a.m. UTC | #6
On Mon, Dec 30, 2019 at 12:59:30PM -0500, Simon Marchi wrote:
> Yeah, I know writing a test case is not the most fun part, but it pays in
> the long run.  I can help if you are not familiar enough with
> tcl/expect/dejagnu.

I can stumble through enough to get something ready for review, but there is
a bit of a hiccup I wanted some advice on: the testing strategy you outlined
hinges on prepare_for_testing producing PIC executables, but AFAICS this is
neither done by default nor can I find a facility in the test suite utilities
to achieve this. Is adding -pie/-fPIC to additional_flags acceptable in this
instance?

On a related note, I was a bit surprised to discover that the test case
doesn't actually ever define USE_PROBES since the argument provided to
stap_test(_no_debuginfo) is `-DUSE_PROBES' instead of
`additional_flags=-DUSE_PROBES'. This seems like a trivial enough fix to be
rolled into a single commit, but I was thinking that the fix wouldn't be
particularly evident from the diff if the -pie flags were added in the same
patch. Would this be worth splitting into an independent patch?

> Hmm, it is not only glibc-specific (I believe?), but it also requires having
> glibc debug symbols installed.  I just tried in an Alpine docker image
> (which uses musl), and the address subtraction method works.  So between the
> two methods, I'd prefer the address subtraction, since it works on more
> libc's, and it works without debug info for the libc.

Indeed. I hadn't gotten far with testing an _r_debug approach before I
realised I was taking some things for granted.

> Simon

Thanks
  
Simon Marchi Dec. 31, 2019, 5:37 p.m. UTC | #7
On 2019-12-30 11:39 p.m., George Barrett wrote:
> On Mon, Dec 30, 2019 at 12:59:30PM -0500, Simon Marchi wrote:
>> Yeah, I know writing a test case is not the most fun part, but it pays in
>> the long run.  I can help if you are not familiar enough with
>> tcl/expect/dejagnu.
> 
> I can stumble through enough to get something ready for review, but there is
> a bit of a hiccup I wanted some advice on: the testing strategy you outlined
> hinges on prepare_for_testing producing PIC executables, but AFAICS this is
> neither done by default nor can I find a facility in the test suite utilities
> to achieve this. Is adding -pie/-fPIC to additional_flags acceptable in this
> instance?

If nothing else is specified, the testsuite produces executables with the default
settings of the toolchain.  If, for a test, you need to force creating a PIE or
a non-PIE, you can pass these flags to gdb_compile & al:

   - pie: Force creation of PIE executables.
   - nopie: Prevent creation of PIE executables.

https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=blob;f=gdb/testsuite/lib/gdb.exp;h=8b1ec62c9c21570f3d3cb4c9d3fa0bdce016876e;hb=HEAD#l3686

If there are some differences in how PIE and non-PIE are handled, it can be good
to run the test with both.  This can be achieved relatively easily with a:

  foreach_with_prefix pie { "nopie" "pie" } {
    ...
  }

Here's an example:

https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=blob;f=gdb/testsuite/gdb.base/break-idempotent.exp;h=96f91c50f906955b289b7ee7dfd65dc7c375e79a;hb=HEAD#l149

> On a related note, I was a bit surprised to discover that the test case
> doesn't actually ever define USE_PROBES since the argument provided to
> stap_test(_no_debuginfo) is `-DUSE_PROBES' instead of
> `additional_flags=-DUSE_PROBES'. This seems like a trivial enough fix to be
> rolled into a single commit, but I was thinking that the fix wouldn't be
> particularly evident from the diff if the -pie flags were added in the same
> patch. Would this be worth splitting into an independent patch?

Yes, totally, a separate patch (ideally before, in the same series) would be perfect.
This shows that the test is written in a poor way, it does not appear to actually
test anything related to semaphores.  At least, I think it should parse the output
of "info probes" and expect an hexadecimal number for the semaphore address.

Good catch, btw.  Also, shouldn't it be called USE_SEMAPHORES and not USE_PROBES?

Thanks for looking into this.

Simon
  
Simon Marchi Dec. 31, 2019, 5:56 p.m. UTC | #8
On 2019-12-29 1:58 p.m., Simon Marchi wrote:
> Note that I found what looks like to be a GDB bug while doing this:
> 
> https://sourceware.org/bugzilla/show_bug.cgi?id=25321

Btw, forget about this, I was just forgetting how pointer arithmetic works.

Making the global variable a char instead of an int (or casting the pointers in GDB)
should make it work as expected.

Simon
  
George Barrett Dec. 31, 2019, 5:59 p.m. UTC | #9
On Tue, Dec 31, 2019 at 12:37:15PM -0500, Simon Marchi wrote:
> If nothing else is specified, the testsuite produces executables with the
> default settings of the toolchain.  If, for a test, you need to force
> creating a PIE or a non-PIE, you can pass these flags to gdb_compile & al:
>
>    - pie: Force creation of PIE executables.
>    - nopie: Prevent creation of PIE executables.
>
> https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=blob;f=gdb/testsuite/lib/gdb.exp;h=8b1ec62c9c21570f3d3cb4c9d3fa0bdce016876e;hb=HEAD#l3686

Wow, I'm not sure how I missed that. Thanks for spelling it out.

> If there are some differences in how PIE and non-PIE are handled, it can be
> good to run the test with both.  This can be achieved relatively easily with
> a:
>
>   foreach_with_prefix pie { "nopie" "pie" } {
>     ...
>   }
>
> Here's an example:
>
> https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;a=blob;f=gdb/testsuite/gdb.base/break-idempotent.exp;h=96f91c50f906955b289b7ee7dfd65dc7c375e79a;hb=HEAD#l149

Ah, alright.

> Yes, totally, a separate patch (ideally before, in the same series) would be
> perfect.

> At least, I think it should parse the output of "info probes" and expect an
> hexadecimal number for the semaphore address.

Okay, I can do that.

> Also, shouldn't it be called USE_SEMAPHORES and not USE_PROBES?

My thoughts exactly. I can make that change to the first fix-up patch too, if
you like.

> Thanks for looking into this.

'Tis my pleasure.
  

Patch

diff --git a/gdb/stap-probe.c b/gdb/stap-probe.c
index ba927790a5..e5e3cceacd 100644
--- a/gdb/stap-probe.c
+++ b/gdb/stap-probe.c
@@ -1425,9 +1425,6 @@  stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
   struct type *type = builtin_type (gdbarch)->builtin_unsigned_short;
   ULONGEST value;
 
-  if (address == 0)
-    return;
-
   /* Swallow errors.  */
   if (target_read_memory (address, bytes, TYPE_LENGTH (type)) != 0)
     {
@@ -1461,6 +1458,8 @@  stap_modify_semaphore (CORE_ADDR address, int set, struct gdbarch *gdbarch)
 void
 stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
 {
+  if (m_sem_addr == 0)
+    return;
   stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 1, gdbarch);
 }
 
@@ -1469,6 +1468,8 @@  stap_probe::set_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
 void
 stap_probe::clear_semaphore (struct objfile *objfile, struct gdbarch *gdbarch)
 {
+  if (m_sem_addr == 0)
+    return;
   stap_modify_semaphore (relocate_address (m_sem_addr, objfile), 0, gdbarch);
 }