diff mbox

[RFC] Handle armv4t thumb shared library

Message ID 1411911976-7338-1-git-send-email-yao@codesourcery.com
State New
Headers show

Commit Message

Yao Qi Sept. 28, 2014, 1:46 p.m. UTC
Hi,
I am looking at some test fails (20+) about shared library when they are
compiled with "-march=armv4t -mthumb", like:

break shr2^M
Breakpoint 2: shr2. (2 locations)^M
(gdb) FAIL: gdb.base/shlib-call.exp: breakpoint function shr2

These tests fail because GDB can't handle the stubs ld generates for
the armv4t thumb shared library.  It was added by patch
byhttps://sourceware.org/ml/binutils/2006-08/msg00201.html
The part interesting to GDB is that ld changes function foo to a stub,
, rename foo to __real_foo, and it will jump to __real_foo at the end
of the stub.  See details in my comments to
arm_convert_from_func_ptr_addr.

However, the debug info about function foo still describes that
function foo is located at __real_foo.  When we insert breakpoint on
foo, GDB query symbols (both min symbol and full symbol), get two
address, and create two breakpoint locations respectively.  Program
always hits the breakpoint on the stub, but there is no source line
info associated on it, many tests fail.

Here is an example, for function pendfunc1, ld generates stub
pendfunc1, and rename the real function to __real_pendfunc1.

    18: 000007b4    44 FUNC    GLOBAL DEFAULT   10 pendfunc1
    80: 000007b4     0 FUNC    LOCAL  DEFAULT   10 __pendfunc1_from_arm
    81: 0000075d     0 FUNC    LOCAL  DEFAULT   10 __real_pendfunc1

it jumps to __real_pendfunc1 at the end of the stub,

(gdb) x/3i 0x7b4
   0x7b4 <pendfunc1>:   ldr     r12, [pc, #4]   ; 0x7c0 <pendfunc1+12>
   0x7b8 <pendfunc1+4>: add     r12, r12, pc
   0x7bc <pendfunc1+8>: bx      r12

but the debug info about pendfunc1 is still about the real start
address, instead of the stub,

 <1><135>: Abbrev Number: 4 (DW_TAG_subprogram)
    <136>   DW_AT_external    : 1
    <136>   DW_AT_name        : (indirect string, offset: 0x12b):
pendfunc1
    <13a>   DW_AT_decl_file   : 1
    <13b>   DW_AT_decl_line   : 20
    <13c>   DW_AT_prototyped  : 1
    <13c>   DW_AT_low_pc      : 0x75c
    <140>   DW_AT_high_pc     : 0x2c

so when we set breakpoint on pendfunc1, two locations are created,
(gdb) b pendfunc1
Breakpoint 1 at 0x764: pendfunc1. (2 locations)
(gdb) info breakpoints
Num     Type           Disp Enb Address    What
1       breakpoint     keep y   <MULTIPLE>
1.1                         y     0x00000764 in pendfunc1 at ../../../../git/gdb/testsuite/gdb.base/pendshr.c:22
1.2                         y     0x000007b4 <pendfunc1>

If the problem description is clear, let us look at how to fix it.

When we set breakpoint on pendfunc1, GDB will query both full symbols
and min symbols, and find both msymbol and symbol for function
pendfunc1.  The fix could be making GDB record the same address for
msymbol and full symbol, that is to say, teach GDB to recognize this
stub and record the address of __real_pendfunc1 instead of pendfunc1.

Once a min symbol is found, GDB saves it in the result, see
linespec.c:minsym_found:

  /* The minimal symbol might point to a function descriptor;
     resolve it to the actual code address instead.  */
  pc = gdbarch_convert_from_func_ptr_addr (gdbarch, sal.pc, &current_target);
  if (pc != sal.pc)
    sal = find_pc_sect_line (pc, NULL, 0);

  if (self->funfirstline)
    skip_prologue_sal (&sal);

  if (maybe_add_address (self->addr_set, objfile->pspace, sal.pc))
    add_sal_to_sals (self, result, &sal, MSYMBOL_NATURAL_NAME (msymbol), 0);

Since skip_prologue_sal is called, my first try is to enhance
arm_skip_prologue to skip this stub, but it doesn't work, because the
change is reverted by the code later in skip_prologue_sal:

  /* If we're already past the prologue, leave SAL unchanged.  Otherwise
     forward SAL to the end of the prologue.  */
  if (sal->pc >= pc)
    return;

At this point, sal->pc is stub address, and pc is the first
instruction after prologue.  This approach isn't reliable as it
depends on the order of function and stub.

Then, second try is to extend gdbarch hook convert_from_func_ptr_addr
for my purpose to return the real function address for a given address
of stub.  Nowadays, convert_from_func_ptr_addr is used to convert
function descriptor address to function address, so we may generalize
it to convert function symbol to function address.  Function symbol
includes function descriptor and function stub.  This patch is to
implement convert_from_func_ptr_addr for arm, and return address of
__real_foo if it exists.

The user visible change happens on the armv4t thumb shared library
*without* debug info.  With my patch applied, and get rid of debug
info from a shared library,

(gdb) p pendfunc1
$1 = {<text variable, no debug info>} 0x75c <__real_pendfunc1>
(gdb) disassemble pendfunc1
Dump of assembler code for function __real_pendfunc1:

Note that __real_pendfunc1 instead of pendfunc1 is printed, and that
causes fails in solib-weak.exp.  IMO, the output is not confusing, so
the change is acceptable.  I update solib-weak.exp to match
__real_bar.

What do you think of my approach to fix this problem?  I'm OK to
rename convert_from_func_ptr_addr to convert_from_func_symbol or
something else as a follow up.

Another approach is to teach skip_trampoline_code to match the
code in stub, and call gdbarch_skip_trampoline_code after
gdbarch_convert_from_func_ptr_addr in linespec.c:minsym_found.

This patch is regression tested on for "-march=armv4t -mthumb",
"-march=armv7-a -mthumb" and "-march=arv4t".  No regression and get
about 20+ fails fixed in "-march=armv4t -mthumb" test.

gdb:

2014-09-27  Yao Qi  <yao@codesourcery.com>

	* arm-linux-tdep.c: Inlcude objfiles.h.
	(arm_linux_skip_armv4t_thumb_shlib_stub): New function.
	(arm_linux_convert_from_func_ptr_addr): New function.
	(arm_linux_init_abi): Install arm_convert_from_func_ptr_addr to
	gdbarch hook.

gdb/testsuite:

2014-09-27  Yao Qi  <yao@codesourcery.com>

	* gdb.base/solib-weak.exp (do_test): Match __real_bar if it
	exists.
---
 gdb/arm-linux-tdep.c                  | 76 +++++++++++++++++++++++++++++++++++
 gdb/testsuite/gdb.base/solib-weak.exp | 26 +++++++++++-
 2 files changed, 101 insertions(+), 1 deletion(-)
diff mbox

Patch

diff --git a/gdb/arm-linux-tdep.c b/gdb/arm-linux-tdep.c
index 3d524c9..795dd4c 100644
--- a/gdb/arm-linux-tdep.c
+++ b/gdb/arm-linux-tdep.c
@@ -34,6 +34,7 @@ 
 #include "breakpoint.h"
 #include "auxv.h"
 #include "xml-syscall.h"
+#include "objfiles.h"
 
 #include "arm-tdep.h"
 #include "arm-linux-tdep.h"
@@ -1351,6 +1352,69 @@  arm_linux_syscall_record (struct regcache *regcache, unsigned long svc_number)
   return 0;
 }
 
+/* This is used to skip the arm entry points in thumb shared library.
+   GNU ld adds the stub for the armv4t thumb shared library.  If we call
+   'foo' from a shared library, 'foo' becomes the stub and '__real_foo' is
+   the real function.
+
+   0x75c <__real_foo>: foo function
+
+   0x7b4 <foo>:   ldr     r12, [pc, #4]   ; 0x7c0 <pendfunc1+12>
+   0x7b8 <foo+4>: add     r12, r12, pc
+   0x7bc <foo+8>: bx      r12             ; branch to __real_foo.
+
+   When ADDR is foo's address, this function is to adjust the address to
+   __real_foo's, the address of the function foo.  */
+
+static CORE_ADDR
+arm_linux_skip_armv4t_thumb_shlib_stub (struct gdbarch *gdbarch,
+					CORE_ADDR addr)
+{
+  struct bound_minimal_symbol msym = lookup_minimal_symbol_by_pc (addr);
+
+  if (msym.minsym != NULL
+      && BMSYMBOL_VALUE_ADDRESS (msym) == addr
+      && MSYMBOL_LINKAGE_NAME (msym.minsym) != NULL)
+    {
+      const char *name = MSYMBOL_LINKAGE_NAME (msym.minsym);
+      char *real = xmalloc (strlen (name) + sizeof "__real_");
+
+      strcpy (real, "__real_");
+      strcpy (real + sizeof "__real_" - 1, name);
+
+      /* Look for __real_foo within the same objfile.  */
+      msym = lookup_minimal_symbol (real, NULL, msym.objfile);
+
+      xfree (real);
+      if (msym.minsym != NULL)
+	{
+	  int i;
+	  static const unsigned int stub_code[] =
+	  {
+	    0xe59fc004,	/* ldr     r12, [pc, #4] */
+	    0xe08cc00f,	/* add     r12, r12, pc  */
+	    0xe12fff1c,	/* bx      r12           */
+	    0
+	  };
+
+	  for (i = 0; stub_code[i] != 0; i++)
+	    {
+	      unsigned int insn;
+	      enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
+
+	      insn = read_memory_integer (addr + (i * 4), 4, byte_order);
+	      if (insn != stub_code[i])
+		return addr;
+	    }
+
+	  /* Return the address of __real_foo if it is found.  */
+	  return BMSYMBOL_VALUE_ADDRESS (msym);
+	}
+    }
+
+  return addr;
+}
+
 /* Implement the skip_trampoline_code gdbarch method.  */
 
 static CORE_ADDR
@@ -1364,6 +1428,16 @@  arm_linux_skip_trampoline_code (struct frame_info *frame, CORE_ADDR pc)
   return find_solib_trampoline_target (frame, pc);
 }
 
+/* Implement the convert_from_func_ptr_addr gdbarch method.  */
+
+static CORE_ADDR
+arm_linux_convert_from_func_ptr_addr (struct gdbarch *gdbarch,
+				      CORE_ADDR addr,
+				      struct target_ops *targ)
+{
+  return arm_linux_skip_armv4t_thumb_shlib_stub (gdbarch, addr);
+}
+
 static void
 arm_linux_init_abi (struct gdbarch_info info,
 		    struct gdbarch *gdbarch)
@@ -1431,6 +1505,8 @@  arm_linux_init_abi (struct gdbarch_info info,
   /* Shared library handling.  */
   set_gdbarch_skip_trampoline_code (gdbarch, arm_linux_skip_trampoline_code);
   set_gdbarch_skip_solib_resolver (gdbarch, glibc_skip_solib_resolver);
+  set_gdbarch_convert_from_func_ptr_addr
+    (gdbarch, arm_linux_convert_from_func_ptr_addr);
 
   /* Enable TLS support.  */
   set_gdbarch_fetch_tls_load_module_address (gdbarch,
diff --git a/gdb/testsuite/gdb.base/solib-weak.exp b/gdb/testsuite/gdb.base/solib-weak.exp
index 27f07e7..63144f5 100644
--- a/gdb/testsuite/gdb.base/solib-weak.exp
+++ b/gdb/testsuite/gdb.base/solib-weak.exp
@@ -100,7 +100,31 @@  proc do_test { lib1opts lib2opts lib1first } {
 
     gdb_breakpoint "bar"
 
-    gdb_test "continue" "Breakpoint .* \\.?bar .*${expected_file}\\..*" \
+    set pattern "\\.?bar"
+
+    global gdb_prompt hex
+    set test "p __real_bar"
+    # Test symbol __real_bar exists or not.
+    gdb_test_multiple $test $test {
+	-re "<text variable, no debug info>.*$gdb_prompt $" {
+	    # If the first appeared library doesn't have debug info,
+	    # the minimal symbol is used.  The address is resolved
+	    # to __real_bar.
+	    if {$lib1first} {
+		if {$lib1opts == ""} {
+		    set pattern "__real_bar"
+		}
+	    } else {
+		if {$lib2opts == ""} {
+		    set pattern "__real_bar"
+		}
+	    }
+	}
+	-re "$gdb_prompt $" {
+	}
+    }
+
+    gdb_test "continue" "Breakpoint .* $pattern .*${expected_file}\\..*" \
 	"run to breakpoint - $testopts"
 }