[7/8] Initialise target descrption after skipping extra traps for --wrapper

Message ID 1437392126-29503-8-git-send-email-yao.qi@linaro.org
State New, archived
Headers

Commit Message

Yao Qi July 20, 2015, 11:35 a.m. UTC
  Nowadays, when --wrapper is used, GDBserver skips extra traps/stops
in the wrapper program, and stops at the first instruction of the
program to be debugged.  However, GDBserver created target description
in the first stop of inferior, and the executable of the inferior
is the wrapper program rather than the program to be debugged.  In
this way, the target description can be wrong if the architectures
of wrapper program and program to be debugged are different.  This
is shown by some fails in gdb.server/wrapper.exp on buildbot.

We are testing i686-linux GDB (Fedora-i686) on an x86_64-linux box
(fedora-x86-64-4) in buildbot, such configuration causes fails in
gdb.server/wrapper.exp like this:

spawn /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/../../gdb/gdbserver/gdbserver --once --wrapper env TEST=1 -- :2346 /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/outputs/gdb.server/wrapper/wrapper
Process /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/outputs/gdb.server/wrapper/wrapper created; pid = 8795
Can't debug 64-bit process with 32-bit GDBserver
Exiting
target remote localhost:2346
localhost:2346: Connection timed out.
(gdb) FAIL: gdb.server/wrapper.exp: setting breakpoint at marker

See https://sourceware.org/ml/gdb-testers/2015-q3/msg01541.html

In this case, program to be debugged ("wrapper") is 32-bit but wrapper
program ("/usr/bin/env") is 64-bit, so GDBserver gets the 64-bit
target description instead of 32-bit.

The root cause of this problem is that GDBserver creates target
description too early, and the rationale of fix could be creating
target description once the GDBserver skips extra traps and inferior
stops at the first instruction of the program we want to debug.  IOW,
when GDBserver skips extra traps, the inferior's tdesc is NULL, and
mywait and its callees shouldn't use inferior's tdesc, so in this
patch, we do the shortcut if proc->tdesc == NULL, see changes in
linux_resume_one_lwp_throw and need_step_over_p.

In linux_low_filter_event, if target description isn't initialised and
GDBserver attached the process, we create target description immediately,
because GDBserver don't have to skip extra traps for attach, IOW, it
makes no sense to use --attach and --wrapper together.  Otherwise, the
process is launched by GDBserver, we keep the status pending, and return.

After GDBserver skipped extra traps in start_inferior, we call a
target_ops hook arch_setup to initialise target description there.

gdb/gdbserver:

2015-07-16  Yao Qi  <yao.qi@linaro.org>

	* linux-low.c (linux_arch_setup): New function.
	(linux_low_filter_event): If proc->tdesc is NULL and
	proc->attached is true, call the_low_target.arch_setup.
	Otherwise, keep status pending, and return.
	(linux_resume_one_lwp_throw): if proc->tdesc is NULL
	resume the inferior.
	(need_step_over_p): Return 0 if proc->tdesc is NULL.
	(linux_target_ops): Install arch_setup.
	* server.c (start_inferior): Call the_target->arch_setup.
	* target.h (struct target_ops) <arch_setup>: New field.
	(target_arch_setup): New marco.
	* lynx-low.c (lynx_target_ops): Update.
	* nto-low.c (nto_target_ops): Update.
	* spu-low.c (spu_target_ops): Update.
	* win32-low.c (win32_target_ops): Update.
---
 gdb/gdbserver/linux-low.c | 74 ++++++++++++++++++++++++++++++++++++++++-------
 gdb/gdbserver/lynx-low.c  |  1 +
 gdb/gdbserver/nto-low.c   |  1 +
 gdb/gdbserver/server.c    |  3 ++
 gdb/gdbserver/spu-low.c   |  1 +
 gdb/gdbserver/target.h    | 10 +++++++
 gdb/gdbserver/win32-low.c |  1 +
 7 files changed, 80 insertions(+), 11 deletions(-)
  

Comments

Pedro Alves July 23, 2015, 11:25 p.m. UTC | #1
On 07/20/2015 12:35 PM, Yao Qi wrote:
> Nowadays, when --wrapper is used, GDBserver skips extra traps/stops
> in the wrapper program, and stops at the first instruction of the
> program to be debugged.  However, GDBserver created target description
> in the first stop of inferior, and the executable of the inferior
> is the wrapper program rather than the program to be debugged.  In
> this way, the target description can be wrong if the architectures
> of wrapper program and program to be debugged are different.  This
> is shown by some fails in gdb.server/wrapper.exp on buildbot.
> 
> We are testing i686-linux GDB (Fedora-i686) on an x86_64-linux box
> (fedora-x86-64-4) in buildbot, such configuration causes fails in
> gdb.server/wrapper.exp like this:
> 
> spawn /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/../../gdb/gdbserver/gdbserver --once --wrapper env TEST=1 -- :2346 /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/outputs/gdb.server/wrapper/wrapper
> Process /home/gdb-buildbot-2/fedora-x86-64-4/fedora-i686/build/gdb/testsuite/outputs/gdb.server/wrapper/wrapper created; pid = 8795
> Can't debug 64-bit process with 32-bit GDBserver
> Exiting
> target remote localhost:2346
> localhost:2346: Connection timed out.
> (gdb) FAIL: gdb.server/wrapper.exp: setting breakpoint at marker
> 
> See https://sourceware.org/ml/gdb-testers/2015-q3/msg01541.html
> 
> In this case, program to be debugged ("wrapper") is 32-bit but wrapper
> program ("/usr/bin/env") is 64-bit, so GDBserver gets the 64-bit
> target description instead of 32-bit.
> 
> The root cause of this problem is that GDBserver creates target
> description too early, and the rationale of fix could be creating
> target description once the GDBserver skips extra traps and inferior
> stops at the first instruction of the program we want to debug.  IOW,
> when GDBserver skips extra traps, the inferior's tdesc is NULL, and
> mywait and its callees shouldn't use inferior's tdesc, so in this
> patch, we do the shortcut if proc->tdesc == NULL, see changes in
> linux_resume_one_lwp_throw and need_step_over_p.
> 
> In linux_low_filter_event, if target description isn't initialised and
> GDBserver attached the process, we create target description immediately,
> because GDBserver don't have to skip extra traps for attach, IOW, it
> makes no sense to use --attach and --wrapper together.  Otherwise, the
> process is launched by GDBserver, we keep the status pending, and return.
> 
> After GDBserver skipped extra traps in start_inferior, we call a
> target_ops hook arch_setup to initialise target description there.
> 

Great, thanks for doing this.  Looks generally good to me.  One
comment below.

> @@ -3651,6 +3671,31 @@ linux_resume_one_lwp_throw (struct lwp_info *lwp,
>    struct thread_info *thread = get_lwp_thread (lwp);
>    struct thread_info *saved_thread;
>    int fast_tp_collecting;
> +  struct process_info *proc = get_thread_process (thread);
> +
> +  if (proc->tdesc == NULL)
> +    {
> +      /* Target description isn't initialised because the program hasn't
> +	 stop at the first instruction.  It means GDBserver skips the

"hasn't stopped" ... "first instruction yet".

> +	 extra traps from the wrapper program (see option --wrapper),
> +	 and GDBserver just simply resumes the program without accessing
> +	 inferior registers, because target description is unavailable
> +	 at this point.  */
> +      errno = 0;
> +      lwp->stepping = step;
> +      ptrace (step ? PTRACE_SINGLESTEP : PTRACE_CONT, lwpid_of (thread),
> +	      (PTRACE_TYPE_ARG3) 0,
> +	      /* Coerce to a uintptr_t first to avoid potential gcc warning
> +		 of coercing an 8 byte integer to a 4 byte pointer.  */
> +	      (PTRACE_TYPE_ARG4) (uintptr_t) signal);
> +
> +      if (errno)
> +	perror_with_name ("resuming thread");
> +
> +      lwp->stopped = 0;
> +      lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
> +      return;

Instead of this whole code block, I think we should be able to skip
the bits in the function that require accessing registers.  E.g.,
this:

 /* Cancel actions that rely on GDB not changing the PC (e.g., the
     user used the "jump" command, or "set $pc = foo").  */
  if (lwp->stop_pc != get_pc (lwp))
    {
      /* Collecting 'while-stepping' actions doesn't make sense
	 anymore.  */
      release_while_stepping_state_list (thread);
    }

Could be guarded by:

  if (thread->while_stepping != NULL)

And this:

  if (the_low_target.get_pc != NULL)
    {
      struct regcache *regcache = get_thread_regcache (current_thread, 1);

      lwp->stop_pc = (*the_low_target.get_pc) (regcache);

      if (debug_threads)
	{
	  debug_printf ("  %s from pc 0x%lx\n", step ? "step" : "continue",
			(long) lwp->stop_pc);
	}
    }

could be guarded by:

  if (proc->tdesc == NULL)

Did you try that?

> +    }
>  
>    if (lwp->stopped == 0)
>      return;

Thanks,
Pedro Alves
  
Yao Qi July 24, 2015, 11:11 a.m. UTC | #2
Pedro Alves <palves@redhat.com> writes:

> Instead of this whole code block, I think we should be able to skip
> the bits in the function that require accessing registers.  E.g.,
> this:
>
>  /* Cancel actions that rely on GDB not changing the PC (e.g., the
>      user used the "jump" command, or "set $pc = foo").  */
>   if (lwp->stop_pc != get_pc (lwp))
>     {
>       /* Collecting 'while-stepping' actions doesn't make sense
> 	 anymore.  */
>       release_while_stepping_state_list (thread);
>     }
>
> Could be guarded by:
>
>   if (thread->while_stepping != NULL)
>
> And this:
>
>   if (the_low_target.get_pc != NULL)
>     {
>       struct regcache *regcache = get_thread_regcache (current_thread, 1);
>
>       lwp->stop_pc = (*the_low_target.get_pc) (regcache);
>
>       if (debug_threads)
> 	{
> 	  debug_printf ("  %s from pc 0x%lx\n", step ? "step" : "continue",
> 			(long) lwp->stop_pc);
> 	}
>     }
>
> could be guarded by:
>
>   if (proc->tdesc == NULL)
>
> Did you try that?

To make sure I understand you correctly, is the change below what you suggested?
If yes, I thought about this approach before, but I didn't try that
because I was worried that we should check every piece of code in
linux_resume_one_lwp_throw and its callees that don't access registers
when target description isn't initialised yet.  Especially for
the_low_target.prepare_to_resume, the implementation of this hook should
be aware of that target description may not be available.  Nowadays,
prepare_to_resume is only used to update HW debug registers, and
should be OK because GDBserver shouldn't update HW debug registers
before the inferior stops at the first instruction of the program.
However, in nat/x86-linux.c:lwp_debug_registers_changed, I saw such
comments

  struct arch_lwp_info *info = lwp_arch_private_info (lwp);

  /* NULL means either that this is the main thread still going
     through the shell, or that no watchpoint has been set yet.
     The debug registers are unchanged in either case.  */

I was wondering all the implementations of prepare_to_resume of
different targets should be aware that "thread still going through the
shell"?  I'll test this patch on targets other than x86 (such as arm and
aarch64) and see if it causes fails.
  
Pedro Alves July 24, 2015, 11:52 a.m. UTC | #3
On 07/24/2015 12:11 PM, Yao Qi wrote:
> Pedro Alves <palves@redhat.com> writes:
> 
>> Instead of this whole code block, I think we should be able to skip
>> the bits in the function that require accessing registers.  E.g.,
>> this:
>>
>>  /* Cancel actions that rely on GDB not changing the PC (e.g., the
>>      user used the "jump" command, or "set $pc = foo").  */
>>   if (lwp->stop_pc != get_pc (lwp))
>>     {
>>       /* Collecting 'while-stepping' actions doesn't make sense
>> 	 anymore.  */
>>       release_while_stepping_state_list (thread);
>>     }
>>
>> Could be guarded by:
>>
>>   if (thread->while_stepping != NULL)
>>
>> And this:
>>
>>   if (the_low_target.get_pc != NULL)
>>     {
>>       struct regcache *regcache = get_thread_regcache (current_thread, 1);
>>
>>       lwp->stop_pc = (*the_low_target.get_pc) (regcache);
>>
>>       if (debug_threads)
>> 	{
>> 	  debug_printf ("  %s from pc 0x%lx\n", step ? "step" : "continue",
>> 			(long) lwp->stop_pc);
>> 	}
>>     }
>>
>> could be guarded by:
>>
>>   if (proc->tdesc == NULL)
>>
>> Did you try that?
> 
> To make sure I understand you correctly, is the change below what you suggested?

Yes.

> If yes, I thought about this approach before, but I didn't try that
> because I was worried that we should check every piece of code in
> linux_resume_one_lwp_throw and its callees that don't access registers
> when target description isn't initialised yet.  Especially for
> the_low_target.prepare_to_resume, the implementation of this hook should
> be aware of that target description may not be available.  Nowadays,
> prepare_to_resume is only used to update HW debug registers, and
> should be OK because GDBserver shouldn't update HW debug registers
> before the inferior stops at the first instruction of the program.
> However, in nat/x86-linux.c:lwp_debug_registers_changed, I saw such
> comments
> 
>   struct arch_lwp_info *info = lwp_arch_private_info (lwp);
> 
>   /* NULL means either that this is the main thread still going
>      through the shell, or that no watchpoint has been set yet.
>      The debug registers are unchanged in either case.  */
> 
> I was wondering all the implementations of prepare_to_resume of
> different targets should be aware that "thread still going through the
> shell"?  

Yes, I think they should.  It's what the GDB side used to do already
when that code was x86 gdb only (and hence that shell comment, which
gdbserver doesn't use yet), and then other archs followed suit.
"Going through the shell" is the exact same as going through
the exec wrapper -- we're not interested in debugging the shell,
and it may well have a different bitness/architecture of the target
program we want to debug.

In practice that hook's implementation should want to avoid work if some
flag is not set, to avoid unnecessary ptrace syscalls and thus ends up
not doing anything when going through the shell/exec-wrapper.

> I'll test this patch on targets other than x86 (such as arm and
> aarch64) and see if it causes fails.

Thanks,
Pedro Alves
  

Patch

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index fa9dc29..10942c8 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -822,6 +822,14 @@  linux_create_inferior (char *program, char **allargs)
   return pid;
 }
 
+/* Implement the arch_setup target_ops method.  */
+
+static void
+linux_arch_setup (void)
+{
+  the_low_target.arch_setup ();
+}
+
 /* Attach to an inferior process.  Returns 0 on success, ERRNO on
    error.  */
 
@@ -2105,23 +2113,35 @@  linux_low_filter_event (int lwpid, int wstat)
     {
       struct process_info *proc;
 
-      /* Architecture-specific setup after inferior is running.  This
-	 needs to happen after we have attached to the inferior and it
-	 is stopped for the first time, but before we access any
-	 inferior registers.  */
+      /* Architecture-specific setup after inferior is running.  */
       proc = find_process_pid (pid_of (thread));
-      if (proc->priv->new_inferior)
+      if (proc->tdesc == NULL)
 	{
-	  struct thread_info *saved_thread;
+	  if (proc->attached)
+	    {
+	      struct thread_info *saved_thread;
 
-	  saved_thread = current_thread;
-	  current_thread = thread;
+	      /* This needs to happen after we have attached to the
+		 inferior and it is stopped for the first time, but
+		 before we access any inferior registers.  */
+	      saved_thread = current_thread;
+	      current_thread = thread;
 
-	  the_low_target.arch_setup ();
+	      the_low_target.arch_setup ();
 
-	  current_thread = saved_thread;
+	      current_thread = saved_thread;
 
-	  proc->priv->new_inferior = 0;
+	      proc->priv->new_inferior = 0;
+	    }
+	  else
+	    {
+	      /* The process is started, but GDBserver will do
+		 architecture-specific setup after the program stops at
+		 the first instruction.  */
+	      child->status_pending_p = 1;
+	      child->status_pending = wstat;
+	      return child;
+	    }
 	}
     }
 
@@ -3651,6 +3671,31 @@  linux_resume_one_lwp_throw (struct lwp_info *lwp,
   struct thread_info *thread = get_lwp_thread (lwp);
   struct thread_info *saved_thread;
   int fast_tp_collecting;
+  struct process_info *proc = get_thread_process (thread);
+
+  if (proc->tdesc == NULL)
+    {
+      /* Target description isn't initialised because the program hasn't
+	 stop at the first instruction.  It means GDBserver skips the
+	 extra traps from the wrapper program (see option --wrapper),
+	 and GDBserver just simply resumes the program without accessing
+	 inferior registers, because target description is unavailable
+	 at this point.  */
+      errno = 0;
+      lwp->stepping = step;
+      ptrace (step ? PTRACE_SINGLESTEP : PTRACE_CONT, lwpid_of (thread),
+	      (PTRACE_TYPE_ARG3) 0,
+	      /* Coerce to a uintptr_t first to avoid potential gcc warning
+		 of coercing an 8 byte integer to a 4 byte pointer.  */
+	      (PTRACE_TYPE_ARG4) (uintptr_t) signal);
+
+      if (errno)
+	perror_with_name ("resuming thread");
+
+      lwp->stopped = 0;
+      lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      return;
+    }
 
   if (lwp->stopped == 0)
     return;
@@ -4014,6 +4059,12 @@  need_step_over_p (struct inferior_list_entry *entry, void *dummy)
   struct lwp_info *lwp = get_thread_lwp (thread);
   struct thread_info *saved_thread;
   CORE_ADDR pc;
+  struct process_info *proc = get_thread_process (thread);
+
+  /* GDBserver is skipping the extra traps from the wrapper program,
+     don't have to do step over.  */
+  if (proc->tdesc == NULL)
+    return 0;
 
   /* LWPs which will not be resumed are not interesting, because we
      might not wait for them next time through linux_wait.  */
@@ -6635,6 +6686,7 @@  current_lwp_ptid (void)
 
 static struct target_ops linux_target_ops = {
   linux_create_inferior,
+  linux_arch_setup,
   linux_attach,
   linux_kill,
   linux_detach,
diff --git a/gdb/gdbserver/lynx-low.c b/gdb/gdbserver/lynx-low.c
index ee7b28a..5cf03be 100644
--- a/gdb/gdbserver/lynx-low.c
+++ b/gdb/gdbserver/lynx-low.c
@@ -722,6 +722,7 @@  lynx_request_interrupt (void)
 
 static struct target_ops lynx_target_ops = {
   lynx_create_inferior,
+  NULL,  /* arch_setup */
   lynx_attach,
   lynx_kill,
   lynx_detach,
diff --git a/gdb/gdbserver/nto-low.c b/gdb/gdbserver/nto-low.c
index 9276736..19f492f 100644
--- a/gdb/gdbserver/nto-low.c
+++ b/gdb/gdbserver/nto-low.c
@@ -925,6 +925,7 @@  nto_supports_non_stop (void)
 
 static struct target_ops nto_target_ops = {
   nto_create_inferior,
+  NULL,  /* arch_setup */
   nto_attach,
   nto_kill,
   nto_detach,
diff --git a/gdb/gdbserver/server.c b/gdb/gdbserver/server.c
index 497c5fe..1af7389 100644
--- a/gdb/gdbserver/server.c
+++ b/gdb/gdbserver/server.c
@@ -272,6 +272,7 @@  start_inferior (char **argv)
 	    }
 	  while (last_status.value.sig != GDB_SIGNAL_TRAP);
 	}
+      target_arch_setup ();
       return signal_pid;
     }
 
@@ -279,6 +280,8 @@  start_inferior (char **argv)
      (assuming success).  */
   last_ptid = mywait (pid_to_ptid (signal_pid), &last_status, 0, 0);
 
+  target_arch_setup ();
+
   if (last_status.kind != TARGET_WAITKIND_EXITED
       && last_status.kind != TARGET_WAITKIND_SIGNALLED)
     {
diff --git a/gdb/gdbserver/spu-low.c b/gdb/gdbserver/spu-low.c
index a56a889..cbee960 100644
--- a/gdb/gdbserver/spu-low.c
+++ b/gdb/gdbserver/spu-low.c
@@ -638,6 +638,7 @@  spu_request_interrupt (void)
 
 static struct target_ops spu_target_ops = {
   spu_create_inferior,
+  NULL,  /* arch_setup */
   spu_attach,
   spu_kill,
   spu_detach,
diff --git a/gdb/gdbserver/target.h b/gdb/gdbserver/target.h
index 9a40867..fefd8d1 100644
--- a/gdb/gdbserver/target.h
+++ b/gdb/gdbserver/target.h
@@ -74,6 +74,9 @@  struct target_ops
 
   int (*create_inferior) (char *program, char **args);
 
+  /* Architecture-specific setup.  */
+  void (*arch_setup) (void);
+
   /* Attach to a running process.
 
      PID is the process ID to attach to, specified by the user
@@ -445,6 +448,13 @@  void set_target_ops (struct target_ops *);
 #define create_inferior(program, args) \
   (*the_target->create_inferior) (program, args)
 
+#define target_arch_setup()			 \
+  do						 \
+    {						 \
+      if (the_target->arch_setup != NULL)	 \
+	(*the_target->arch_setup) ();		 \
+    } while (0)
+
 #define myattach(pid) \
   (*the_target->attach) (pid)
 
diff --git a/gdb/gdbserver/win32-low.c b/gdb/gdbserver/win32-low.c
index 64caf24..7ccb3dd 100644
--- a/gdb/gdbserver/win32-low.c
+++ b/gdb/gdbserver/win32-low.c
@@ -1785,6 +1785,7 @@  win32_get_tib_address (ptid_t ptid, CORE_ADDR *addr)
 
 static struct target_ops win32_target_ops = {
   win32_create_inferior,
+  NULL,  /* arch_setup */
   win32_attach,
   win32_kill,
   win32_detach,