[v7,6/7] Remote fork catch

Message ID 1428685786-18094-7-git-send-email-donb@codesourcery.com
State New, archived
Headers

Commit Message

Don Breazeal April 10, 2015, 5:09 p.m. UTC
  Hi Pedro,
This version of the patch incorporates changes based on your comments on
the previous version, as outlined below.

On 3/24/2015 5:47 AM, Pedro Alves wrote:
> On 03/17/2015 08:56 PM, Don Breazeal wrote:
> 
>> diff --git a/gdb/gdbserver/server.c b/gdb/gdbserver/server.c
>> index 8fa6f8a..346f2c4 100644
>> --- a/gdb/gdbserver/server.c
>> +++ b/gdb/gdbserver/server.c
>> @@ -1356,6 +1356,15 @@ handle_qxfer_threads_worker (struct inferior_list_entry *inf, void *arg)
>>    int core = target_core_of_thread (ptid);
>>    char core_s[21];
>>  
>> +  /* Skip new threads created as the result of a fork if we are not done
>> +     handling that fork event.  We won't know whether to tell GDB about
>> +     the new thread until we are done following the fork.  */
>> +  if ((last_status.kind == TARGET_WAITKIND_FORKED
>> +       || last_status.kind == TARGET_WAITKIND_VFORKED)
>> +      && (ptid_get_pid (last_status.value.related_pid)
>> +	  == ptid_get_pid (ptid)))
>> +    return;
> 
> This use of last_status here is really just as bad as
> get_last_target_status, for the same reasons.  What if a thread
> forks at the same time another thread hits a breakpoint, and
> we end up reporting the breakpoint first, leaving the fork
> pending?  Sounds like we'll end up listing the child fork
> thread then.

I moved this operation (removing the new, unreported thread from the list
reported by the target) to the host side in remote.c:remove_new_fork_child,
called from remote.c:remote_update_thread_list.

> 
>> +
>>    write_ptid (ptid_s, ptid);
>>  
>>    if (core != -1)
>> @@ -4144,3 +4153,12 @@ handle_target_event (int err, gdb_client_data client_data)
>>  
>>    return 0;
>>  }
>> +
>> +/* Retrieve the last waitstatus reported to GDB.  */
>> +
>> +void
>> +get_last_target_status (ptid_t *ptid, struct target_waitstatus *last)
>> +{
>> +  *ptid = last_ptid;
>> +  *last = last_status;
>> +}
> 
> Looks like you forgot to delete the function.  :-)

Sorry for the sloppiness.

> 
>> diff --git a/gdb/gdbserver/server.h b/gdb/gdbserver/server.h
>> index 09a5624..8c6ec27 100644
>> --- a/gdb/gdbserver/server.h
>> +++ b/gdb/gdbserver/server.h
>> @@ -113,6 +113,8 @@ typedef int gdb_fildes_t;
>>  /* Functions from server.c.  */
>>  extern int handle_serial_event (int err, gdb_client_data client_data);
>>  extern int handle_target_event (int err, gdb_client_data client_data);
>> +extern void get_last_target_status (ptid_t *ptid,
>> +				    struct target_waitstatus *last);
>>  
>>  #include "remote-utils.h"
>>  
>> diff --git a/gdb/remote.c b/gdb/remote.c
>> index d1ba62d..44ee89f 100644
>> --- a/gdb/remote.c
>> +++ b/gdb/remote.c
> 
> 
>> @@ -8012,6 +8060,23 @@ extended_remote_kill (struct target_ops *ops)
>>    int res;
>>    int pid = ptid_get_pid (inferior_ptid);
>>    struct remote_state *rs = get_remote_state ();
>> +  struct thread_info *tp = find_thread_ptid (inferior_ptid);
>> +
>> +  /* If we're stopped while forking and we haven't followed yet,
>> +     kill the child task.  We need to do this first because the
>> +     parent will be sleeping if this is a vfork.  */
>> +
>> +  if (tp != NULL && (tp->pending_follow.kind == TARGET_WAITKIND_FORKED
>> +		     || tp->pending_follow.kind == TARGET_WAITKIND_VFORKED))
> 
> Looks like this will miss killing the child if the user switches to
> some thread other than the one that forked, in case it was a multi-threaded
> program that forked.

Fixed, using a similar method as for removing the new unreported thread
in remote_update_thread_list above.

> 
>> +    {
>> +      ptid_t parent_ptid = inferior_ptid;
>> +
>> +      inferior_ptid = tp->pending_follow.value.related_pid;
>> +      set_general_thread (inferior_ptid);
>> +      extended_remote_kill (ops);
>> +      inferior_ptid = parent_ptid;
>> +      set_general_thread (inferior_ptid);
> 
> We never want to use the 'k' packet here, so this could
> just simply be:
> 
>          int child_pid = tp->pending_follow.value.related_pid;
>          remote_vkill (child_pid);

I made this change, thanks.

> 
>> +    }
>>  
>>    res = remote_vkill (pid, rs);
>>    if (res == -1 && !(rs->extended && remote_multi_process_p (rs)))
>> @@ -12036,6 +12101,14 @@ Specify the serial device it is connected to (e.g. /dev/ttya).";
>>    extended_remote_ops.to_supports_disable_randomization
>>      = extended_remote_supports_disable_randomization;
>>    extended_remote_ops.to_follow_fork = remote_follow_fork;
>> +  extended_remote_ops.to_insert_fork_catchpoint
>> +    = remote_insert_fork_catchpoint;
>> +  extended_remote_ops.to_remove_fork_catchpoint
>> +    = remote_remove_fork_catchpoint;
>> +  extended_remote_ops.to_insert_vfork_catchpoint
>> +    = remote_insert_vfork_catchpoint;
>> +  extended_remote_ops.to_remove_vfork_catchpoint
>> +    = remote_remove_vfork_catchpoint;
>>  }
>>  
>>  static int
>> diff --git a/gdb/testsuite/gdb.threads/fork-thread-pending.exp b/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>> index d229232..594f376 100644
>> --- a/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>> +++ b/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>> @@ -31,6 +31,26 @@ if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executab
>>      return -1
>>  }
>>  
>> +# Find a thread that did not fork and is not the main thread and
>> +# return its thread number.  We can't just hard-code the thread
>> +# number since we have no guarantee as to the ordering of the threads
>> +# in gdb.  
> 
> I don't understand this -- the test runs to main first, so the main
> thread should always be thread 1, no?
> 

I can no longer reproduce the thread ordering problem that I was seeing
when I implemented this.  Not sure why...my notes imply it might have
something to do with 'target remote', but I'm unsure at this point.  
At any rate this test doesn't need to be changed for this patch, so
I've removed it from the patch.

> 
>> We know that the main thread is in pthread_join and the
>> +# forking thread is in fork, so we use this rather ungainly regexp
>> +# to capture an entry from 'info threads' that doesn't show one of
>> +# those routines, then extract the thread number.
>> +
>> +proc find_unforked_thread { } {
>> +    gdb_test_multiple "info threads" "find unforked thread" {
>> +        -re "(\[^\r]*Thread\[^\r]* in \[^fp]\[^ot]\[^rh]\[^kr]\[^e]\[^a]\[^d]\[^_]\[^j]\[^\r]*\r\n)" {
>> +    	    regexp "(\[ 	]*)(\[0-9]*)(\[    ]*Thread\[^\r]*\r\n)" $expect_out(0,string) ignore lead_spc threadnum rest
>> +        }
>> +        timeout {
>> +	    set threadnum -1
>> +        }
>> +    }
>> +    return $threadnum
>> +}
>> +
>>  clean_restart ${binfile}
>>  
>>  if ![runto_main] then {
>> @@ -46,7 +66,8 @@ gdb_test "continue" "Catchpoint.*" "1, get to the fork event"
>>  
>>  gdb_test "info threads" " Thread .* Thread .* Thread .* Thread .*" "1, multiple threads found"
>>  
>> -gdb_test "thread 1" ".*" "1, switched away from event thread"
>> +set threadnum [find_unforked_thread]
>> +gdb_test "thread $threadnum" ".*" "1, switched away from event thread to thread $threadnum"
>>  
>>  gdb_test "continue" "Not resuming.*" "1, refused to resume"
>>  
>>
> 
> Thanks,
> Pedro Alves
> 

BTW, I still intend to submit a patch that removes the need to use 
get_last_target_status in linux-nat.c:linux_nat_kill, along with a test
for that scenario.

Thanks
--Don

This patch implements catchpoints for fork events on extended-remote
Linux targets.

Implementation appeared to be straightforward, requiring four new functions
in remote.c to implement insert/remove of fork/vfork catchpoints.  These
functions are essentially stubs that just return 0 ('success') if the
required features are enabled.  If the fork events are being reported, then
catchpoints are set and hit.

However, there are some extra issues that arise with catchpoints.

1) Thread creation reporting -- fork catchpoints are hit before the
   follow_fork has been completed.  When stopped at a fork catchpoint
   in the native implementation, the new process is not 'reported'
   until after the follow is done.  It doesn't show up in the inferiors
   list or the threads list.  However, in the gdbserver case, an
   'info threads' while stopped at a fork catchpoint will retrieve the
   new thread info from the target and add it to GDB's data structures,
   prior to the follow operations.  Because of this premature report,
   things on the GDB side eventually get very confused.

   So in remote.c:remote_update_thread_list, we check to see if there
   are any pending fork parent threads.  If there are we remove the
   related fork child thread from the thread list sent by the target.

2) Kill process before fork is followed -- on the native side in
   linux-nat.c:linux_nat_kill, there is some code to handle the case where
   a fork has occurred but follow_fork hasn't been called yet.  It does
   this by using the last status to determine if a follow is pending, and
   if it is, to kill the child task.  The use of last_status is fragile
   in situations like non-stop mode where other events may have occurred
   after the fork event.  This patch identifies a fork parent
   in remote.c:extended_remote_kill in a way similar to that used in
   thread creation reporting above.  If one is found, it kills the new
   child as well.

Tested on x64 Ubuntu Lucid, native, remote, extended-remote.  Tested the
case of killing the forking process before the fork has been followed
manually.

Thanks
--Don

gdb/
2015-04-08  Don Breazeal  <donb@codesourcery.com>
	* remote.c (remote_insert_fork_catchpoint): New function.
	(remote_remove_fork_catchpoint): New function.
	(remote_insert_vfork_catchpoint): New function.
	(remote_remove_vfork_catchpoint): New function.
	(pending_fork_parent_callback): New function.
	(remove_new_fork_child): New function.
	(remote_update_thread_list): Call remove_new_fork_child.
	(extended_remote_kill): Kill fork child when killing the
	parent before follow_fork completes.
	(init_extended_remote_ops): Initialize target vector with
	new fork catchpoint functions.


---
 gdb/remote.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
  

Comments

Pedro Alves April 15, 2015, 3:39 p.m. UTC | #1
On 04/10/2015 06:09 PM, Don Breazeal wrote:
> Hi Pedro,
> This version of the patch incorporates changes based on your comments on
> the previous version, as outlined below.
> 
> On 3/24/2015 5:47 AM, Pedro Alves wrote:
>> On 03/17/2015 08:56 PM, Don Breazeal wrote:
>>
>>> diff --git a/gdb/gdbserver/server.c b/gdb/gdbserver/server.c
>>> index 8fa6f8a..346f2c4 100644
>>> --- a/gdb/gdbserver/server.c
>>> +++ b/gdb/gdbserver/server.c
>>> @@ -1356,6 +1356,15 @@ handle_qxfer_threads_worker (struct inferior_list_entry *inf, void *arg)
>>>    int core = target_core_of_thread (ptid);
>>>    char core_s[21];
>>>  
>>> +  /* Skip new threads created as the result of a fork if we are not done
>>> +     handling that fork event.  We won't know whether to tell GDB about
>>> +     the new thread until we are done following the fork.  */
>>> +  if ((last_status.kind == TARGET_WAITKIND_FORKED
>>> +       || last_status.kind == TARGET_WAITKIND_VFORKED)
>>> +      && (ptid_get_pid (last_status.value.related_pid)
>>> +	  == ptid_get_pid (ptid)))
>>> +    return;
>>
>> This use of last_status here is really just as bad as
>> get_last_target_status, for the same reasons.  What if a thread
>> forks at the same time another thread hits a breakpoint, and
>> we end up reporting the breakpoint first, leaving the fork
>> pending?  Sounds like we'll end up listing the child fork
>> thread then.
> 
> I moved this operation (removing the new, unreported thread from the list
> reported by the target) to the host side in remote.c:remove_new_fork_child,
> called from remote.c:remote_update_thread_list.

Agreed, I think that's the right thing to do.  This gives us the
most flexibility to change the follow-fork model in core gdb.

Also, if you disconnect while stopped at an unfollowed fork, and then
reconnect, it's not clear to me whether the child should be
hidden from the user until the next continue then.  Again, leaving it
up to the client gives us the flexibility either way.

>>> diff --git a/gdb/testsuite/gdb.threads/fork-thread-pending.exp b/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>>> index d229232..594f376 100644
>>> --- a/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>>> +++ b/gdb/testsuite/gdb.threads/fork-thread-pending.exp
>>> @@ -31,6 +31,26 @@ if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executab
>>>      return -1
>>>  }
>>>  
>>> +# Find a thread that did not fork and is not the main thread and
>>> +# return its thread number.  We can't just hard-code the thread
>>> +# number since we have no guarantee as to the ordering of the threads
>>> +# in gdb.  
>>
>> I don't understand this -- the test runs to main first, so the main
>> thread should always be thread 1, no?
>>
> 
> I can no longer reproduce the thread ordering problem that I was seeing
> when I implemented this.  Not sure why...my notes imply it might have
> something to do with 'target remote', but I'm unsure at this point.  

Maybe it was necessary before:

  https://sourceware.org/ml/gdb-patches/2014-09/msg00734.html

> At any rate this test doesn't need to be changed for this patch, so
> I've removed it from the patch.


> 
> BTW, I still intend to submit a patch that removes the need to use 
> get_last_target_status in linux-nat.c:linux_nat_kill, along with a test
> for that scenario.

That'd be great!


> +/* Determine if THREAD is a pending fork parent thread.  ARG contains
> +   the pid of the process who's threads we want to check, or -1 if
> +   we want to check all threads.  */
> +
> +static int
> +pending_fork_parent_callback (struct thread_info *thread, void *arg)
> +{
> +  int pid = *(int *) arg;
> +
> +  if (thread->pending_follow.kind == TARGET_WAITKIND_FORKED
> +      || thread->pending_follow.kind == TARGET_WAITKIND_VFORKED)
> +    {
> +      if ((pid == -1) || (pid == ptid_get_pid (thread->ptid)))

Unnecessary parens:

      if (pid == -1 || pid == ptid_get_pid (thread->ptid))


> +	return 1;
> +    }
> +
> +  return 0;
> +}
> +
> +/* If CONTEXT contains any fork child threads that have not been
> +   reported yet, remove them from the CONTEXT list.  If such a
> +   thread exists it is because we are stopped at a fork catchpoint
> +   and have not yet called follow_fork, which will set up the
> +   host-side data structures for the new process.  */
> +
> +static void
> +remove_new_fork_child (struct threads_listing_context *context)
> +{
> +  struct thread_info * thread;
> +  int pid = -1;
> +
> +  /* Check to see if there is an in-progress fork parent.  */
> +  thread = iterate_over_threads (pending_fork_parent_callback, &pid);
> +  if (thread != NULL)

In non-stop mode, if you're debugging multiple process, multiple
processes can fork at the same, and then we end up with multiple
threads with an in-progress fork parent.  So this needs to walk
the whole thread list, not just stop at the first.  Either
use ALL_NON_EXITED_THREADS, or move the loop below to
pending_fork_parent_callback (or to a helper function called
by that).

> +    {
> +      ptid_t child_ptid = thread->pending_follow.value.related_pid;
> +      struct thread_item *item;
> +      int i;
> +
> +      for (i = 0; VEC_iterate (thread_item_t, context->items, i, item); ++i)
> +	{
> +	  if (ptid_equal (item->ptid, child_ptid))
> +	    {
> +	      VEC_ordered_remove (thread_item_t, context->items, i);
> +	      break;
> +	    }
> +	}
> +    }
> +}
> +
>  /* Implement the to_update_thread_list function for the remote
>     targets.  */
>  
> @@ -2874,6 +2964,10 @@ remote_update_thread_list (struct target_ops *ops)
>  	    }
>          }
>  
> +      /* Remove any unreported fork child from CONTEXT so that
> +	 we don't interfere with follow fork.  */
> +      remove_new_fork_child (&context);

I think there's a race here, in non-stop mode.  Consider:

 #1 - process forks just before gdb starts fetching the remote thread
      list.
 #2 - gdbserver adds the fork child  its thread list.
 #3 - gdbserver queues the fork event, sends vStopped notification
 #4 - gdb/remote_update_thread_list pulls the thread list
 #5 - we're now in remove_new_fork_child, but we don't know
      about the fork event yet.  It's still pending in the vStopped
      queue.

So I think that we need to make remote_update_thread_list do,
in this order:

 #1 - fetch the remote thread list
 #2 - fetch the pending vStopped notifications
        (remote_notif_get_pending_events)
 #3 - call remove_new_fork_child
 #4 - add threads we don't know about yet to our list.

and make remove_new_fork_child also peek at the
pending vStopped events queue (and in the future at
any other layers of pending events in the core side.)

> +      child_pid = ptid_get_pid (thread->pending_follow.value.related_pid);
> +      res = remote_vkill (child_pid, rs);
> +      if (res != 0)
> +	error (_("Can't kill fork child process"));

It'll probably be good to include the PID in the error message.

> +    }

Thanks,
Pedro Alves
  

Patch

diff --git a/gdb/remote.c b/gdb/remote.c
index 4e65092..224a520 100644
--- a/gdb/remote.c
+++ b/gdb/remote.c
@@ -1477,6 +1477,46 @@  remote_vfork_event_p (struct remote_state *rs)
   return packet_support (PACKET_vfork_event_feature) == PACKET_ENABLE;
 }
 
+/* Insert fork catchpoint target routine.  If fork events are enabled
+   then return success, nothing more to do.  */
+
+static int
+remote_insert_fork_catchpoint (struct target_ops *ops, int pid)
+{
+  struct remote_state *rs = get_remote_state ();
+
+  return !remote_fork_event_p (rs);
+}
+
+/* Remove fork catchpoint target routine.  Nothing to do, just
+   return success.  */
+
+static int
+remote_remove_fork_catchpoint (struct target_ops *ops, int pid)
+{
+  return 0;
+}
+
+/* Insert vfork catchpoint target routine.  If vfork events are enabled
+   then return success, nothing more to do.  */
+
+static int
+remote_insert_vfork_catchpoint (struct target_ops *ops, int pid)
+{
+  struct remote_state *rs = get_remote_state ();
+
+  return !remote_vfork_event_p (rs);
+}
+
+/* Remove vfork catchpoint target routine.  Nothing to do, just
+   return success.  */
+
+static int
+remote_remove_vfork_catchpoint (struct target_ops *ops, int pid)
+{
+  return 0;
+}
+
 /* Tokens for use by the asynchronous signal handlers for SIGINT.  */
 static struct async_signal_handler *async_sigint_remote_twice_token;
 static struct async_signal_handler *async_sigint_remote_token;
@@ -2815,6 +2855,56 @@  remote_get_threads_with_qthreadinfo (struct target_ops *ops,
   return 0;
 }
 
+/* Determine if THREAD is a pending fork parent thread.  ARG contains
+   the pid of the process who's threads we want to check, or -1 if
+   we want to check all threads.  */
+
+static int
+pending_fork_parent_callback (struct thread_info *thread, void *arg)
+{
+  int pid = *(int *) arg;
+
+  if (thread->pending_follow.kind == TARGET_WAITKIND_FORKED
+      || thread->pending_follow.kind == TARGET_WAITKIND_VFORKED)
+    {
+      if ((pid == -1) || (pid == ptid_get_pid (thread->ptid)))
+	return 1;
+    }
+
+  return 0;
+}
+
+/* If CONTEXT contains any fork child threads that have not been
+   reported yet, remove them from the CONTEXT list.  If such a
+   thread exists it is because we are stopped at a fork catchpoint
+   and have not yet called follow_fork, which will set up the
+   host-side data structures for the new process.  */
+
+static void
+remove_new_fork_child (struct threads_listing_context *context)
+{
+  struct thread_info * thread;
+  int pid = -1;
+
+  /* Check to see if there is an in-progress fork parent.  */
+  thread = iterate_over_threads (pending_fork_parent_callback, &pid);
+  if (thread != NULL)
+    {
+      ptid_t child_ptid = thread->pending_follow.value.related_pid;
+      struct thread_item *item;
+      int i;
+
+      for (i = 0; VEC_iterate (thread_item_t, context->items, i, item); ++i)
+	{
+	  if (ptid_equal (item->ptid, child_ptid))
+	    {
+	      VEC_ordered_remove (thread_item_t, context->items, i);
+	      break;
+	    }
+	}
+    }
+}
+
 /* Implement the to_update_thread_list function for the remote
    targets.  */
 
@@ -2874,6 +2964,10 @@  remote_update_thread_list (struct target_ops *ops)
 	    }
         }
 
+      /* Remove any unreported fork child from CONTEXT so that
+	 we don't interfere with follow fork.  */
+      remove_new_fork_child (&context);
+
       /* And now add threads we don't know about yet to our list.  */
       for (i = 0;
 	   VEC_iterate (thread_item_t, context.items, i, item);
@@ -7998,6 +8092,21 @@  extended_remote_kill (struct target_ops *ops)
   int res;
   int pid = ptid_get_pid (inferior_ptid);
   struct remote_state *rs = get_remote_state ();
+  struct thread_info *thread;
+
+  /* If we're stopped while forking and we haven't followed yet,
+     kill the child task.  We need to do this first because the
+     parent will be sleeping if this is a vfork.  */
+  thread = iterate_over_threads (pending_fork_parent_callback, &pid);
+  if (thread != NULL)
+    {
+      int child_pid;
+
+      child_pid = ptid_get_pid (thread->pending_follow.value.related_pid);
+      res = remote_vkill (child_pid, rs);
+      if (res != 0)
+	error (_("Can't kill fork child process"));
+    }
 
   res = remote_vkill (pid, rs);
   if (res == -1 && !(rs->extended && remote_multi_process_p (rs)))
@@ -11913,6 +12022,14 @@  Specify the serial device it is connected to (e.g. /dev/ttya).";
   extended_remote_ops.to_supports_disable_randomization
     = extended_remote_supports_disable_randomization;
   extended_remote_ops.to_follow_fork = remote_follow_fork;
+  extended_remote_ops.to_insert_fork_catchpoint
+    = remote_insert_fork_catchpoint;
+  extended_remote_ops.to_remove_fork_catchpoint
+    = remote_remove_fork_catchpoint;
+  extended_remote_ops.to_insert_vfork_catchpoint
+    = remote_insert_vfork_catchpoint;
+  extended_remote_ops.to_remove_vfork_catchpoint
+    = remote_remove_vfork_catchpoint;
 }
 
 static int