diff mbox

[1/2] linux-nat.c: fix a few lin_lwp_attach_lwp issues

Message ID 1423422659-27559-2-git-send-email-palves@redhat.com
State New
Headers show

Commit Message

Pedro Alves Feb. 8, 2015, 7:10 p.m. UTC
This function has a few latent bugs that are triggered by a non-stop
mode test that will be added in a subsequent patch.

First, as described in the function's intro comment, the function is
supposed to return 1 if we're already auto attached to the thread, but
haven't processed the PTRACE_EVENT_CLONE event of its parent thread
yet.  Currently, it returns 0 in that case.

Then, we may find that we're trying to attach to a clone child that
hasn't yet stopped for its initial stop, and therefore 'waitpid(...,
WNOHANG)' returns 0.  In that case, we're currently adding the LWP to
the stopped_pids list, which results in linux_handle_extended_wait
skipping the waitpid call on the child, and thus confusing things
later on when the child eventually reports the stop.

Then, the tail end of lin_lwp_attach_lwp always sets the
last_resume_kind of the LWP to resume_stop, which is wrong given that
the user may be doing "info threads" while some threads are running.

And then, the else branch of lin_lwp_attach_lwp always sets the
stopped flag of the LWP.  This branch is reached if the LWP is the
main LWP, which may well be running at this point (so it's wrong to
set its 'stopped' flag).

AFAICS, there's no reason anymore for special-casing the main/leader
LWP here:

- For the "attach" case, linux_nat_attach already adds the main LWP to
the lwp list, and sets its 'stopped' flag.

- For the "run" case, after linux_nat_create_inferior, we end up in
linux_nat_wait_1 here:

  /* The first time we get here after starting a new inferior, we may
     not have added it to the LWP list yet - this is the earliest
     moment at which we know its PID.  */
  if (ptid_is_pid (inferior_ptid))
      /* Upgrade the main thread's ptid.  */
      thread_change_ptid (inferior_ptid,
			  ptid_build (ptid_get_pid (inferior_ptid),
				      ptid_get_pid (inferior_ptid), 0));

      lp = add_initial_lwp (inferior_ptid);
      lp->resumed = 1;

... which adds the LWP to the LWP list already, before
lin_lwp_attach_lwp can ever be reached.

2015-02-08  Pedro Alves  <palves@redhat.com>

	* linux-nat.c (lin_lwp_attach_lwp): No longer special case the
	main LWP.  Handle the case of waitpid returning 0 if we're already
	attached to the LWP.  Don't set the LWP's last_resume_kind to
	resume_stop if we already knew about the LWP.
	(linux_nat_filter_event): Add debug logs.
 gdb/linux-nat.c | 60 +++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 21 deletions(-)
diff mbox


diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index 169188a..ba62adc 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -994,13 +994,12 @@  lin_lwp_attach_lwp (ptid_t ptid)
   lp = find_lwp_pid (ptid);
   lwpid = ptid_get_lwp (ptid);
-  /* We assume that we're already attached to any LWP that has an id
-     equal to the overall process id, and to any LWP that is already
+  /* We assume that we're already attached to any LWP that is already
      in our list of LWPs.  If we're not seeing exit events from threads
      and we've had PID wraparound since we last tried to stop all threads,
      this assumption might be wrong; fortunately, this is very unlikely
      to happen.  */
-  if (lwpid != ptid_get_pid (ptid) && lp == NULL)
+  if (lp == NULL)
       int status, cloned = 0, signalled = 0;
@@ -1018,23 +1017,50 @@  lin_lwp_attach_lwp (ptid_t ptid)
 		  /* We've already seen this thread stop, but we
 		     haven't seen the PTRACE_EVENT_CLONE extended
 		     event yet.  */
-		  return 0;
+		  if (debug_linux_nat)
+		    fprintf_unfiltered (gdb_stdlog,
+					"LLAL: attach failed, but already seen "
+					"this thread %s stop\n",
+					target_pid_to_str (ptid));
+		  return 1;
 		  int new_pid;
 		  int status;
-		  /* See if we've got a stop for this new child
-		     pending.  If so, we're already attached.  */
+		  if (debug_linux_nat)
+		    fprintf_unfiltered (gdb_stdlog,
+					"LLAL: attach failed, and haven't seen "
+					"this thread %s stop yet\n",
+					target_pid_to_str (ptid));
+		  /* We may or may not be attached to the LWP already.
+		     Try waitpid on it.  If that errors, we're not
+		     attached to the LWP yet.  Otherwise, we're
+		     already attached.  */
 		  gdb_assert (lwpid > 0);
 		  new_pid = my_waitpid (lwpid, &status, WNOHANG);
 		  if (new_pid == -1 && errno == ECHILD)
 		    new_pid = my_waitpid (lwpid, &status, __WCLONE | WNOHANG);
 		  if (new_pid != -1)
-		      if (WIFSTOPPED (status))
-			add_to_pid_list (&stopped_pids, lwpid, status);
+		      if (new_pid == 0)
+			{
+			  /* The child hasn't stopped for its initial
+			     SIGSTOP stop yet.  */
+			  if (debug_linux_nat)
+			    fprintf_unfiltered (gdb_stdlog,
+						"LLAL: child hasn't "
+						"stopped yet\n");
+			}
+		      else if (WIFSTOPPED (status))
+			{
+			  if (debug_linux_nat)
+			    fprintf_unfiltered (gdb_stdlog,
+						"LLAL: adding to stopped_pids\n");
+			  add_to_pid_list (&stopped_pids, lwpid, status);
+			}
 		      return 1;
@@ -1061,6 +1087,7 @@  lin_lwp_attach_lwp (ptid_t ptid)
       lp = add_lwp (ptid);
       lp->stopped = 1;
+      lp->last_resume_kind = resume_stop;
       lp->cloned = cloned;
       lp->signalled = signalled;
       if (WSTOPSIG (status) != SIGSTOP)
@@ -1079,20 +1106,7 @@  lin_lwp_attach_lwp (ptid_t ptid)
 			      status_to_str (status));
-  else
-    {
-      /* We assume that the LWP representing the original process is
-         already stopped.  Mark it as stopped in the data structure
-         that the GNU/linux ptrace layer uses to keep track of
-         threads.  Note that this won't have already been done since
-         the main thread will have, we assume, been stopped by an
-         attach from a different layer.  */
-      if (lp == NULL)
-	lp = add_lwp (ptid);
-      lp->stopped = 1;
-    }
-  lp->last_resume_kind = resume_stop;
   return 0;
@@ -2855,6 +2869,10 @@  linux_nat_filter_event (int lwpid, int status)
   if (WIFSTOPPED (status) && !lp)
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "LHEW: saving LWP %ld status %s in stopped_pids list\n",
+			    (long) lwpid, status_to_str (status));
       add_to_pid_list (&stopped_pids, lwpid, status);
       return NULL;