diff mbox

[v3] Fix failure to detach if threads exit while detaching on linux

Message ID 1464972191-13912-1-git-send-email-antoine.tremblay@ericsson.com
State New
Headers show

Commit Message

Antoine Tremblay June 3, 2016, 4:43 p.m. UTC
Updated patch...

Thanks for looking at this!

Antoine
--

This patch fixes detaching on Linux when some threads exit while GDB or
GDBserver is in the middle of detaching.

Previously, GDB/GDBserver detached threads one at a time, letting each
one continue as soon as it was detached.  If one of those detached
threads then called exit, for example, the remaining threads were
destroyed, but GDB/GDBserver would still try to detach them and fail
with a message like "Can't detach process.", because ptrace could not
execute the operation on a thread that no longer existed.

This patch uses check_ptrace_stopped_lwp_gone (which relies on
linux_proc_pid_is_trace_stopped_nowarn), as is already done in the resume
case, to avoid reporting an error when the ptrace failure is expected
because the thread has already exited.

This patch adds the gdb.threads/detach-gone-thread.exp test for this case.

Tested on x86-linux with {unix, native-gdbserver, native-extended-gdbserver}

gdb/gdbserver/ChangeLog:

	* linux-low.c (check_ptrace_stopped_lwp_gone): Move up to be used
	by linux_detach_one_lwp.
	(linux_detach_one_lwp): Report the error only if
	check_ptrace_stopped_lwp_gone is false.

gdb/ChangeLog:

	* inf-ptrace.c (inf_ptrace_detach): Call inf_ptrace_detach_success
	on success.
	(inf_ptrace_detach_success): New function.
	* inf-ptrace.h (inf_ptrace_detach_success): New function declaration.
	* linux-nat.c (check_ptrace_stopped_lwp_gone): Move up to be used
	by detach_callback.
	(detach_callback): Report the error only if
	check_ptrace_stopped_lwp_gone is false.
	(linux_nat_detach): Likewise.

gdb/testsuite/ChangeLog:

	* gdb.threads/detach-gone-thread.c: New file.
	* gdb.threads/detach-gone-thread.exp: New file.
---
 gdb/gdbserver/linux-low.c                        | 73 +++++++++----------
 gdb/inf-ptrace.c                                 | 10 ++-
 gdb/inf-ptrace.h                                 |  4 ++
 gdb/linux-nat.c                                  | 90 ++++++++++++++----------
 gdb/testsuite/gdb.threads/detach-gone-thread.c   | 47 +++++++++++++
 gdb/testsuite/gdb.threads/detach-gone-thread.exp | 59 ++++++++++++++++
 6 files changed, 210 insertions(+), 73 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.c
 create mode 100644 gdb/testsuite/gdb.threads/detach-gone-thread.exp
diff mbox

Patch

diff --git a/gdb/gdbserver/linux-low.c b/gdb/gdbserver/linux-low.c
index 81134b0..5f02dab 100644
--- a/gdb/gdbserver/linux-low.c
+++ b/gdb/gdbserver/linux-low.c
@@ -1447,6 +1447,39 @@  get_detach_signal (struct thread_info *thread)
     }
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  struct thread_info *thread = get_lwp_thread (lp);
+
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status_pending_p = 0;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
 {
@@ -1480,9 +1513,10 @@  linux_detach_one_lwp (struct inferior_list_entry *entry, void *args)
     the_low_target.prepare_to_resume (lwp);
   if (ptrace (PTRACE_DETACH, lwpid_of (thread), (PTRACE_TYPE_ARG3) 0,
 	      (PTRACE_TYPE_ARG4) (long) sig) < 0)
-    error (_("Can't detach %s: %s"),
-	   target_pid_to_str (ptid_of (thread)),
-	   strerror (errno));
+    if (!check_ptrace_stopped_lwp_gone (lwp))
+      error (_("Can't detach %s: %s"),
+	     target_pid_to_str (ptid_of (thread)),
+	     strerror (errno));
 
   delete_lwp (lwp);
   return 0;
@@ -4331,39 +4365,6 @@  linux_resume_one_lwp_throw (struct lwp_info *lwp,
   lwp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  struct thread_info *thread = get_lwp_thread (lp);
-
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (lwpid_of (thread)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status_pending_p = 0;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/inf-ptrace.c b/gdb/inf-ptrace.c
index 329d8fb..27f531b 100644
--- a/gdb/inf-ptrace.c
+++ b/gdb/inf-ptrace.c
@@ -265,9 +265,17 @@  inf_ptrace_detach (struct target_ops *ops, const char *args, int from_tty)
   error (_("This system does not support detaching from a process"));
 #endif
 
+  inf_ptrace_detach_success(ops);
+}
+
+/* See inf-ptrace.h.  */
+
+void
+inf_ptrace_detach_success (struct target_ops *ops)
+{
+  pid_t pid = ptid_get_pid (inferior_ptid);
   inferior_ptid = null_ptid;
   detach_inferior (pid);
-
   inf_child_maybe_unpush_target (ops);
 }
 
diff --git a/gdb/inf-ptrace.h b/gdb/inf-ptrace.h
index 0a26720..f1fc111 100644
--- a/gdb/inf-ptrace.h
+++ b/gdb/inf-ptrace.h
@@ -38,4 +38,8 @@  extern struct target_ops *
 
 extern pid_t get_ptrace_pid (ptid_t);
 
+
+/* Clean up the inferior after a successful ptrace detach.  */
+extern void inf_ptrace_detach_success (struct target_ops *ops);
+
 #endif
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index e6d525f..56a222c 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -1382,6 +1382,38 @@  get_pending_status (struct lwp_info *lp, int *status)
   return 0;
 }
 
+/* Called when we try to resume or detach a stopped LWP and that errors
+   out.  If the LWP is no longer in ptrace-stopped state (meaning it's
+   zombie, or about to become), discard the error, clear any pending
+   status the LWP may have, and return true (we'll collect the exit status
+   soon enough).  Otherwise, return false.  */
+
+static int
+check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
+{
+  /* If we get an error after resuming the LWP successfully, we'd
+     confuse !T state for the LWP being gone.  */
+  gdb_assert (lp->stopped);
+
+  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
+     because even if ptrace failed with ESRCH, the tracee may be "not
+     yet fully dead", but already refusing ptrace requests.  In that
+     case the tracee has 'R (Running)' state for a little bit
+     (observed in Linux 3.18).  See also the note on ESRCH in the
+     ptrace(2) man page.  Instead, check whether the LWP has any state
+     other than ptrace-stopped.  */
+
+  /* Don't assume anything if /proc/PID/status can't be read.  */
+  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
+    {
+      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
+      lp->status = 0;
+      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
+      return 1;
+    }
+  return 0;
+}
+
 static int
 detach_callback (struct lwp_info *lp, void *data)
 {
@@ -1418,8 +1450,9 @@  detach_callback (struct lwp_info *lp, void *data)
       errno = 0;
       if (ptrace (PTRACE_DETACH, ptid_get_lwp (lp->ptid), 0,
 		  WSTOPSIG (status)) < 0)
-	error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
-	       safe_strerror (errno));
+	if (!check_ptrace_stopped_lwp_gone (lp))
+	  error (_("Can't detach %s: %s"), target_pid_to_str (lp->ptid),
+		 safe_strerror (errno));
 
       if (debug_linux_nat)
 	fprintf_unfiltered (gdb_stdlog,
@@ -1480,7 +1513,6 @@  linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
 
   if (linux_nat_prepare_to_resume != NULL)
     linux_nat_prepare_to_resume (main_lwp);
-  delete_lwp (main_lwp->ptid);
 
   if (forks_exist_p ())
     {
@@ -1491,7 +1523,26 @@  linux_nat_detach (struct target_ops *ops, const char *args, int from_tty)
       linux_fork_detach (args, from_tty);
     }
   else
-    linux_ops->to_detach (ops, args, from_tty);
+    {
+      TRY
+	{
+	  linux_ops->to_detach (ops, args, from_tty);
+	}
+      CATCH (ex, RETURN_MASK_ERROR)
+	{
+	  if (!check_ptrace_stopped_lwp_gone (main_lwp))
+	    {
+	      throw_exception (ex);
+	    }
+	  /* Ignore the error since the thread is gone already.  */
+	  else
+	    {
+	      inf_ptrace_detach_success (ops);
+	    }
+	}
+      END_CATCH
+    }
+  delete_lwp (main_lwp->ptid);
 }
 
 /* Resume execution of the inferior process.  If STEP is nonzero,
@@ -1531,38 +1581,6 @@  linux_resume_one_lwp_throw (struct lwp_info *lp, int step,
   registers_changed_ptid (lp->ptid);
 }
 
-/* Called when we try to resume a stopped LWP and that errors out.  If
-   the LWP is no longer in ptrace-stopped state (meaning it's zombie,
-   or about to become), discard the error, clear any pending status
-   the LWP may have, and return true (we'll collect the exit status
-   soon enough).  Otherwise, return false.  */
-
-static int
-check_ptrace_stopped_lwp_gone (struct lwp_info *lp)
-{
-  /* If we get an error after resuming the LWP successfully, we'd
-     confuse !T state for the LWP being gone.  */
-  gdb_assert (lp->stopped);
-
-  /* We can't just check whether the LWP is in 'Z (Zombie)' state,
-     because even if ptrace failed with ESRCH, the tracee may be "not
-     yet fully dead", but already refusing ptrace requests.  In that
-     case the tracee has 'R (Running)' state for a little bit
-     (observed in Linux 3.18).  See also the note on ESRCH in the
-     ptrace(2) man page.  Instead, check whether the LWP has any state
-     other than ptrace-stopped.  */
-
-  /* Don't assume anything if /proc/PID/status can't be read.  */
-  if (linux_proc_pid_is_trace_stopped_nowarn (ptid_get_lwp (lp->ptid)) == 0)
-    {
-      lp->stop_reason = TARGET_STOPPED_BY_NO_REASON;
-      lp->status = 0;
-      lp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
-      return 1;
-    }
-  return 0;
-}
-
 /* Like linux_resume_one_lwp_throw, but no error is thrown if the LWP
    disappears while we try to resume it.  */
 
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.c b/gdb/testsuite/gdb.threads/detach-gone-thread.c
new file mode 100644
index 0000000..19f6fc2
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.c
@@ -0,0 +1,47 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2016 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+pthread_barrier_t barrier;
+
+#define NTHREADS 256
+
+void *
+child_function (void *arg)
+{
+  pthread_barrier_wait (&barrier);
+  _exit (0);
+}
+
+int
+main ()
+{
+  pthread_t threads[NTHREADS];
+  int res;
+  int i;
+
+  pthread_barrier_init (&barrier, NULL, NTHREADS + 1);
+
+  for (i = 0; i < NTHREADS; i++)
+    res = pthread_create (&threads[i], NULL, child_function, NULL);
+
+  pthread_barrier_wait (&barrier);
+  exit (0);
+}
diff --git a/gdb/testsuite/gdb.threads/detach-gone-thread.exp b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
new file mode 100644
index 0000000..b8caf18
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/detach-gone-thread.exp
@@ -0,0 +1,59 @@ 
+# Copyright 2015-2016 Free Software Foundation, Inc.
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+standard_testfile
+
+if {[gdb_compile_pthreads "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+    return -1
+}
+
+clean_restart ${testfile}
+
+if ![runto_main] {
+    fail "Can't run to main"
+    return -1
+}
+
+proc test_server_exit {} {
+    global server_spawn_id
+    if ![info exists server_spawn_id] {
+	return
+    }
+
+    gdb_expect {
+	-i $server_spawn_id
+	eof {
+	    pass "server exits"
+	    wait -i $server_spawn_id
+	    unset server_spawn_id
+	}
+	timeout {
+	    fail "server exits (timeout)"
+	}
+    }
+}
+
+gdb_breakpoint "_exit"
+gdb_continue_to_breakpoint "_exit" ".*_exit.*"
+set test "detach"
+gdb_test_multiple $test $test {
+    -re "Detaching from .*, process $decimal\r\nEnding remote debugging\.\r\n$gdb_prompt $" {
+        # This is what you get with "target remote".
+        pass $test
+	test_server_exit
+    }
+    -re "Detaching from .*, process $decimal\r\n$gdb_prompt $" {
+        pass $test
+    }
+}