[review,v4,gdb/threads] Fix hang in stop_all_threads after killing inferior

Message ID 20200205195258.5763C2816C@gnutoolchain-gerrit.osci.io
State New, archived
Headers

Commit Message

Simon Marchi (Code Review) Feb. 5, 2020, 7:52 p.m. UTC
  Change URL: https://gnutoolchain-gerrit.osci.io/r/c/binutils-gdb/+/759
......................................................................

[gdb/threads] Fix hang in stop_all_threads after killing inferior

Consider a two-threaded testcase a.out, sleeping in both its threads:
...
$ gdb -ex r --args a.out
Reading symbols from a.out...
Starting program: /data/gdb_versions/devel/a.out
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
[New Thread 0x7ffff77fe700 (LWP 31268)]
...

Typing ^C causes stop_all_threads to be executed, and if an external SIGKILL
(such as caused by killall -9 a.out) arrives at the start of stop_all_threads,
gdb hangs in stop_all_threads after giving this warning:
...
warning: unable to open /proc file '/proc/24938/status'
...

Using "set debug infrun 1" we can see in more detail where we hang:
...
infrun: stop_all_threads
infrun: stop_all_threads, pass=0, iterations=0
infrun:   Thread 0x7ffff7fa6740 (LWP 10264) not executing
infrun:   Thread 0x7ffff77fe700 (LWP 10268) executing, need stop
infrun: target_wait (-1.0.0, status) =
infrun:   10264.10268.0 [Thread 0x7ffff77fe700 (LWP 10268)],
infrun:   status->kind = signalled, signal = GDB_SIGNAL_KILL
infrun: stop_all_threads status->kind = signalled, signal = GDB_SIGNAL_KILL \
  Thread 0x7ffff77fe700 (LWP 10268)
infrun:   Thread 0x7ffff7fa6740 (LWP 10264) not executing
infrun:   Thread 0x7ffff77fe700 (LWP 10268) executing, already stopping
warning: unable to open /proc file '/proc/10264/status'
infrun: target_wait (-1.0.0, status) =
infrun:   -1.0.0 [process -1],
infrun:   status->kind = no-resumed
infrun: infrun_async(0)
infrun: stop_all_threads status->kind = no-resumed process -1
infrun:   Thread 0x7ffff7fa6740 (LWP 10264) not executing
infrun:   Thread 0x7ffff77fe700 (LWP 10268) executing, already stopping
infrun: stop_all_threads status->kind = no-resumed process -1
infrun:   Thread 0x7ffff7fa6740 (LWP 10264) not executing
infrun:   Thread 0x7ffff77fe700 (LWP 10268) executing, already stopping
infrun: stop_all_threads status->kind = no-resumed process -1
infrun:   Thread 0x7ffff7fa6740 (LWP 10264) not executing
infrun:   Thread 0x7ffff77fe700 (LWP 10268) executing, already stopping
<repeat>
......

So, we're hanging in the 'while (1)' loop in stop_all_threads as follows:
- thread t is tested, and both t->executing and t->stop_requested are found
  to be 1 (noted with 'executing, already stopping')
- consequently need_wait is set 1
- consequently wait_one is executed
- wait_one returns a TARGET_WAITKIND_NO_RESUMED event, which is handled by
  continuing at the start of the loop

The loop actually starts with update_thread_list (), but that doesn't seem
to change the state of the threads.

Fix the hang by:
- detecting the first sign of trouble: the TARGET_WAITKIND_SIGNALLED event
  with signal GDB_SIGNAL_KILL,
- making that event pending again,
- making sure the corresponding thread will not set need_wait again
  (by setting t->executing == 0)
- making sure that the corresponding thread keeps t->resumed == 1 in the
  the all_non_exited_threads loop

This results in the ^C being handled without showing the user that the
test-case was killed:
...
^C
Thread 1 received signal SIGINT, Interrupt.
0x00007ffff78c50f0 in nanosleep () from /lib64/libc.so.6
(gdb)
...

But a subsequent continue does show that:
...
(gdb) c
Continuing.

Program terminated with signal SIGKILL, Killed.
The program no longer exists.
(gdb)
....

Build and reg-tested on x86_64-linux.

gdb/ChangeLog:

2020-01-29  Tom de Vries  <tdevries@suse.de>

	PR threads/25478
	* infrun.c (stop_all_threads): Detecting event
	TARGET_WAITKIND_SIGNALLED with signal GDB_SIGNAL_KILL, make event
	pending again, set t->executing to 0 and keep t->resumed set to 1.

gdb/testsuite/ChangeLog:

2020-02-05  Tom de Vries  <tdevries@suse.de>

	* gdb.threads/kill-in-stop-all-threads.c: New test.
	* gdb.threads/kill-in-stop-all-threads.exp: New file.

Change-Id: Ibe1f29251fe2ff1c1991f041babbe18373c113b1
---
M gdb/infrun.c
A gdb/testsuite/gdb.threads/kill-in-stop-all-threads.c
A gdb/testsuite/gdb.threads/kill-in-stop-all-threads.exp
3 files changed, 182 insertions(+), 2 deletions(-)
  

Patch

diff --git a/gdb/infrun.c b/gdb/infrun.c
index 3e846f8..a722da5 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -4749,7 +4749,12 @@ 
 
 		  /* The thread may be not executing, but still be
 		     resumed with a pending status to process.  */
-		  t->resumed = false;
+		  if (t->suspend.waitstatus.kind == TARGET_WAITKIND_SIGNALLED
+		      && t->suspend.waitstatus.value.sig == GDB_SIGNAL_KILL
+		      && t->suspend.waitstatus_pending_p)
+		    ;
+		  else
+		    t->resumed = false;
 		}
 	    }
 
@@ -4772,7 +4777,15 @@ 
 				  target_pid_to_str (event.ptid).c_str ());
 	    }
 
-	  if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED
+	  if (event.ws.kind == TARGET_WAITKIND_SIGNALLED
+	      && event.ws.value.sig == GDB_SIGNAL_KILL)
+	    {
+	      thread_info *t = find_thread_ptid (event.target, event.ptid);
+	      save_waitstatus (t, &event.ws);
+	      t->resumed = true;
+	      t->executing = false;
+	    }
+	  else if (event.ws.kind == TARGET_WAITKIND_NO_RESUMED
 	      || event.ws.kind == TARGET_WAITKIND_THREAD_EXITED
 	      || event.ws.kind == TARGET_WAITKIND_EXITED
 	      || event.ws.kind == TARGET_WAITKIND_SIGNALLED)
diff --git a/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.c b/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.c
new file mode 100644
index 0000000..cddbd4d
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.c
@@ -0,0 +1,42 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2020 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <unistd.h>
+#include <signal.h>
+
+static void *
+fun (void *dummy)
+{
+  raise (SIGINT);
+  while (1)
+    sleep (1);
+
+  return NULL;
+}
+
+int
+main (void)
+{
+  pthread_t thread;
+  pthread_create (&thread, NULL, fun, NULL);
+
+  while (1)
+    sleep (1);
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.exp b/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.exp
new file mode 100644
index 0000000..2df509f
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/kill-in-stop-all-threads.exp
@@ -0,0 +1,125 @@ 
+# Copyright 2020 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+# This test-case starts gdb with a $binfile inferior, which sends a SIGINT to
+# itself.  Then when gdb handles the SIGINT and starts executing
+# stop_all_threads, the inferior is killed by a SIGKILL.
+#
+# In order to time the SIGKILL, the gdb itself is run using another gdb, which
+# allows us to set a breakpoint on stop_all_threads.  So, we have the following
+# hierarchy:
+# - master gdb
+# - inferior gdb
+# - $binfile
+#
+# This setup make this a non-standard test-case.
+#
+
+standard_testfile
+
+if {[build_executable "failed to build" $testfile $srcfile \
+	 {debug pthreads}] == -1} {
+    return -1
+}
+
+# Setup master gdb, with inferior gdb as executable.
+clean_restart $GDB
+
+# Set master gdb to have a different prompt, to make it easier to distinguish
+# between prompts of master gdb and inferior gdb.
+set saved_gdb_prompt $gdb_prompt
+set master_gdb_prompt "\\(master-gdb\\)"
+set gdb_prompt $master_gdb_prompt
+gdb_test_no_output "set prompt $master_gdb_prompt "
+
+# Set the arguments for the inferior gdb.
+gdb_test_no_output "set args $INTERNAL_GDBFLAGS $GDBFLAGS $binfile"
+
+# Run to main in master gdb.
+if ![runto_main] then {
+    fail "can't run to main"
+    return 0
+}
+
+# Set a breakpoint for the inferior gdb in master gdb.
+if {[gdb_breakpoint "stop_all_threads" no-message] != 1 } {
+    # If we cannot set this breakpoint, there no point in trying further.
+    # Bail out.
+    return -1
+}
+
+# Continue from main in master gdb to initial inferior gdb prompt.
+set gdb_prompt $saved_gdb_prompt
+gdb_test "continue" "Continuing\..*Reading symbols from.*" \
+    "continue from gdb main"
+
+# Start $binfile
+set gdb_prompt $master_gdb_prompt
+gdb_test "start" "Starting program.*Breakpoint 2, stop_all_threads \\(\\).*" \
+    "start inferior"
+
+# We run into stop_all_threads breakpoint in master gdb here, but it's too
+# early, continue to inferior gdb prompt.
+set gdb_prompt $saved_gdb_prompt
+gdb_continue_to_breakpoint "continue past stop_all_threads bp" ".*$srcfile:.*"
+
+# Get the pid of the $binfile process
+set pid -1
+gdb_test_multiple "info inferior 1" "get inferior pid" {
+    -re -wrap "process (\[0-9\]*).*" {
+	set pid $expect_out(1,string)
+	pass $gdb_test_name
+    }
+}
+if { $pid == -1 } {
+    return -1
+}
+
+# Continue $binfile.  The $binfile will then trigger SIGINT, which will
+# trigger stop_all_threads in inferior gdb, which will cause the breakpoint
+# to hit in master gdb.
+set gdb_prompt $master_gdb_prompt
+gdb_continue_to_breakpoint "continue to stop_all_threads bp" ".*infrun.c:.*"
+
+# Kill inferior in master gdb, while we're at the start of stop_all_threads in
+# inferior gdb.
+gdb_test_no_output "shell kill -9 $pid"
+
+# Continue from master gdb prompt to inferior gdb prompt.
+set gdb_prompt $saved_gdb_prompt
+gdb_test_multiple "continue" "continue to raise sigint" {
+    -re "warning: unable to open /proc file" {
+	# This warning is the first sign of trouble before we start hanging.
+	# Fail and bail out now, instead of waiting for a timeout.
+	fail $gdb_test_name
+	return -1
+    }
+    -re -wrap "Continuing\.\[\r\n\]+Thread \[0-9\]+ received signal SIGINT, Interrupt\..*" {
+	pass $gdb_test_name
+    }
+}
+
+# Check that the SIGKILL of $binfile is reported by inferior gdb.
+set killed_msg [multi_line "Program terminated with signal SIGKILL, Killed\." \
+		    "The program no longer exists\."]
+gdb_test "continue" "Continuing\.\[\r\n\]+$killed_msg" "continue to inferior exit"
+
+# Check that inferior gdb exits normally
+set gdb_prompt $master_gdb_prompt
+gdb_test "quit" "$inferior_exited_re normally\\\]"
+
+# Restore gdb prompt in master gdb.
+set gdb_prompt $saved_gdb_prompt
+gdb_test_no_output "set prompt $saved_gdb_prompt "