[pushed] native/Linux: internal error if resume is short-circuited (Re: [PATCH 4/6] native/Linux: internal error if inferior disappears after stopped by breakpoint)

Message ID 550AC2EB.10801@redhat.com
State Committed
Headers

Commit Message

Pedro Alves March 19, 2015, 12:36 p.m. UTC
  On 03/06/2015 07:58 PM, Pedro Alves wrote:
> If the inferior disappears just after it was stopped at a breakpoint,
> GDB internal errors on next resume:
> 
>  Executing on target: kill -9 11605    (timeout = 300)
>  spawn -ignore SIGHUP kill -9 11605
>  continue
>  Continuing.
>  /home/pedro/gdb/mygit/src/gdb/linux-nat.c:2590: internal-error: status_callback: Assertion `lp->status != 0' failed.
> 
> This is because the thread had stopped for a breakpoint, and had
> already reported the event, so its ->status flag was cleared.  The
> lwp's stopped, etc., flags should only be cleared when we're sure the
> LWP was successfully resumed (see PR gdb/15713, git 8817a6f2).  So the
> next resume hits an ESRCH error which throws before those flags are
> cleared.  GDB core prints the error, and ends up calling target_wait
> to poll remaining events.  We then trip on the assertion.
> 
> Fix this by bailing out earlier.  GDBserver is already doing this.
> 
> A follow up patch will add a test that exercises this
> (gdb.base/killed-outside.exp).

So I thought some more about this and realized that it's
possible to construct a test case that triggers the assertion,
even without the special-case of a process that disappears.  So I
wrote that test, rewrote the git commit log in that direction,
and pushed it in, as below.

----
From eb54c8bf087f434b0cb91b35e7cde68a69ac9193 Mon Sep 17 00:00:00 2001
From: Pedro Alves <palves@redhat.com>
Date: Thu, 19 Mar 2015 12:20:25 +0000
Subject: [PATCH] native/Linux: internal error if resume is short-circuited

If linux_nat_resume short-circuits the resume because the current
thread has a pending status, and a thread with a higher number was
previously stopped for a breakpoint, GDB internal errors,
like:

 /home/pedro/gdb/mygit/src/gdb/linux-nat.c:2590: internal-error: status_callback: Assertion `lp->status != 0' failed.

Fix this by making status_callback bail out earlier.  GDBserver is
already doing the same.

New test added that exercises this.

gdb/ChangeLog:
2015-03-19  Pedro Alves  <palves@redhat.com>

	* linux-nat.c (status_callback): Return early if the LWP has no
	status pending.

gdb/testsuite/ChangeLog:
2015-03-19  Pedro Alves  <palves@redhat.com>

	* gdb.threads/continue-pending-status.c: New file.
	* gdb.threads/continue-pending-status.exp: New file.
---
 gdb/ChangeLog                                      |   5 +
 gdb/testsuite/ChangeLog                            |   5 +
 gdb/linux-nat.c                                    |   8 +-
 .../gdb.threads/continue-pending-status.c          |  58 +++++++++++
 .../gdb.threads/continue-pending-status.exp        | 110 +++++++++++++++++++++
 5 files changed, 182 insertions(+), 4 deletions(-)
 create mode 100644 gdb/testsuite/gdb.threads/continue-pending-status.c
 create mode 100644 gdb/testsuite/gdb.threads/continue-pending-status.exp
  

Patch

diff --git a/gdb/ChangeLog b/gdb/ChangeLog
index 45e5029..7ae3c58 100644
--- a/gdb/ChangeLog
+++ b/gdb/ChangeLog
@@ -1,5 +1,10 @@ 
 2015-03-19  Pedro Alves  <palves@redhat.com>
 
+	* linux-nat.c (status_callback): Return early if the LWP has no
+	status pending.
+
+2015-03-19  Pedro Alves  <palves@redhat.com>
+
 	* linux-nat.c (select_event_lwp_callback): Update comment to no
 	longer mention SIGTRAP.
 
diff --git a/gdb/testsuite/ChangeLog b/gdb/testsuite/ChangeLog
index d9f4ecf..94dae82 100644
--- a/gdb/testsuite/ChangeLog
+++ b/gdb/testsuite/ChangeLog
@@ -1,3 +1,8 @@ 
+2015-03-19  Pedro Alves  <palves@redhat.com>
+
+	* gdb.threads/continue-pending-status.c: New file.
+	* gdb.threads/continue-pending-status.exp: New file.
+
 2015-03-18  Pedro Alves  <palves@redhat.com>
 
 	* gdb.base/disp-step-syscall.exp (disp_step_cross_syscall):
diff --git a/gdb/linux-nat.c b/gdb/linux-nat.c
index f5622ac..f5f92d9 100644
--- a/gdb/linux-nat.c
+++ b/gdb/linux-nat.c
@@ -2543,6 +2543,9 @@  status_callback (struct lwp_info *lp, void *data)
   if (!lp->resumed)
     return 0;
 
+  if (!lwp_status_pending_p (lp))
+    return 0;
+
   if (lp->stop_reason == TARGET_STOPPED_BY_SW_BREAKPOINT
       || lp->stop_reason == TARGET_STOPPED_BY_HW_BREAKPOINT)
     {
@@ -2551,8 +2554,6 @@  status_callback (struct lwp_info *lp, void *data)
       CORE_ADDR pc;
       int discard = 0;
 
-      gdb_assert (lp->status != 0);
-
       pc = regcache_read_pc (regcache);
 
       if (pc != lp->stop_pc)
@@ -2590,10 +2591,9 @@  status_callback (struct lwp_info *lp, void *data)
 	  linux_resume_one_lwp (lp, lp->step, GDB_SIGNAL_0);
 	  return 0;
 	}
-      return 1;
     }
 
-  return lwp_status_pending_p (lp);
+  return 1;
 }
 
 /* Return non-zero if LP isn't stopped.  */
diff --git a/gdb/testsuite/gdb.threads/continue-pending-status.c b/gdb/testsuite/gdb.threads/continue-pending-status.c
new file mode 100644
index 0000000..eea0c7e
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/continue-pending-status.c
@@ -0,0 +1,58 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2015 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <assert.h>
+
+pthread_barrier_t barrier;
+
+#define NUM_THREADS 2
+
+void *
+thread_function (void *arg)
+{
+  /* This ensures that the breakpoint is only hit after both threads
+     are created, so the test can always switch to the non-event
+     thread when the breakpoint triggers.  */
+  pthread_barrier_wait (&barrier);
+
+  while (1); /* break here */
+}
+
+int
+main (void)
+{
+  int i;
+
+  pthread_barrier_init (&barrier, NULL, NUM_THREADS);
+
+  for (i = 0; i < NUM_THREADS; i++)
+    {
+      pthread_t thread;
+      int res;
+
+      res = pthread_create (&thread, NULL,
+			    thread_function, NULL);
+      assert (res == 0);
+    }
+
+  sleep (300);
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.threads/continue-pending-status.exp b/gdb/testsuite/gdb.threads/continue-pending-status.exp
new file mode 100644
index 0000000..ff73ce4
--- /dev/null
+++ b/gdb/testsuite/gdb.threads/continue-pending-status.exp
@@ -0,0 +1,110 @@ 
+# Copyright (C) 2015 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# This test exercises the case of stopping for a breakpoint hit of one
+# thread, then switching to a thread that has a status pending and
+# continuing.
+
+standard_testfile
+
+if [prepare_for_testing "failed to prepare" $testfile $srcfile {debug pthreads}] {
+    return -1
+}
+
+if ![runto_main] {
+    untested "could not run to main"
+    return -1
+}
+
+set break_line [gdb_get_line_number "break here"]
+
+# Return current thread's number.
+
+proc get_current_thread {} {
+    global gdb_prompt
+
+    set thread ""
+    set msg "get thread number"
+    gdb_test_multiple "print /x \$_thread" $msg {
+	-re "\\$\[0-9\]* = (0x\[0-9a-zA-Z\]+).*$gdb_prompt $" {
+	    set thread $expect_out(1,string)
+	    pass "$msg"
+	}
+    }
+    return ${thread}
+}
+
+# There are two threads in the program that are running the same tight
+# loop, where we place a breakpoint.  Sometimes we'll get a breakpoint
+# trigger for thread 2, with the breakpoint event of thread 3 pending,
+# other times the opposite.  The original bug that motivated this test
+# depended on the event thread being the highest numbered thread.  We
+# try the same multiple times, which should cover both threads
+# reporting the event.
+
+set attempts 20
+
+for {set i 0} {$i < $attempts} {incr i} {
+    with_test_prefix "attempt $i" {
+	gdb_test "b $srcfile:$break_line" \
+	    "Breakpoint .* at .*$srcfile, line $break_line.*" \
+	    "set break in tight loop"
+	gdb_test "continue" \
+	    "$srcfile:$break_line.*" \
+	    "continue to tight loop"
+
+	# Switch to the thread that did _not_ report the event (and
+	# thus may have a pending status).  At the time this test was
+	# written this was necessary to make linux-nat.c short-circuit
+	# the resume and go straight to consuming the pending event.
+	set thread [get_current_thread]
+	if {$thread == 2} {
+	    set thread 3
+	} else {
+	    set thread 2
+	}
+	gdb_test "thread $thread" \
+	    "Switching to thread $thread .*" \
+	    "switch to non-event thread"
+
+	# Delete all breakpoints so that continuing doesn't switch
+	# back to the event thread to do a step-over, which would mask
+	# away the original bug, which depended on the event thread
+	# still having TARGET_STOPPED_BY_SW_BREAKPOINT stop_reason.
+	delete_breakpoints
+
+	# In the original bug, continuing would trigger an internal
+	# error in the linux-nat.c backend.
+
+	set msg "continue for ctrl-c"
+	gdb_test_multiple "continue" $msg {
+	    -re "Continuing" {
+		pass $msg
+	    }
+	}
+
+	# Wait a bit for GDB to give the terminal to the inferior,
+	# otherwise ctrl-c too soon can result in a "Quit".
+	sleep 1
+	send_gdb "\003"
+
+	set msg "caught interrupt"
+	gdb_test_multiple "" $msg {
+	    -re "Program received signal SIGINT.*$gdb_prompt $" {
+		pass $msg
+	    }
+	}
+    }
+}