[3/N] remote follow fork and spurious child stops in non-stop mode
Commit Message
On 07/29/2015 02:38 PM, Pedro Alves wrote:
> Ignore the "SIGTRAP" mention in "SEL: Found 2 SIGTRAP events",
> it's "two events". And the one that was picked was a process
> exit. But the tail end of linux_wait_1 isn't expecting that
> to happen.
>
This seems to fix it (and ends up making the code a little
more like linux-nat.c). I'm running it through the testsuite.
--------
From: Pedro Alves <palves@redhat.com>
Date: 2015-07-29 14:40:04 +0100
fix
---
gdb/gdbserver/linux-low.c | 67 ++++++++++++++++++++++++++-------------------
gdb/gdbserver/linux-low.h | 11 +++----
2 files changed, 42 insertions(+), 36 deletions(-)
Comments
On 07/29/2015 03:23 PM, Pedro Alves wrote:
> On 07/29/2015 02:38 PM, Pedro Alves wrote:
>
>> Ignore the "SIGTRAP" mention in "SEL: Found 2 SIGTRAP events",
>> it's "two events". And the one that was picked was a process
>> exit. But the tail end of linux_wait_1 isn't expecting that
>> to happen.
>>
>
> This seems to fix it (and ends up making the code a little
> more like linux-nat.c). I'm running it through the testsuite.
It passes cleanly.
And I confirmed that that test idea triggers the suspend count
assertion I suspected:
Child exited with status 0
/home/pedro/gdb/mygit/build/../src/gdb/gdbserver/linux-low.c:2619: A problem internal to GDBserver has been detected.
unsuspend_one_lwp: Assertion `lwp->suspended >= 0' failed.
[Inferior 1 (process 32473) exited with code 01]
(gdb)
I'll convert that to a proper test.
Thanks,
Pedro Alves
On 07/29/2015 04:40 PM, Pedro Alves wrote:
> On 07/29/2015 03:23 PM, Pedro Alves wrote:
>> On 07/29/2015 02:38 PM, Pedro Alves wrote:
>>
>>> Ignore the "SIGTRAP" mention in "SEL: Found 2 SIGTRAP events",
>>> it's "two events". And the one that was picked was a process
>>> exit. But the tail end of linux_wait_1 isn't expecting that
>>> to happen.
>>>
>>
>> This seems to fix it (and ends up making the code a little
>> more like linux-nat.c). I'm running it through the testsuite.
>
> It passes cleanly.
>
> And I confirmed that that test idea triggers the suspend count
> assertion I suspected:
>
> Child exited with status 0
> /home/pedro/gdb/mygit/build/../src/gdb/gdbserver/linux-low.c:2619: A problem internal to GDBserver has been detected.
> unsuspend_one_lwp: Assertion `lwp->suspended >= 0' failed.
> [Inferior 1 (process 32473) exited with code 01]
> (gdb)
>
> I'll convert that to a proper test.
>
And that exposes more issues, like:
[New Thread 27183.27382]
[New Thread 27183.27684]
/home/pedro/gdb/mygit/build/../src/gdb/thread.c:936: internal-error: finish_thread_state: Assertion `tp' failed.
A problem internal to GDB has been detected,
further debugging may prove unreliable.
Quit this debugging session? (y or n) FAIL: gdb.threads/fork-plus-threads-2.exp: inferior 1 exited (GDB internal error)
Resyncing due to internal error.
n
...
Detaching from process 28486
Detaching from process 28487
Detaching from process 28488
/home/pedro/gdb/mygit/build/../src/gdb/gdbserver/linux-low.c:3569: A problem internal to GDBserver has been detected.
stuck_in_jump_pad_callback: Assertion `lwp->suspended == 0' failed.
testcase /home/pedro/gdb/mygit/build/../src/gdb/testsuite/gdb.threads/fork-plus-threads-2.exp completed in 3 seconds
=== gdb Summary ===
Looks like I found myself a nice deep rat hole...
Thanks,
Pedro Alves
@@ -264,6 +264,7 @@ static int linux_wait_for_event (ptid_t ptid, int *wstat, int options);
static struct lwp_info *add_lwp (ptid_t ptid);
static int linux_stopped_by_watchpoint (void);
static void mark_lwp_dead (struct lwp_info *lwp, int wstat);
+static int lwp_is_marked_dead (struct lwp_info *lwp);
static void proceed_all_lwps (void);
static int finish_step_over (struct lwp_info *lwp);
static int kill_lwp (unsigned long lwpid, int signo);
@@ -755,9 +756,9 @@ add_lwp (ptid_t ptid)
{
struct lwp_info *lwp;
- lwp = (struct lwp_info *) xmalloc (sizeof (*lwp));
- memset (lwp, 0, sizeof (*lwp));
+ lwp = (struct lwp_info *) xcalloc (1, sizeof (*lwp));
+ lwp->waitstatus.kind = TARGET_WAITKIND_IGNORE;
if (the_low_target.new_thread != NULL)
the_low_target.new_thread (lwp);
@@ -1397,7 +1398,7 @@ linux_thread_alive (ptid_t ptid)
exited but we still haven't been able to report it to GDB, we'll
hold on to the last lwp of the dead process. */
if (lwp != NULL)
- return !lwp->dead;
+ return !lwp_is_marked_dead (lwp);
else
return 0;
}
@@ -2741,20 +2742,6 @@ ignore_event (struct target_waitstatus *ourstatus)
return null_ptid;
}
-/* Return non-zero if WAITSTATUS reflects an extended linux
- event. Otherwise, return zero. */
-
-static int
-extended_event_reported (const struct target_waitstatus *waitstatus)
-{
- if (waitstatus == NULL)
- return 0;
-
- return (waitstatus->kind == TARGET_WAITKIND_FORKED
- || waitstatus->kind == TARGET_WAITKIND_VFORKED
- || waitstatus->kind == TARGET_WAITKIND_VFORK_DONE);
-}
-
/* Wait for process, returns status. */
static ptid_t
@@ -3122,7 +3109,7 @@ linux_wait_1 (ptid_t ptid,
|| (gdb_breakpoint_here (event_child->stop_pc)
&& gdb_condition_true_at_breakpoint (event_child->stop_pc)
&& gdb_no_commands_at_breakpoint (event_child->stop_pc))
- || extended_event_reported (&event_child->waitstatus));
+ || event_child->waitstatus.kind != TARGET_WAITKIND_IGNORE);
run_breakpoint_commands (event_child->stop_pc);
@@ -3144,9 +3131,11 @@ linux_wait_1 (ptid_t ptid,
paddress (event_child->stop_pc),
paddress (event_child->step_range_start),
paddress (event_child->step_range_end));
- if (extended_event_reported (&event_child->waitstatus))
+ if (event_child->waitstatus.kind != TARGET_WAITKIND_IGNORE)
{
- char *str = target_waitstatus_to_string (ourstatus);
+ char *str;
+
+ str = target_waitstatus_to_string (&event_child->waitstatus);
debug_printf ("LWP %ld: extended event with waitstatus %s\n",
lwpid_of (get_lwp_thread (event_child)), str);
xfree (str);
@@ -3260,12 +3249,11 @@ linux_wait_1 (ptid_t ptid,
unstop_all_lwps (1, event_child);
}
- if (extended_event_reported (&event_child->waitstatus))
+ if (event_child->waitstatus.kind != TARGET_WAITKIND_IGNORE)
{
- /* If the reported event is a fork, vfork or exec, let GDB know. */
- ourstatus->kind = event_child->waitstatus.kind;
- ourstatus->value = event_child->waitstatus.value;
-
+ /* If the reported event is an exit, fork, vfork or exec, let
+ GDB know. */
+ *ourstatus = event_child->waitstatus;
/* Clear the event lwp's waitstatus since we handled it already. */
event_child->waitstatus.kind = TARGET_WAITKIND_IGNORE;
}
@@ -3473,13 +3461,23 @@ suspend_and_send_sigstop_callback (struct inferior_list_entry *entry,
static void
mark_lwp_dead (struct lwp_info *lwp, int wstat)
{
- /* It's dead, really. */
- lwp->dead = 1;
-
/* Store the exit status for later. */
lwp->status_pending_p = 1;
lwp->status_pending = wstat;
+ /* Store in waitstatus as well, as there's nothing else to process
+ for this event. */
+ if (WIFEXITED (wstat))
+ {
+ lwp->waitstatus.kind = TARGET_WAITKIND_EXITED;
+ lwp->waitstatus.value.integer = WEXITSTATUS (wstat);
+ }
+ else if (WIFSIGNALED (wstat))
+ {
+ lwp->waitstatus.kind = TARGET_WAITKIND_SIGNALLED;
+ lwp->waitstatus.value.sig = gdb_signal_from_host (WTERMSIG (wstat));
+ }
+
/* Prevent trying to stop it. */
lwp->stopped = 1;
@@ -3487,6 +3485,17 @@ mark_lwp_dead (struct lwp_info *lwp, int wstat)
lwp->stop_expected = 0;
}
+/* Return true if LWP has exited already, and has a pending exit event
+ to report to GDB. */
+
+static int
+lwp_is_marked_dead (struct lwp_info *lwp)
+{
+ return (lwp->status_pending_p
+ && (WIFEXITED (lwp->status_pending)
+ || WIFSIGNALED (lwp->status_pending)));
+}
+
/* Wait for all children to stop for the SIGSTOPs we just queued. */
static void
@@ -3603,7 +3612,7 @@ lwp_running (struct inferior_list_entry *entry, void *data)
struct thread_info *thread = (struct thread_info *) entry;
struct lwp_info *lwp = get_thread_lwp (thread);
- if (lwp->dead)
+ if (lwp_is_marked_dead (lwp))
return 0;
if (lwp->stopped)
return 0;
@@ -266,16 +266,13 @@ struct lwp_info
event already received in a wait()). */
int stopped;
- /* If this flag is set, the lwp is known to be dead already (exit
- event already received in a wait(), and is cached in
- status_pending). */
- int dead;
-
/* When stopped is set, the last wait status recorded for this lwp. */
int last_status;
- /* This is used to store extended ptrace event information until
- it is reported to GDB. */
+ /* If WAITSTATUS.KIND != TARGET_WAITKIND_IGNORE, the waitstatus for
+ this LWP's last event, to pass to GDB without any further
+ processing. This is used to store extended ptrace event
+ information or exit status until it can be reported to GDB. */
struct target_waitstatus waitstatus;
/* When stopped is set, this is where the lwp last stopped, with