diff mbox

[v3,09/17] Teach non-stop to do in-line step-overs (stop all, step, restart)

Message ID 555B7C56.7040208@redhat.com
State New
Headers show

Commit Message

Pedro Alves May 19, 2015, 6:09 p.m. UTC
Hi Doug,

Thanks for your comments, and sorry for the delay in getting
back to answering them.

On 04/27/2015 08:27 PM, Doug Evans wrote:

>  > diff --git a/gdb/breakpoint.c b/gdb/breakpoint.c
>  > index 31869f1..e23d223 100644
>  > --- a/gdb/breakpoint.c
>  > +++ b/gdb/breakpoint.c
>  > @@ -468,6 +468,8 @@ breakpoints_should_be_inserted_now (void)
>  >      }
>  >    else if (target_has_execution)
>  >      {
>  > +      struct thread_info *tp;
>  > +
>  >        if (always_inserted_mode)
>  >  	{
>  >  	  /* The user wants breakpoints inserted even if all threads
>  > @@ -477,6 +479,13 @@ breakpoints_should_be_inserted_now (void)
>  >  
>  >        if (threads_are_executing ())
>  >  	return 1;
> 
> Not a problem introduced by this patch, but as an fyi, the terminology
> employed here is a bit confusing.
> Why would we want to insert breakpoints into executing threads,
> or when threads are executing? That's what a simple reading of this
> code says is happening. This reading can't be correct of course. :-)

Right.  :-)

> 
>  > +
>  > +      /* Don't remove breakpoints yet if, even though all threads are
>  > +	 stopped, we still have events to process.  */
>  > +      ALL_NON_EXITED_THREADS (tp)
>  > +	if (tp->resumed
>  > +	    && tp->suspend.waitstatus_pending_p)
>  > +	  return 1;
> 
> Plus, this function is named "breakpoints_should_be_inserted_now"
> but the comment is talking about whether breakpoints should be removed.

Yeah, because if breakpoints are inserted now, and the function returns
false (meaning, they shouldn't be inserted now", they'll be removed.

> 
> Can you elaborate on how to interpret the name of this function?
> Guessing at how I'm supposed to interpret what this function is for,
> is a better name
> "breakpoints_should_have_been_inserted_by_now_or_should_remain_inserted"?
> [Not that that's my recommendation :-). Just trying to understand how
> to read this function.]

You got it right, but I'm afraid I lack the English skills to come
up with a better name.  "be inserted" is the state we want to reach, not
an action. Maybe "should_breakpoints_be_inserted_now" is little clearer,
but it still doesn't distinguish "state" vs "action".  Because
state("be inserted"=false) is clearly "no breakpoints on target",
while action("be inserted"=false) could mean that whatever 
breakpoint is already inserted remains inserted.

> 
>  >      }
>  >    return 0;
>  >  }
>  > diff --git a/gdb/gdbthread.h b/gdb/gdbthread.h
>  > index 7052ee1..22c8eb2 100644
>  > --- a/gdb/gdbthread.h
>  > +++ b/gdb/gdbthread.h
>  > @@ -29,6 +29,7 @@ struct symtab;
>  >  #include "inferior.h"
>  >  #include "btrace.h"
>  >  #include "common/vec.h"
>  > +#include "target/waitstatus.h"
>  >  
>  >  /* Frontend view of the thread state.  Possible extensions: stepping,
>  >     finishing, until(ling),...  */
>  > @@ -159,6 +160,23 @@ struct thread_suspend_state
>  >       should be suppressed, the core will take care of clearing this
>  >       before the target is resumed.  */
>  >    enum gdb_signal stop_signal;
>  > +
>  > +  /* The reason the thread last stopped, if we need to track it
>  > +     (breakpoint, watchpoint, etc.)  */
>  > +  enum target_stop_reason stop_reason;
>  > +
>  > +  /* The waitstatus for this thread's last event.  */
>  > +  struct target_waitstatus waitstatus;
>  > +  /* If true WAITSTATUS hasn't been handled yet.  */
>  > +  int waitstatus_pending_p;
>  > +
>  > +  /* Record the pc of the thread the last time it stopped.  (This is
>  > +     not the current thread's PC as that may have changed since the
>  > +     last stop, e.g., "return" command, or "p $pc = 0xf000").  This
>  > +     used in coordination with stop_reason and waitstatus_pending_p:
> 
> s/used/is used/ ?

Fixed.

>  >  typedef struct value *value_ptr;
>  > @@ -183,6 +201,14 @@ struct thread_info
>  >       thread is off and running.  */
>  >    int executing;
>  >  
>  > +  /* Non-zero if this thread will be/has been resumed.  Note that a
>  > +     thread can be marked both as not-executing and resumed at the
>  > +     same time.  This happens if we try to resume a thread that has a
>  > +     wait status pending.  We shouldn't let the thread run until that
>  > +     wait status has been processed, but we should not process that
>  > +     wait status if we didn't try to let the thread run.  */
>  > +  int resumed;
> 
> I suspect this will be another source of confusion, but I don't have
> a good suggestion at the moment for how to improve it.
> The "will be" in the comment speaks in future tense, but the name
> "resumed" is past tense. Maybe (though I'd have to spend more time
> reading the code to be sure) it would make sense to have this be
> multi-state: not-resumed, to-be-resumed, and resumed; thus splitting up
> "will be" resumed from "has been" resumed.

I've changed this to:

  /* Non-zero if this thread is resumed from infrun's perspective.
     Note that a thread can be marked both as not-executing and
     resumed at the same time.  This happens if we try to resume a
     thread that has a wait status pending.  We shouldn't let the
     thread really run until that wait status has been processed, but
     we should not process that wait status if we didn't try to let
     the thread run.  */
  int resumed;

>  > +  if (tp->suspend.waitstatus_pending_p)
>  > +    {
>  > +      if (debug_infrun)
>  > +	{
>  > +	  char *statstr;
>  > +
>  > +	  statstr = target_waitstatus_to_string (&tp->suspend.waitstatus);
>  > +	  fprintf_unfiltered (gdb_stdlog,
>  > +			      "infrun: resume: thread %s has pending wait status %s "
>  > +			      "(currently_stepping=%d).\n",
>  > +			      target_pid_to_str (tp->ptid),  statstr,
>  > +			      currently_stepping (tp));
>  > +	  xfree (statstr);
> 
> Not something that has to be done with this patch of course,
> but it's nice that we don't have to track the memory of target_pid_to_str;
> IWBN to be able to do the same for target_waitstatus_to_string.
> [In C++ it could just return a string, and we *could* just wait for C++.
> Just a thought.]

I'm just going with the flow you yourself created.  ;-)

> 
>  > +	}
>  > +
>  > +      tp->resumed = 1;
>  > +      /* Avoid confusing the next resume, if the next stop/resume
>  > +	 happens to apply to another thread.  */
>  > +      tp->suspend.stop_signal = GDB_SIGNAL_0;
> 
> This is a bit confusing. How will the value of stop_signal in one
> thread affect stop/resume in another thread?

The wonders of copy/paste led to that comment percolating all the way
up here.  I first added that comment in 2020b7abd8 elsewhere, when
stop_signal was converted to be per-thread instead of a global.
With all the 7.9 changes to make gdb pass signals to the right
threads, I think this comment no longer makes sense.  I'll remove it.

> Plus, the reader is left worried that we just clobbered the signal
> that we need to resume thread tp with. Can you elaborate on what's
> happening here?

Yeah good point.  I recall wanting to mention something about
this, but forgot.  This is the same situation we already have here
in linux-nat.c's target_resume:

  /* If we have a pending wait status for this thread, there is no
     point in resuming the process.  But first make sure that
     linux_nat_wait won't preemptively handle the event - we
     should never take this short-circuit if we are going to
     leave LP running, since we have skipped resuming all the
     other threads.  This bit of code needs to be synchronized
     with linux_nat_wait.  */

  if (lp->status && WIFSTOPPED (lp->status))
    {
      if (!lp->step
	  && WSTOPSIG (lp->status)
	  && sigismember (&pass_mask, WSTOPSIG (lp->status)))
	{
	  if (debug_linux_nat)
	    fprintf_unfiltered (gdb_stdlog,
				"LLR: Not short circuiting for ignored "
				"status 0x%x\n", lp->status);

	  /* FIXME: What should we do if we are supposed to continue
	     this thread with a signal?  */
	  gdb_assert (signo == GDB_SIGNAL_0);
	  signo = gdb_signal_from_host (WSTOPSIG (lp->status));
	  lp->status = 0;
	}
    }

Probably the only thing we can do is queue the new signal
to deliver later, somehow.   gdbserver's Linux backend handles
that with linux-low.c's lwp->pending_signals list.

Since Mark added that FIXME back in 2000 already, and this
is only enabled on native Linux for now, and this is only
reachable with gdbserver + non-stop on cases we're mishandle
before anyway, we're not losing anything by not handling
this yet.

So I think we should be fine with adding a warning for now:

      /* FIXME: What should we do if we are supposed to resume this
	 thread with a signal?  Maybe we should maintain a queue of
	 pending signals to deliver.  */
      if (sig != GDB_SIGNAL_0)
	{
	  warning (_("Couldn't deliver signal %s to %s.\n"),
		   gdb_signal_to_name (sig), target_pid_to_str (tp->ptid));
	}

I ran this against the testsuite and nothing exercises this today,
as expected; but I know there's a PR about the assert above.

>  > +      if (pc != tp->suspend.stop_pc)
>  > +	{
>  > +	  if (debug_infrun)
>  > +	    fprintf_unfiltered (gdb_stdlog,
>  > +				"infrun: PC of %s changed.  was=%s, now=%s\n",
>  > +				target_pid_to_str (tp->ptid),
>  > +				paddress (target_gdbarch (), tp->prev_pc),
>  > +				paddress (target_gdbarch (), pc));
> 
> s/target_gdbarch ()/gdbarch/ ?

Fixed.  Below as well.

>  > +wait_one (struct target_waitstatus *ws)
>  > +{
>  > +  ptid_t event_ptid;
>  > +  ptid_t wait_ptid = minus_one_ptid;
>  > +
>  > +  overlay_cache_invalid = 1;
>  > +
>  > +  /* Flush target cache before starting to handle each event.
>  > +     Target was running and cache could be stale.  This is just a
>  > +     heuristic.  Running threads may modify target memory, but we
>  > +     don't get any event.  */
>  > +  target_dcache_invalidate ();
>  > +
>  > +  if (deprecated_target_wait_hook)
>  > +    event_ptid = deprecated_target_wait_hook (wait_ptid, ws, 0);
>  > +  else
>  > +    event_ptid = target_wait (wait_ptid, ws, 0);
>  > +
>  > +  if (debug_infrun)
>  > +    print_target_wait_results (wait_ptid, event_ptid, ws);
>  > +
>  > +  if (ws->kind == TARGET_WAITKIND_SYSCALL_ENTRY
>  > +      || ws->kind == TARGET_WAITKIND_SYSCALL_RETURN)
>  > +    ws->value.syscall_number = UNKNOWN_SYSCALL;
> 
> IWBN to have a comment explaining why we set UNKNOWN_SYSCALL here.

Good question.  I honestly don't recall.  I ran the testsuite now
with that removed, and nothing broke.  Dropped.

>  > +/* Stop all threads.  */
>  > +
>  > +static void
>  > +stop_all_threads (void)
>  > +{
>  > +  /* We may need multiple passes to discover all threads.  */
>  > +  int pass;
>  > +  int iterations = 0;
>  > +  ptid_t entry_ptid;
>  > +  struct cleanup *old_chain;
>  > +
>  > +  gdb_assert (non_stop);
>  > +
>  > +  if (debug_infrun)
>  > +    fprintf_unfiltered (gdb_stdlog, "infrun: stop_all_threads\n");
>  > +
>  > +  entry_ptid = inferior_ptid;
>  > +  old_chain = make_cleanup (switch_to_thread_cleanup, &entry_ptid);
>  > +
>  > +  /* Stop threads in two passes since threads could be spawning as we
>  > +     go through the first pass.  In the second pass, we will stop such
>  > +     spawned threads.  */
>  > +  for (pass = 0; pass < 2; pass++, iterations++)
> 
> Can you rephrase the "Stop threads in two passes ... In the second pass ..."
> comment? What's happening here is that we keep iterating until two passes
> find no new threads (IIUC).

You're right.  Here's what I got now:

  /* Request threads to stop, and then wait for the stops.  Because
     threads we already know about can spawn more threads while we're
     trying to stop them, and we only learn about new threads when we
     update the thread list, do this in a loop, and keep iterating
     until two passes find no threads that need to be stopped.  */
  for (pass = 0; pass < 2; pass++, iterations++)
    {

Here's what I'm squashing to the original patch.  Let me know
what you think.

 gdb/infrun.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

Comments

Pedro Alves May 19, 2015, 6:49 p.m. UTC | #1
On 05/19/2015 07:09 PM, Pedro Alves wrote:

> Here's what I'm squashing to the original patch.  Let me know
> what you think.

Rebased and force-pushed to users/palves/all-stop-non-stop.

Thanks,
Pedro Alves
diff mbox

Patch

diff --git a/gdb/infrun.c b/gdb/infrun.c
index 9f3da09..540ca87 100644
--- a/gdb/infrun.c
+++ b/gdb/infrun.c
@@ -2262,8 +2262,16 @@  resume (enum gdb_signal sig)
 	}
 
       tp->resumed = 1;
-      /* Avoid confusing the next resume, if the next stop/resume
-	 happens to apply to another thread.  */
+
+      /* FIXME: What should we do if we are supposed to resume this
+	 thread with a signal?  Maybe we should maintain a queue of
+	 pending signals to deliver.  */
+      if (sig != GDB_SIGNAL_0)
+	{
+	  warning (_("Couldn't deliver signal %s to %s.\n"),
+		   gdb_signal_to_name (sig), target_pid_to_str (tp->ptid));
+	}
+
       tp->suspend.stop_signal = GDB_SIGNAL_0;
       discard_cleanups (old_cleanups);
 
@@ -3313,8 +3321,8 @@  do_target_wait (ptid_t ptid, struct target_waitstatus *status, int options)
 	    fprintf_unfiltered (gdb_stdlog,
 				"infrun: PC of %s changed.  was=%s, now=%s\n",
 				target_pid_to_str (tp->ptid),
-				paddress (target_gdbarch (), tp->prev_pc),
-				paddress (target_gdbarch (), pc));
+				paddress (gdbarch, tp->prev_pc),
+				paddress (gdbarch, pc));
 	  discard = 1;
 	}
       else if (!breakpoint_inserted_here_p (get_regcache_aspace (regcache), pc))
@@ -3323,7 +3331,7 @@  do_target_wait (ptid_t ptid, struct target_waitstatus *status, int options)
 	    fprintf_unfiltered (gdb_stdlog,
 				"infrun: previous breakpoint of %s, at %s gone\n",
 				target_pid_to_str (tp->ptid),
-				paddress (target_gdbarch (), pc));
+				paddress (gdbarch, pc));
 
 	  discard = 1;
 	}
@@ -4008,10 +4016,6 @@  wait_one (struct target_waitstatus *ws)
   if (debug_infrun)
     print_target_wait_results (wait_ptid, event_ptid, ws);
 
-  if (ws->kind == TARGET_WAITKIND_SYSCALL_ENTRY
-      || ws->kind == TARGET_WAITKIND_SYSCALL_RETURN)
-    ws->value.syscall_number = UNKNOWN_SYSCALL;
-
   return event_ptid;
 }
 
@@ -4146,9 +4150,11 @@  stop_all_threads (void)
   entry_ptid = inferior_ptid;
   old_chain = make_cleanup (switch_to_thread_cleanup, &entry_ptid);
 
-  /* Stop threads in two passes since threads could be spawning as we
-     go through the first pass.  In the second pass, we will stop such
-     spawned threads.  */
+  /* Request threads to stop, and then wait for the stops.  Because
+     threads we already know about can spawn more threads while we're
+     trying to stop them, and we only learn about new threads when we
+     update the thread list, do this in a loop, and keep iterating
+     until two passes find no threads that need to be stopped.  */
   for (pass = 0; pass < 2; pass++, iterations++)
     {
       if (debug_infrun)