[18/34] Windows gdbserver: Fix scheduler-locking

Message ID 20240507234233.371123-19-pedro@palves.net
State New
Headers
Series Windows non-stop mode |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed

Commit Message

Pedro Alves May 7, 2024, 11:42 p.m. UTC
  This rewrites win32_process_target::resume such that scheduler-locking
is implemented properly.

It also uses the new get_last_debug_event_ptid function to avoid
considering passing a signal to the wrong thread, like done for the
native side in a previous patch.

Note this code/comment being removed:

 -    /* Yes, we're ignoring resume_info[0].thread.  It'd be tricky to make
 -       the Windows resume code do the right thing for thread switching.  */
 -    tid = windows_process.current_event.dwThreadId;

This meant that scheduler-locking currently is broken badly unless you
stay in the thread that last reported an event.  If you switch to a
different thread from the one that last reported an event and step,
you get a spurious SIGTRAP in the thread that last reported a stop,
not the one that you tried to step:

 (gdb) t 1
 [Switching to thread 1 (Thread 3908)]
 #0  0x00007fffc768d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
 (gdb) set scheduler-locking on
 (gdb) set disassemble-next-line
 "on", "off" or "auto" expected.
 (gdb) set disassemble-next-line on
 (gdb) frame
 #0  0x00007fffc768d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
 => 0x00007fffc768d6e4 <ntdll!ZwDelayExecution+20>:       c3                      ret
 (gdb) si

 Thread 3 received signal SIGTRAP, Trace/breakpoint trap.
 [Switching to Thread 2660]
 0x00007fffc4e4e92e in KERNELBASE!EncodeRemotePointer () from target:C:/Windows/System32/KernelBase.dll
 => 0x00007fffc4e4e92e <KERNELBASE!EncodeRemotePointer+8254>:    eb 78                   jmp    0x7fffc4e4e9a8 <KERNELBASE!EncodeRemotePointer+8376>
 (gdb)

Note how we switched to thread 1, stepped, and GDBserver still stepped
thread 3...  This is fixed by this patch.  We now get:

  (gdb) info threads
    Id   Target Id         Frame
    1    Thread 920        0x00007ffe0372d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
    2    Thread 8528       0x00007ffe03730ad4 in ntdll!ZwWaitForWorkViaWorkerFactory () from target:C:/Windows/SYSTEM32/ntdll.dll
    3    Thread 3128       0x00007ffe03730ad4 in ntdll!ZwWaitForWorkViaWorkerFactory () from target:C:/Windows/SYSTEM32/ntdll.dll
  * 4    Thread 7164       0x00007ffe0102e929 in KERNELBASE!EncodeRemotePointer () from target:C:/Windows/System32/KernelBase.dll
    5    Thread 8348       0x00007ffe0372d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
    6    Thread 2064       0x00007ffe0372d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
  (gdb) t 1
  [Switching to thread 1 (Thread 920)]
  #0  0x00007ffe0372d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
  (gdb) set scheduler-locking on
  (gdb) si
  0x00007ffe0372d6e4 in ntdll!ZwDelayExecution () from target:C:/Windows/SYSTEM32/ntdll.dll
  (gdb) si
  0x00007ffe00f9b44e in SleepEx () from target:C:/Windows/System32/KernelBase.dll
  (gdb) si
  0x00007ffe00f9b453 in SleepEx () from target:C:/Windows/System32/KernelBase.dll

I.e., we kept stepping the right thread, thread 1.

Note we stopped again at 0x00007ffe0372d6e4 the first time (same PC
the thread already was at before the first stepi) because the thread
had been stopped at a syscall, so that's normal:

 (gdb) disassemble
 Dump of assembler code for function ntdll!ZwDelayExecution:
    0x00007ffe0372d6d0 <+0>:     mov    %rcx,%r10
    0x00007ffe0372d6d3 <+3>:     mov    $0x34,%eax
    0x00007ffe0372d6d8 <+8>:     testb  $0x1,0x7ffe0308
    0x00007ffe0372d6e0 <+16>:    jne    0x7ffe0372d6e5 <ntdll!ZwDelayExecution+21>
    0x00007ffe0372d6e2 <+18>:    syscall
 => 0x00007ffe0372d6e4 <+20>:    ret

Change-Id: I44f4fe4cb98592517569c6716b9d189f42db25a0
---
 gdbserver/win32-low.cc | 160 +++++++++++++++++++++--------------------
 1 file changed, 82 insertions(+), 78 deletions(-)
  

Patch

diff --git a/gdbserver/win32-low.cc b/gdbserver/win32-low.cc
index 8f51dd67dc2..7d37068ab91 100644
--- a/gdbserver/win32-low.cc
+++ b/gdbserver/win32-low.cc
@@ -425,17 +425,8 @@  continue_one_thread (thread_info *thread, int thread_id)
 }
 
 static BOOL
-child_continue (DWORD continue_status, int thread_id)
+child_continue_for_kill (DWORD continue_status, int thread_id)
 {
-  for (thread_info *thread : all_threads)
-    {
-      auto *th = (windows_thread_info *) thread_target_data (thread);
-      if ((thread_id == -1 || thread_id == (int) th->tid)
-	  && !th->suspended
-	  && th->pending_stop.status.kind () != TARGET_WAITKIND_IGNORE)
-	return TRUE;
-    }
-
   /* The inferior will only continue after the ContinueDebugEvent
      call.  */
   for_each_thread ([&] (thread_info *thread)
@@ -701,7 +692,7 @@  win32_process_target::kill (process_info *process)
   TerminateProcess (windows_process.handle, 0);
   for (;;)
     {
-      if (!child_continue (DBG_CONTINUE, -1))
+      if (!child_continue_for_kill (DBG_CONTINUE, -1))
 	break;
       if (!wait_for_debug_event (&windows_process.current_event, INFINITE))
 	break;
@@ -768,96 +759,109 @@  win32_process_target::thread_alive (ptid_t ptid)
   return find_thread_ptid (ptid) != NULL;
 }
 
-/* Resume the inferior process.  RESUME_INFO describes how we want
-   to resume.  */
-void
-win32_process_target::resume (thread_resume *resume_info, size_t n)
+static void
+resume_one_thread (thread_info *thread, bool step, gdb_signal sig,
+		   DWORD *continue_status)
 {
-  DWORD tid;
-  enum gdb_signal sig;
-  int step;
-  windows_thread_info *th;
-  DWORD continue_status = DBG_CONTINUE;
-  ptid_t ptid;
-
-  /* This handles the very limited set of resume packets that GDB can
-     currently produce.  */
-
-  if (n == 1 && resume_info[0].thread == minus_one_ptid)
-    tid = -1;
-  else if (n > 1)
-    tid = -1;
-  else
-    /* Yes, we're ignoring resume_info[0].thread.  It'd be tricky to make
-       the Windows resume code do the right thing for thread switching.  */
-    tid = windows_process.current_event.dwThreadId;
-
-  if (resume_info[0].thread != minus_one_ptid)
-    {
-      sig = gdb_signal_from_host (resume_info[0].sig);
-      step = resume_info[0].kind == resume_step;
-    }
-  else
-    {
-      sig = GDB_SIGNAL_0;
-      step = 0;
-    }
+  auto *th = (windows_thread_info *) thread_target_data (thread);
 
   if (sig != GDB_SIGNAL_0)
     {
-      if (windows_process.current_event.dwDebugEventCode
-	  != EXCEPTION_DEBUG_EVENT)
+      /* Allow continuing with the same signal that interrupted us.
+	 Otherwise complain.  */
+      if (thread->id != get_last_debug_event_ptid ())
 	{
-	  OUTMSG (("Cannot continue with signal %s here.\n",
+	  /* ContinueDebugEvent will be for a different thread.  */
+	  OUTMSG (("Cannot continue with signal %d here.  "
+		   "Not last-event thread", sig));
+	}
+      else if (windows_process.current_event.dwDebugEventCode
+	       != EXCEPTION_DEBUG_EVENT)
+	{
+	  OUTMSG (("Cannot continue with signal %s here.  "
+		   "Not stopped for EXCEPTION_DEBUG_EVENT.\n",
 		   gdb_signal_to_string (sig)));
 	}
       else if (sig == windows_process.last_sig)
-	continue_status = DBG_EXCEPTION_NOT_HANDLED;
+	*continue_status = DBG_EXCEPTION_NOT_HANDLED;
       else
 	OUTMSG (("Can only continue with received signal %s.\n",
 		 gdb_signal_to_string (windows_process.last_sig)));
     }
 
-  windows_process.last_sig = GDB_SIGNAL_0;
+  win32_prepare_to_resume (th);
 
-  /* Get context for the currently selected thread.  */
-  ptid = debug_event_ptid (&windows_process.current_event);
-  th = windows_process.find_thread (ptid);
-  if (th)
-    {
-      win32_prepare_to_resume (th);
-
-      DWORD *context_flags;
+  DWORD *context_flags;
 #ifdef __x86_64__
-      if (windows_process.wow64_process)
-	context_flags = &th->wow64_context.ContextFlags;
-      else
+  if (windows_process.wow64_process)
+    context_flags = &th->wow64_context.ContextFlags;
+  else
 #endif
-	context_flags = &th->context.ContextFlags;
-      if (*context_flags)
+    context_flags = &th->context.ContextFlags;
+  if (*context_flags)
+    {
+      /* Move register values from the inferior into the thread
+	 context structure.  */
+      regcache_invalidate ();
+
+      if (step)
 	{
-	  /* Move register values from the inferior into the thread
-	     context structure.  */
-	  regcache_invalidate ();
+	  if (the_low_target.single_step != NULL)
+	    (*the_low_target.single_step) (th);
+	  else
+	    error ("Single stepping is not supported "
+		   "in this configuration.\n");
+	}
+
+      win32_set_thread_context (th);
+      *context_flags = 0;
+    }
+
+  continue_one_thread (thread, th->tid);
+}
+
+/* Resume the inferior process.  RESUME_INFO describes how we want
+   to resume.  */
+void
+win32_process_target::resume (thread_resume *resume_info, size_t n)
+{
+  DWORD continue_status = DBG_CONTINUE;
+  bool any_pending = false;
 
-	  if (step)
+  for (thread_info *thread : all_threads)
+    {
+      auto *th = (windows_thread_info *) thread_target_data (thread);
+
+      for (int ndx = 0; ndx < n; ndx++)
+	{
+	  thread_resume &r = resume_info[ndx];
+	  ptid_t ptid = r.thread;
+	  gdb_signal sig = gdb_signal_from_host (r.sig);
+	  bool step = r.kind == resume_step;
+
+	  if (ptid == minus_one_ptid || ptid == thread->id
+	      /* Handle both 'pPID' and 'pPID.-1' as meaning 'all threads
+		 of PID'.  */
+	      || (ptid.pid () == pid_of (thread)
+		  && (ptid.is_pid () || ptid.lwp () == -1)))
 	    {
-	      if (the_low_target.single_step != NULL)
-		(*the_low_target.single_step) (th);
-	      else
-		error ("Single stepping is not supported "
-		       "in this configuration.\n");
-	    }
+	      /* Ignore (wildcard) resume requests for already-resumed
+		 threads.  */
+	      if (!th->suspended)
+		continue;
 
-	  win32_set_thread_context (th);
-	  *context_flags = 0;
+	      resume_one_thread (thread, step, sig, &continue_status);
+	      break;
+	    }
 	}
-    }
 
-  /* Allow continuing with the same signal that interrupted us.
-     Otherwise complain.  */
+      if (!th->suspended
+	  && th->pending_stop.status.kind () != TARGET_WAITKIND_IGNORE)
+	any_pending = true;
+    }
 
-  child_continue (continue_status, tid);
+  if (!any_pending)
+    continue_last_debug_event (continue_status, debug_threads);
 }
 
 /* See nat/windows-nat.h.  */