[30/34] windows-nat.c: Avoid writing debug registers if watchpoint hit pending

Message ID 20240507234233.371123-31-pedro@palves.net
State New
Headers
Series Windows non-stop mode |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed

Commit Message

Pedro Alves May 7, 2024, 11:42 p.m. UTC
  Several watchpoint-related testcases, such as
gdb.threads/watchthreads.exp, when tested with the backend
in non-stop mode, exposed an interesting detail of the Windows debug
API that wasn't considered before.  The symptom observed is spurious
SIGTRAPs, like:

  Thread 1 "watchthreads" received signal SIGTRAP, Trace/breakpoint trap.
  0x00000001004010b1 in main () at .../src/gdb/testsuite/gdb.threads/watchthreads.c:48
  48              args[i] = 1; usleep (1); /* Init value.  */

After a good amount of staring at logs and headscratching, I realized
the problem:

 #0 - It all starts with the fact that multiple threads can hit an event
      at the same time.  Say, a watchpoint for thread A, and a
      breakpoint for thread B.

 #1 - Say, WaitForDebugEvent reports the breakpoint hit for thread B
      first, then GDB for some reason decides to update debug
      registers, and continue.  Updating debug registers means writing
      the debug registers to _all_ threads, with SetThreadContext.

 #2 - WaitForDebugEvent reports the watchpoint hit for thread A.
      Watchpoint hits are reported as EXCEPTION_SINGLE_STEP.

 #3 - windows-nat checks the Dr6 debug register to check if the step
      was a watchpoint or hardware breakpoint stop, and finds that Dr6
      is completely cleared.  So windows-nat reports a plain SIGTRAP
      (given EXCEPTION_SINGLE_STEP) to the core.

 #4 - Thread A was not supposed to be stepping, so infrun reports the
      SIGTRAP to the user as a random signal.

The strange part is #3 above.  Why was Dr6 cleared?

Turns out that (at least in Windows 10 & 11), writing to _any_ debug
register has the side effect of clearing Dr6, even if you write back
the same values the registers already had.

I confirmed it clearly by adding this hack to GDB:

  if (th->context.ContextFlags == 0)
    {
      th->context.ContextFlags = CONTEXT_DEBUGGER_DR;

      /* Get current values of debug registers.  */
      CHECK (GetThreadContext (th->h, &th->context));

      DEBUG_EVENTS ("For 0x%x (once),  Dr6=0x%llx", th->tid, th->context.Dr6);

      /* Write debug registers back to thread, same values,
	 and re-read them.  */
      CHECK (SetThreadContext (th->h, &th->context));
      CHECK (GetThreadContext (th->h, &th->context));

      DEBUG_EVENTS ("For 0x%x (twice), Dr6=0x%llx", th->tid, th->context.Dr6);
    }

Which showed Dr6=0 after the write + re-read:

  [windows events] fill_thread_context: For 0x6a0 (once),  Dr6=0xffff0ff1
  [windows events] fill_thread_context: For 0x6a0 (twice), Dr6=0x0

This commit fixes the issue by detecting that a thread has a pending
watchpoint hit to report (Dr6 has interesting bits set), and if so,
avoiding modifying any debug register.  Instead, let the pending
watchpoint hit be reported by WaitForDebugEvent.  If infrun did want
to modify watchpoints, it will still be done when the thread is
eventually re-resumed after the pending watchpoint hit is reported.
(infrun knows how to gracefully handle the case of a watchpoint hit
for a watchpoint that has since been deleted.)

Change-Id: I21a3daa9e34eecfa054f0fea706e5ab40aabe70a
---
 gdb/nat/windows-nat.h  |   7 +++
 gdb/windows-nat.c      | 106 ++++++++++++++++++++++++++++++-----------
 gdbserver/win32-low.cc |   8 ++++
 gdbserver/win32-low.h  |   2 +
 4 files changed, 96 insertions(+), 27 deletions(-)
  

Patch

diff --git a/gdb/nat/windows-nat.h b/gdb/nat/windows-nat.h
index 6283ff0a4ee..2efb54e1ce7 100644
--- a/gdb/nat/windows-nat.h
+++ b/gdb/nat/windows-nat.h
@@ -205,6 +205,13 @@  struct windows_process_info
 
   virtual bool handle_access_violation (const EXCEPTION_RECORD *rec) = 0;
 
+  /* Fill in the thread's CONTEXT/WOW64_CONTEXT, if it wasn't filled
+     in yet.
+
+     This function must be supplied by the embedding application.  */
+
+  virtual void fill_thread_context (windows_thread_info *th) = 0;
+
   handle_exception_result handle_exception
       (DEBUG_EVENT &current_event,
        struct target_waitstatus *ourstatus, bool debug_exceptions);
diff --git a/gdb/windows-nat.c b/gdb/windows-nat.c
index 5500d8b8c87..73a6237dfa7 100644
--- a/gdb/windows-nat.c
+++ b/gdb/windows-nat.c
@@ -114,6 +114,7 @@  struct windows_per_inferior : public windows_process_info
   bool handle_access_violation (const EXCEPTION_RECORD *rec) override;
 
   void invalidate_context (windows_thread_info *th);
+  void fill_thread_context (windows_thread_info *th) override;
 
   void continue_one_thread (windows_thread_info *th,
 			    windows_continue_flags cont_flags);
@@ -746,17 +747,10 @@  windows_fetch_one_register (struct regcache *regcache,
 }
 
 void
-windows_nat_target::fetch_registers (struct regcache *regcache, int r)
+windows_per_inferior::fill_thread_context (windows_thread_info *th)
 {
-  windows_thread_info *th = windows_process.find_thread (regcache->ptid ());
-
-  /* Check if TH exists.  Windows sometimes uses a non-existent
-     thread id in its events.  */
-  if (th == NULL)
-    return;
-
 #ifdef __x86_64__
-  if (windows_process.wow64_process)
+  if (wow64_process)
     {
       if (th->wow64_context.ContextFlags == 0)
 	{
@@ -773,6 +767,19 @@  windows_nat_target::fetch_registers (struct regcache *regcache, int r)
 	  CHECK (GetThreadContext (th->h, &th->context));
 	}
     }
+}
+
+void
+windows_nat_target::fetch_registers (struct regcache *regcache, int r)
+{
+  windows_thread_info *th = windows_process.find_thread (regcache->ptid ());
+
+  /* Check if TH exists.  Windows sometimes uses a non-existent
+     thread id in its events.  */
+  if (th == nullptr)
+    return;
+
+  windows_process.fill_thread_context (th);
 
   if (r < 0)
     for (r = 0; r < gdbarch_num_regs (regcache->arch()); r++)
@@ -1291,36 +1298,81 @@  windows_per_inferior::continue_one_thread (windows_thread_info *th,
   DWORD &context_flags_ref = (wow64_process
 			      ? th->wow64_context.ContextFlags
 			      : th->context.ContextFlags);
+  const DWORD64 dr6 = (wow64_process
+		       ? th->wow64_context.Dr6
+		       : th->context.Dr6);
 #else
   DWORD &context_flags_ref = th->context.ContextFlags;
+  const DWORD dr6 = th->context.Dr6;
 #endif
 
   if (th->debug_registers_changed)
     {
-      context_flags_ref |= CONTEXT_DEBUG_REGISTERS;
-#ifdef __x86_64__
-      if (wow64_process)
+      windows_process.fill_thread_context (th);
+
+      gdb_assert ((context_flags_ref & CONTEXT_DEBUG_REGISTERS) != 0);
+
+      /* Check whether the thread has Dr6 set indicating a
+	 watchpoint hit, and we haven't seen the watchpoint event
+	 yet (reported as
+	 EXCEPTION_SINGLE_STEP/STATUS_WX86_SINGLE_STEP).  In that
+	 case, don't change the debug registers.  Changing debug
+	 registers, even if to the same values, makes the kernel
+	 clear Dr6.  The result would be we would lose the
+	 unreported watchpoint hit.  */
+      if ((dr6 & ~DR6_CLEAR_VALUE) != 0)
 	{
-	  th->wow64_context.Dr0 = state->dr_mirror[0];
-	  th->wow64_context.Dr1 = state->dr_mirror[1];
-	  th->wow64_context.Dr2 = state->dr_mirror[2];
-	  th->wow64_context.Dr3 = state->dr_mirror[3];
-	  th->wow64_context.Dr6 = DR6_CLEAR_VALUE;
-	  th->wow64_context.Dr7 = state->dr_control_mirror;
+	  if (th->last_event.dwDebugEventCode == EXCEPTION_DEBUG_EVENT
+	      && (th->last_event.u.Exception.ExceptionRecord.ExceptionCode
+		  == EXCEPTION_SINGLE_STEP))
+	    {
+	      DEBUG_EVENTS ("0x%x already reported watchpoint", th->tid);
+	    }
+	  else
+	    {
+	      DEBUG_EVENTS ("0x%x last reported something else (0x%x)",
+			    th->tid,
+			    th->last_event.dwDebugEventCode);
+
+	      /* Don't touch debug registers.  Let the pending
+		 watchpoint event be reported instead.  We will
+		 update the debug registers later when the thread
+		 is re-resumed by the core after the watchpoint
+		 event.  */
+	      context_flags_ref &= ~CONTEXT_DEBUG_REGISTERS;
+	    }
 	}
       else
-#endif
+	DEBUG_EVENTS ("0x%x has no dr6 set", th->tid);
+
+      if ((context_flags_ref & CONTEXT_DEBUG_REGISTERS) != 0)
 	{
-	  th->context.Dr0 = state->dr_mirror[0];
-	  th->context.Dr1 = state->dr_mirror[1];
-	  th->context.Dr2 = state->dr_mirror[2];
-	  th->context.Dr3 = state->dr_mirror[3];
-	  th->context.Dr6 = DR6_CLEAR_VALUE;
-	  th->context.Dr7 = state->dr_control_mirror;
-	}
+	  DEBUG_EVENTS ("0x%x changing dregs", th->tid);
+#ifdef __x86_64__
+	  if (wow64_process)
+	    {
+	      th->wow64_context.Dr0 = state->dr_mirror[0];
+	      th->wow64_context.Dr1 = state->dr_mirror[1];
+	      th->wow64_context.Dr2 = state->dr_mirror[2];
+	      th->wow64_context.Dr3 = state->dr_mirror[3];
+	      th->wow64_context.Dr6 = DR6_CLEAR_VALUE;
+	      th->wow64_context.Dr7 = state->dr_control_mirror;
+	    }
+	  else
+#endif
+	    {
+	      th->context.Dr0 = state->dr_mirror[0];
+	      th->context.Dr1 = state->dr_mirror[1];
+	      th->context.Dr2 = state->dr_mirror[2];
+	      th->context.Dr3 = state->dr_mirror[3];
+	      th->context.Dr6 = DR6_CLEAR_VALUE;
+	      th->context.Dr7 = state->dr_control_mirror;
+	    }
 
-      th->debug_registers_changed = false;
+	  th->debug_registers_changed = false;
+	}
     }
+
   if (context_flags_ref != 0)
     {
       DWORD ec = 0;
diff --git a/gdbserver/win32-low.cc b/gdbserver/win32-low.cc
index 004bf94c83a..65b01dc97ac 100644
--- a/gdbserver/win32-low.cc
+++ b/gdbserver/win32-low.cc
@@ -141,6 +141,14 @@  win32_require_context (windows_thread_info *th)
 
 /* See nat/windows-nat.h.  */
 
+void
+gdbserver_windows_process::fill_thread_context (windows_thread_info *th)
+{
+  win32_require_context (th);
+}
+
+/* See nat/windows-nat.h.  */
+
 windows_thread_info *
 gdbserver_windows_process::find_thread (ptid_t ptid)
 {
diff --git a/gdbserver/win32-low.h b/gdbserver/win32-low.h
index e99e47ea829..ea2a9b4c5b6 100644
--- a/gdbserver/win32-low.h
+++ b/gdbserver/win32-low.h
@@ -181,6 +181,8 @@  struct gdbserver_windows_process : public windows_nat::windows_process_info
   void handle_unload_dll (const DEBUG_EVENT &current_event) override;
   bool handle_access_violation (const EXCEPTION_RECORD *rec) override;
 
+  void fill_thread_context (windows_nat::windows_thread_info *th) override;
+
   int attaching = 0;
 
   /* A status that hasn't been reported to the core yet, and so