[v3,2/2,gdb/dap] Fix stray KeyboardInterrupt after cancel

Message ID 20240229135936.10140-2-tdevries@suse.de
State Committed
Headers
Series [v3,1/2,gdb/dap] Move send_gdb and send_gdb_with_response to server module |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 fail Patch failed to apply

Commit Message

Tom de Vries Feb. 29, 2024, 1:59 p.m. UTC
  When running test-case gdb.dap/pause.exp 100 times in a loop, it passes
100/100.

But if we remove the two "sleep 0.2" from the test-case, we run into
(copied from dap.log and edited for readability):
...
Traceback (most recent call last):
  File "startup.py", line 251, in message
    def message():

KeyboardInterrupt
Quit
...

This happens as follows.

CancellationHandler.cancel calls gdb.interrupt to cancel a request in flight.

The idea is that this interrupt triggers while in fn here in message (a nested
function of send_gdb_with_response):
...
    def message():
        try:
            val = fn()
            result_q.put(val)
        except (Exception, KeyboardInterrupt) as e:
            result_q.put(e)
...
but instead it triggers outside the try/except.

Fix this by:
- in CancellationHandler, renaming variable in_flight to in_flight_dap_thread,
  and adding a variable in_flight_gdb_thread to be able to distinguish when
  a request is in flight in the dap thread or the gdb thread.
- adding a wrapper Cancellable to to deal with cancelling the wrapped
  event
- using Cancellable in send_gdb and send_gdb_with_response to wrap the posted
  event
- in CancellationHandler.cancel, only call gdb.interrupt if
  req == self.in_flight_gdb_thread.

This makes the test-case pass 100/100, also when adding the extra stressor of
"taskset -c 0", which makes the fail more likely without the patch.

Tested on aarch64-linux.

PR dap/31275
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=31275
---
 gdb/python/lib/gdb/dap/server.py | 88 ++++++++++++++++++++++++--------
 gdb/testsuite/gdb.dap/pause.exp  |  4 +-
 2 files changed, 69 insertions(+), 23 deletions(-)
  

Comments

Tom Tromey Feb. 29, 2024, 5:41 p.m. UTC | #1
>>>>> "Tom" == Tom de Vries <tdevries@suse.de> writes:

Tom>  # Wait a little to try to ensure the command is running.
Tom> -sleep 0.2
Tom> +#sleep 0.2
Tom>  set cancel_id [dap_send_request cancel \
Tom>  		   [format {o requestId [i %d]} $cont_id]]
 
Tom> @@ -140,7 +140,7 @@ set cont_id [dap_send_request evaluate \
Tom>  		      $gdbfile]]
 
Tom>  # Wait a little to try to ensure the command is running.
Tom> -sleep 0.2
Tom> +#sleep 0.2
Tom>  set cancel_id [dap_send_request cancel \
Tom>  		   [format {o requestId [i %d]} $cont_id]]
 
These hunks should probably be dropped.

Ok with that change.
Approved-By: Tom Tromey <tom@tromey.com>

Tom
  

Patch

diff --git a/gdb/python/lib/gdb/dap/server.py b/gdb/python/lib/gdb/dap/server.py
index fe1f8bbd428..cba5a308ff1 100644
--- a/gdb/python/lib/gdb/dap/server.py
+++ b/gdb/python/lib/gdb/dap/server.py
@@ -19,6 +19,7 @@  import heapq
 import inspect
 import json
 import threading
+from contextlib import contextmanager
 
 from .io import start_json_writer, read_json
 from .startup import (
@@ -59,24 +60,19 @@  class CancellationHandler:
         # Methods on this class acquire this lock before proceeding.
         self.lock = threading.Lock()
         # The request currently being handled, or None.
-        self.in_flight = None
+        self.in_flight_dap_thread = None
+        self.in_flight_gdb_thread = None
         self.reqs = []
 
     def starting(self, req):
-        """Call at the start of the given request.
-
-        Throws the appropriate exception if the request should be
-        immediately cancelled."""
+        """Call at the start of the given request."""
         with self.lock:
-            self.in_flight = req
-            while len(self.reqs) > 0 and self.reqs[0] <= req:
-                if heapq.heappop(self.reqs) == req:
-                    raise KeyboardInterrupt()
+            self.in_flight_dap_thread = req
 
     def done(self, req):
         """Indicate that the request is done."""
         with self.lock:
-            self.in_flight = None
+            self.in_flight_dap_thread = None
 
     def cancel(self, req):
         """Call to cancel a request.
@@ -85,7 +81,7 @@  class CancellationHandler:
         If the request is in flight, it is interrupted.
         If the request has not yet been seen, the cancellation is queued."""
         with self.lock:
-            if req == self.in_flight:
+            if req == self.in_flight_gdb_thread:
                 gdb.interrupt()
             else:
                 # We don't actually ignore the request here, but in
@@ -96,6 +92,29 @@  class CancellationHandler:
                 # to try to check for this.
                 heapq.heappush(self.reqs, req)
 
+    @contextmanager
+    def interruptable_region(self, req):
+        """Return a new context manager that sets in_flight_gdb_thread to
+        REQ."""
+        if req is None:
+            # No request is handled in the region, just execute the region.
+            yield
+            return
+        try:
+            with self.lock:
+                # If the request is cancelled, don't execute the region.
+                while len(self.reqs) > 0 and self.reqs[0] <= req:
+                    if heapq.heappop(self.reqs) == req:
+                        raise KeyboardInterrupt()
+                # Request is being handled by the gdb thread.
+                self.in_flight_gdb_thread = req
+            # Execute region.  This may be interrupted by gdb.interrupt.
+            yield
+        finally:
+            with self.lock:
+                # Request is no longer handled by the gdb thread.
+                self.in_flight_gdb_thread = None
+
 
 class Server:
     """The DAP server class."""
@@ -434,13 +453,45 @@  class Invoker(object):
         exec_and_log(self.cmd)
 
 
+class Cancellable(object):
+
+    def __init__(self, fn, result_q=None):
+        self.fn = fn
+        self.result_q = result_q
+        with _server.canceller.lock:
+            self.req = _server.canceller.in_flight_dap_thread
+
+    # This is invoked in the gdb thread to run self.fn.
+    @in_gdb_thread
+    def __call__(self):
+        try:
+            with _server.canceller.interruptable_region(self.req):
+                val = self.fn()
+                if self.result_q is not None:
+                    self.result_q.put(val)
+        except (Exception, KeyboardInterrupt) as e:
+            if self.result_q is not None:
+                # Pass result or exception to caller.
+                self.result_q.put(e)
+            elif isinstance(e, KeyboardInterrupt):
+                # Fn was cancelled.
+                pass
+            else:
+                # Exception happened.  Ignore and log it.
+                err_string = "%s, %s" % (err, type(err))
+                thread_log("caught exception: " + err_string)
+                log_stack()
+
+
 def send_gdb(cmd):
     """Send CMD to the gdb thread.
     CMD can be either a function or a string.
     If it is a string, it is passed to gdb.execute."""
     if isinstance(cmd, str):
         cmd = Invoker(cmd)
-    gdb.post_event(cmd)
+
+    # Post the event and don't wait for the result.
+    gdb.post_event(Cancellable(cmd))
 
 
 def send_gdb_with_response(fn):
@@ -452,17 +503,12 @@  def send_gdb_with_response(fn):
     """
     if isinstance(fn, str):
         fn = Invoker(fn)
-    result_q = DAPQueue()
 
-    def message():
-        try:
-            val = fn()
-            result_q.put(val)
-        except (Exception, KeyboardInterrupt) as e:
-            result_q.put(e)
-
-    send_gdb(message)
+    # Post the event and wait for the result in result_q.
+    result_q = DAPQueue()
+    gdb.post_event(Cancellable(fn, result_q))
     val = result_q.get()
+
     if isinstance(val, (Exception, KeyboardInterrupt)):
         raise val
     return val
diff --git a/gdb/testsuite/gdb.dap/pause.exp b/gdb/testsuite/gdb.dap/pause.exp
index 9038d0f037f..1271b3bf95a 100644
--- a/gdb/testsuite/gdb.dap/pause.exp
+++ b/gdb/testsuite/gdb.dap/pause.exp
@@ -119,7 +119,7 @@  set cont_id [dap_send_request evaluate \
 		      $gdbfile]]
 
 # Wait a little to try to ensure the command is running.
-sleep 0.2
+#sleep 0.2
 set cancel_id [dap_send_request cancel \
 		   [format {o requestId [i %d]} $cont_id]]
 
@@ -140,7 +140,7 @@  set cont_id [dap_send_request evaluate \
 		      $gdbfile]]
 
 # Wait a little to try to ensure the command is running.
-sleep 0.2
+#sleep 0.2
 set cancel_id [dap_send_request cancel \
 		   [format {o requestId [i %d]} $cont_id]]