[v2] gdb/amd-dbgapi-target: suppress a repeated stop request

Message ID 20260520115215.2042499-1-tankutbaris.aktemur@amd.com
State New
Headers
Series [v2] gdb/amd-dbgapi-target: suppress a repeated stop request |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Test passed

Commit Message

Aktemur, Baris May 20, 2026, 11:52 a.m. UTC
  This revision uses a nested gdb_test_multiple/gdb_test when defining
the user command in the test.

Regards,
Baris

====

Sending a second stop request to an AMD GPU thread before fetching the
event caused by the first request leads to an error:

  wave_stop for wave_1 failed (The wave has an outstanding stop request)

Prevent sending a new stop request if there already is an outstanding
one.  The fix is in amd_dbgapi_target::stop.

A regression test is included.  The test uses non-stop mode and
executes the "interrupt" command twice, because in non-stop mode this
command uses the 'stop' target op, where the fix is applied.

To be able to execute two interrupt commands repeatedly, we define a
user command.
---
 gdb/amd-dbgapi-target.c                    |  8 ++-
 gdb/testsuite/gdb.rocm/interrupt-twice.cpp | 43 +++++++++++++
 gdb/testsuite/gdb.rocm/interrupt-twice.exp | 75 ++++++++++++++++++++++
 3 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.cpp
 create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.exp
  

Comments

Lancelot SIX May 20, 2026, 8:31 p.m. UTC | #1
Hi,

On 20/05/2026 12:52, Tankut Baris Aktemur wrote:
> This revision uses a nested gdb_test_multiple/gdb_test when defining
> the user command in the test.
> 
> Regards,
> Baris
> 
> ====
> 
> Sending a second stop request to an AMD GPU thread before fetching the
> event caused by the first request leads to an error:
> 
>    wave_stop for wave_1 failed (The wave has an outstanding stop request)
> 
> Prevent sending a new stop request if there already is an outstanding
> one.  The fix is in amd_dbgapi_target::stop.
> 
> A regression test is included.  The test uses non-stop mode and
> executes the "interrupt" command twice, because in non-stop mode this
> command uses the 'stop' target op, where the fix is applied.
> 
> To be able to execute two interrupt commands repeatedly, we define a
> user command.
> ---
>   gdb/amd-dbgapi-target.c                    |  8 ++-
>   gdb/testsuite/gdb.rocm/interrupt-twice.cpp | 43 +++++++++++++
>   gdb/testsuite/gdb.rocm/interrupt-twice.exp | 75 ++++++++++++++++++++++
>   3 files changed, 123 insertions(+), 3 deletions(-)
>   create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.cpp
>   create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.exp
> 
> diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c
> index 421ec8599ed..d44f03d0b80 100644
> --- a/gdb/amd-dbgapi-target.c
> +++ b/gdb/amd-dbgapi-target.c
> @@ -1090,14 +1090,16 @@ amd_dbgapi_target::stop (ptid_t ptid)
>   				    sizeof (state), &state);
>         if (status == AMD_DBGAPI_STATUS_SUCCESS)
>   	{
> -	  /* If the wave is already known to be stopped then do nothing.  */
> -	  if (state == AMD_DBGAPI_WAVE_STATE_STOP)
> +	  wave_info &wi = get_thread_wave_info (thread);
> +
> +	  /* If the wave is already known to be stopped or there is an
> +	     outstanding stop request, then do nothing.  */
> +	  if (state == AMD_DBGAPI_WAVE_STATE_STOP || wi.stopping)
>   	    return;
>   
>   	  status = amd_dbgapi_wave_stop (wave_id);
>   	  if (status == AMD_DBGAPI_STATUS_SUCCESS)
>   	    {
> -	      wave_info &wi = get_thread_wave_info (thread);
>   	      wi.stopping = true;
>   	      return;
>   	    }
> diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.cpp b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> new file mode 100644
> index 00000000000..fc8d2cca697
> --- /dev/null
> +++ b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> @@ -0,0 +1,43 @@
> +/* Copyright 2026 Free Software Foundation, Inc.
> +
> +   This file is part of GDB.
> +
> +   This program is free software; you can redistribute it and/or modify
> +   it under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3 of the License, or
> +   (at your option) any later version.
> +
> +   This program is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +   GNU General Public License for more details.
> +
> +   You should have received a copy of the GNU General Public License
> +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> +
> +#include <hip/hip_runtime.h>
> +#include "gdb_watchdog.h"
> +
> +__device__ void
> +loop ()
> +{
> +  while (true)
> +    __builtin_amdgcn_s_sleep (8);
> +}
> +
> +__global__ void
> +kern ()
> +{
> +  loop ();
> +}
> +
> +int
> +main ()
> +{
> +  /* Make sure that if anything goes wrong, the program eventually
> +     gets killed.  */
> +  gdb_watchdog (30);
> +
> +  kern<<<1, 1>>> ();
> +  return hipDeviceSynchronize () != hipSuccess;
> +}
> diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.exp b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
> new file mode 100644
> index 00000000000..3c653547dc2
> --- /dev/null
> +++ b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
> @@ -0,0 +1,75 @@
> +# Copyright 2026 Free Software Foundation, Inc.
> +
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 3 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +
> +# Test that sending repeated stop requests to a running GPU thread
> +# does not cause a failure.  This is done in non-stop mode because
> +# "interrupt" command in this mode uses the 'stop' target op.
> +
> +load_lib rocm.exp
> +
> +require allow_hipcc_tests
> +
> +standard_testfile .cpp
> +
> +if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} {
> +    return
> +}
> +
> +with_rocm_gpu_lock {
> +    save_vars { ::GDBFLAGS } {
> +	append ::GDBFLAGS " -ex \"set non-stop on\""
> +	clean_restart $::testfile
> +    }
> +
> +    gdb_breakpoint "loop" {allow-pending} {temporary}
> +    gdb_run_cmd
> +
> +    set gpu_thread "undefined"
> +
> +    gdb_test_multiple "" "hit breakpoint" {
> +	-re -wrap "Thread ($decimal) \[^\r\n\]*hit Temporary breakpoint.*" {
> +	    set gpu_thread $expect_out(1,string)
> +	    pass $gdb_test_name
> +	}
> +    }
> +
> +    gdb_test "thread $gpu_thread" "Switching to.*" "switch to gpu thread"
> +
> +    # Resume the thread in the background.  It will loop.  Then we
> +    # interrupt twice.  To be able to run the "interrupt" command back
> +    # to back, we define a user command.
> +    gdb_test "continue &" "Continuing." "continue async"
> +
> +    gdb_test_multiple "define inttwice" "" {
> +	-re "End with .*\r\n>$"  {
> +	    gdb_test "interrupt\ninterrupt\nend" "" $gdb_test_name
> +	}
> +    }
> +
> +    # For logging purposes.
> +    gdb_test "show user inttwice"

Not sure this is needed, but if we really have it, shouldn't we check 
the output?

     gdb_test "show user inttwice" \
	[multi_line \
	    "User command \"inttwice\":" \
	    "\[\t \]*interrupt" \
	    "\[\t \]*interrupt" \
	    ""]

> +
> +    gdb_test_multiple "inttwice" "interrupt twice" {
> +	-re "wave_stop \[^\r\n\]+ failed \[^\r\n\]+ outstanding stop request\\)\r\n" {
> +	    fail $gdb_test_name
> +	}
> +	-re "Thread $gpu_thread \[^\r\n\]*stopped" {
> +	    pass $gdb_test_name
> +	}
> +	-re "$gdb_prompt" {

I expect this should be (with " $")

	-re "$gdb_prompt $" {

> +	    exp_continue
> +	}
> +    }
> +}

With this adjusted, this LGTM, thanks.  I have tested this on gfx1031, 
and also applied to downstream rocgdb.

Best,
Lancelot.

Approved-by: Lancelot Six <lancelot.six@amd.com> (amdgpu)
  
Aktemur, Baris May 21, 2026, 6:45 a.m. UTC | #2
AMD General

Hi Lancelot,

On Wednesday, May 20, 2026 10:32 PM, Six, Lancelot wrote:
> Hi,
>
> On 20/05/2026 12:52, Tankut Baris Aktemur wrote:
> > This revision uses a nested gdb_test_multiple/gdb_test when defining
> > the user command in the test.
> >
> > Regards,
> > Baris
> >
> > ====
> >
> > Sending a second stop request to an AMD GPU thread before fetching the
> > event caused by the first request leads to an error:
> >
> >    wave_stop for wave_1 failed (The wave has an outstanding stop request)
> >
> > Prevent sending a new stop request if there already is an outstanding
> > one.  The fix is in amd_dbgapi_target::stop.
> >
> > A regression test is included.  The test uses non-stop mode and
> > executes the "interrupt" command twice, because in non-stop mode this
> > command uses the 'stop' target op, where the fix is applied.
> >
> > To be able to execute two interrupt commands repeatedly, we define a
> > user command.
> > ---
> >   gdb/amd-dbgapi-target.c                    |  8 ++-
> >   gdb/testsuite/gdb.rocm/interrupt-twice.cpp | 43 +++++++++++++
> >   gdb/testsuite/gdb.rocm/interrupt-twice.exp | 75 ++++++++++++++++++++++
> >   3 files changed, 123 insertions(+), 3 deletions(-)
> >   create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> >   create mode 100644 gdb/testsuite/gdb.rocm/interrupt-twice.exp
> >
> > diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c
> > index 421ec8599ed..d44f03d0b80 100644
> > --- a/gdb/amd-dbgapi-target.c
> > +++ b/gdb/amd-dbgapi-target.c
> > @@ -1090,14 +1090,16 @@ amd_dbgapi_target::stop (ptid_t ptid)
> >                                 sizeof (state), &state);
> >         if (status == AMD_DBGAPI_STATUS_SUCCESS)
> >     {
> > -     /* If the wave is already known to be stopped then do nothing.  */
> > -     if (state == AMD_DBGAPI_WAVE_STATE_STOP)
> > +     wave_info &wi = get_thread_wave_info (thread);
> > +
> > +     /* If the wave is already known to be stopped or there is an
> > +        outstanding stop request, then do nothing.  */
> > +     if (state == AMD_DBGAPI_WAVE_STATE_STOP || wi.stopping)
> >         return;
> >
> >       status = amd_dbgapi_wave_stop (wave_id);
> >       if (status == AMD_DBGAPI_STATUS_SUCCESS)
> >         {
> > -         wave_info &wi = get_thread_wave_info (thread);
> >           wi.stopping = true;
> >           return;
> >         }
> > diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> > new file mode 100644
> > index 00000000000..fc8d2cca697
> > --- /dev/null
> > +++ b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
> > @@ -0,0 +1,43 @@
> > +/* Copyright 2026 Free Software Foundation, Inc.
> > +
> > +   This file is part of GDB.
> > +
> > +   This program is free software; you can redistribute it and/or modify
> > +   it under the terms of the GNU General Public License as published by
> > +   the Free Software Foundation; either version 3 of the License, or
> > +   (at your option) any later version.
> > +
> > +   This program is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +   GNU General Public License for more details.
> > +
> > +   You should have received a copy of the GNU General Public License
> > +   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
> > +
> > +#include <hip/hip_runtime.h>
> > +#include "gdb_watchdog.h"
> > +
> > +__device__ void
> > +loop ()
> > +{
> > +  while (true)
> > +    __builtin_amdgcn_s_sleep (8);
> > +}
> > +
> > +__global__ void
> > +kern ()
> > +{
> > +  loop ();
> > +}
> > +
> > +int
> > +main ()
> > +{
> > +  /* Make sure that if anything goes wrong, the program eventually
> > +     gets killed.  */
> > +  gdb_watchdog (30);
> > +
> > +  kern<<<1, 1>>> ();
> > +  return hipDeviceSynchronize () != hipSuccess;
> > +}
> > diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.exp
> b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
> > new file mode 100644
> > index 00000000000..3c653547dc2
> > --- /dev/null
> > +++ b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
> > @@ -0,0 +1,75 @@
> > +# Copyright 2026 Free Software Foundation, Inc.
> > +
> > +# This program is free software; you can redistribute it and/or modify
> > +# it under the terms of the GNU General Public License as published by
> > +# the Free Software Foundation; either version 3 of the License, or
> > +# (at your option) any later version.
> > +#
> > +# This program is distributed in the hope that it will be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > +
> > +# Test that sending repeated stop requests to a running GPU thread
> > +# does not cause a failure.  This is done in non-stop mode because
> > +# "interrupt" command in this mode uses the 'stop' target op.
> > +
> > +load_lib rocm.exp
> > +
> > +require allow_hipcc_tests
> > +
> > +standard_testfile .cpp
> > +
> > +if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} {
> > +    return
> > +}
> > +
> > +with_rocm_gpu_lock {
> > +    save_vars { ::GDBFLAGS } {
> > +   append ::GDBFLAGS " -ex \"set non-stop on\""
> > +   clean_restart $::testfile
> > +    }
> > +
> > +    gdb_breakpoint "loop" {allow-pending} {temporary}
> > +    gdb_run_cmd
> > +
> > +    set gpu_thread "undefined"
> > +
> > +    gdb_test_multiple "" "hit breakpoint" {
> > +   -re -wrap "Thread ($decimal) \[^\r\n\]*hit Temporary breakpoint.*" {
> > +       set gpu_thread $expect_out(1,string)
> > +       pass $gdb_test_name
> > +   }
> > +    }
> > +
> > +    gdb_test "thread $gpu_thread" "Switching to.*" "switch to gpu thread"
> > +
> > +    # Resume the thread in the background.  It will loop.  Then we
> > +    # interrupt twice.  To be able to run the "interrupt" command back
> > +    # to back, we define a user command.
> > +    gdb_test "continue &" "Continuing." "continue async"
> > +
> > +    gdb_test_multiple "define inttwice" "" {
> > +   -re "End with .*\r\n>$"  {
> > +       gdb_test "interrupt\ninterrupt\nend" "" $gdb_test_name
> > +   }
> > +    }
> > +
> > +    # For logging purposes.
> > +    gdb_test "show user inttwice"
>
> Not sure this is needed, but if we really have it, shouldn't we check
> the output?
>
>      gdb_test "show user inttwice" \
>       [multi_line \
>           "User command \"inttwice\":" \
>           "\[\t \]*interrupt" \
>           "\[\t \]*interrupt" \
>           ""]

Ok, I removed it.

> > +
> > +    gdb_test_multiple "inttwice" "interrupt twice" {
> > +   -re "wave_stop \[^\r\n\]+ failed \[^\r\n\]+ outstanding stop request\\)\r\n" {
> > +       fail $gdb_test_name
> > +   }
> > +   -re "Thread $gpu_thread \[^\r\n\]*stopped" {
> > +       pass $gdb_test_name
> > +   }
> > +   -re "$gdb_prompt" {
>
> I expect this should be (with " $")
>
>       -re "$gdb_prompt $" {

This doesn't work in general because the "Thread ... stopped" output
comes asynchronously after the prompt.

> > +       exp_continue
> > +   }
> > +    }
> > +}
>
> With this adjusted, this LGTM, thanks.  I have tested this on gfx1031,
> and also applied to downstream rocgdb.
>
> Best,
> Lancelot.
>
> Approved-by: Lancelot Six <lancelot.six@amd.com> (amdgpu)

Thank you.  I'm pushing the patch without the " $" change for the reason
I wrote above.

-Baris
  

Patch

diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c
index 421ec8599ed..d44f03d0b80 100644
--- a/gdb/amd-dbgapi-target.c
+++ b/gdb/amd-dbgapi-target.c
@@ -1090,14 +1090,16 @@  amd_dbgapi_target::stop (ptid_t ptid)
 				    sizeof (state), &state);
       if (status == AMD_DBGAPI_STATUS_SUCCESS)
 	{
-	  /* If the wave is already known to be stopped then do nothing.  */
-	  if (state == AMD_DBGAPI_WAVE_STATE_STOP)
+	  wave_info &wi = get_thread_wave_info (thread);
+
+	  /* If the wave is already known to be stopped or there is an
+	     outstanding stop request, then do nothing.  */
+	  if (state == AMD_DBGAPI_WAVE_STATE_STOP || wi.stopping)
 	    return;
 
 	  status = amd_dbgapi_wave_stop (wave_id);
 	  if (status == AMD_DBGAPI_STATUS_SUCCESS)
 	    {
-	      wave_info &wi = get_thread_wave_info (thread);
 	      wi.stopping = true;
 	      return;
 	    }
diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.cpp b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
new file mode 100644
index 00000000000..fc8d2cca697
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/interrupt-twice.cpp
@@ -0,0 +1,43 @@ 
+/* Copyright 2026 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <hip/hip_runtime.h>
+#include "gdb_watchdog.h"
+
+__device__ void
+loop ()
+{
+  while (true)
+    __builtin_amdgcn_s_sleep (8);
+}
+
+__global__ void
+kern ()
+{
+  loop ();
+}
+
+int
+main ()
+{
+  /* Make sure that if anything goes wrong, the program eventually
+     gets killed.  */
+  gdb_watchdog (30);
+
+  kern<<<1, 1>>> ();
+  return hipDeviceSynchronize () != hipSuccess;
+}
diff --git a/gdb/testsuite/gdb.rocm/interrupt-twice.exp b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
new file mode 100644
index 00000000000..3c653547dc2
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/interrupt-twice.exp
@@ -0,0 +1,75 @@ 
+# Copyright 2026 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Test that sending repeated stop requests to a running GPU thread
+# does not cause a failure.  This is done in non-stop mode because
+# "interrupt" command in this mode uses the 'stop' target op.
+
+load_lib rocm.exp
+
+require allow_hipcc_tests
+
+standard_testfile .cpp
+
+if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} {
+    return
+}
+
+with_rocm_gpu_lock {
+    save_vars { ::GDBFLAGS } {
+	append ::GDBFLAGS " -ex \"set non-stop on\""
+	clean_restart $::testfile
+    }
+
+    gdb_breakpoint "loop" {allow-pending} {temporary}
+    gdb_run_cmd
+
+    set gpu_thread "undefined"
+
+    gdb_test_multiple "" "hit breakpoint" {
+	-re -wrap "Thread ($decimal) \[^\r\n\]*hit Temporary breakpoint.*" {
+	    set gpu_thread $expect_out(1,string)
+	    pass $gdb_test_name
+	}
+    }
+
+    gdb_test "thread $gpu_thread" "Switching to.*" "switch to gpu thread"
+
+    # Resume the thread in the background.  It will loop.  Then we
+    # interrupt twice.  To be able to run the "interrupt" command back
+    # to back, we define a user command.
+    gdb_test "continue &" "Continuing." "continue async"
+
+    gdb_test_multiple "define inttwice" "" {
+	-re "End with .*\r\n>$"  {
+	    gdb_test "interrupt\ninterrupt\nend" "" $gdb_test_name
+	}
+    }
+
+    # For logging purposes.
+    gdb_test "show user inttwice"
+
+    gdb_test_multiple "inttwice" "interrupt twice" {
+	-re "wave_stop \[^\r\n\]+ failed \[^\r\n\]+ outstanding stop request\\)\r\n" {
+	    fail $gdb_test_name
+	}
+	-re "Thread $gpu_thread \[^\r\n\]*stopped" {
+	    pass $gdb_test_name
+	}
+	-re "$gdb_prompt" {
+	    exp_continue
+	}
+    }
+}