[12/12] gdb: initial support for ROCm platform (AMDGPU) debugging

Message ID 20221206135729.3937767-13-simon.marchi@efficios.com
State New
Series Initial support for ROCm platform (AMDGPU) debugging |

Commit Message

Simon Marchi Dec. 6, 2022, 1:57 p.m. UTC
  This patch adds the foundation for GDB to be able to debug programs
offloaded to AMD GPUs using the AMD ROCm platform [1].  The latest
public ROCm release at the time of writing is 5.4, so that is what
this patch targets.

The ROCm platform allows host programs to schedule bits of code for
execution on GPUs or similar accelerators.  The programs running on GPUs
are typically referred to as `kernels` (not related to operating system
kernels).

Programs offloaded with the AMD ROCm platform can be written in the HIP
language [2], OpenCL and OpenMP, but we're going to focus on HIP here.
The HIP language consists of a C++ Runtime API and a kernel language.
Here's an example of a very simple HIP program:

    #include "hip/hip_runtime.h"
    #include <cassert>
    #include <cstdio>   /* For printf.  */

    __global__ void
    do_an_addition (int a, int b, int *out)
    {
      *out = a + b;
    }

    int
    main ()
    {
      int *result_ptr, result;

      /* Allocate memory for the device to write the result to.  */
      hipError_t error = hipMalloc (&result_ptr, sizeof (int));
      assert (error == hipSuccess);

      /* Run `do_an_addition` on one workgroup containing one work item.  */
      do_an_addition<<<dim3(1), dim3(1), 0, 0>>> (1, 2, result_ptr);

      /* Copy result from device to host.  Note that this acts as a synchronization
         point, waiting for the kernel dispatch to complete.  */
      error = hipMemcpyDtoH (&result, result_ptr, sizeof (int));
      assert (error == hipSuccess);

      printf ("result is %d\n", result);
      assert (result == 3);

      return 0;
    }

This program can be compiled with:

    $ hipcc simple.cpp -g -O0 -o simple

... where `hipcc` is the HIP compiler, shipped with ROCm releases.  This
generates an ELF binary for the host architecture, containing another
ELF binary with the device code.  The ELF for the device can be
inspected with:

    $ roc-obj-ls simple
    1       host-x86_64-unknown-linux                                           file://simple#offset=8192&size=0
    1       hipv4-amdgcn-amd-amdhsa--gfx906                                     file://simple#offset=8192&size=34216
    $ roc-obj-extract 'file://simple#offset=8192&size=34216'
    $ file simple-offset8192-size34216.co
    simple-offset8192-size34216.co: ELF 64-bit LSB shared object, *unknown arch 0xe0* version 1, dynamically linked, with debug_info, not stripped
                                                                                 ^
                       amdgcn architecture that my `file` doesn't know about ----´

Running the program gives the very unimpressive result:

    $ ./simple
    result is 3

While running, this host program has copied the device program into the
GPU's memory and spawned an execution thread on it.  The goal of this
GDB port is to let the user debug host threads and these GPU threads
simultaneously.  Here's a sample session using a GDB with this patch
applied:

    $ ./gdb -q -nx --data-directory=data-directory ./simple
    Reading symbols from ./simple...
    (gdb) break do_an_addition
    Function "do_an_addition" not defined.
    Make breakpoint pending on future shared library load? (y or [n]) y
    Breakpoint 1 (do_an_addition) pending.
    (gdb) r
    Starting program: /home/smarchi/build/binutils-gdb-amdgpu/gdb/simple
    [Thread debugging using libthread_db enabled]
    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
    [New Thread 0x7ffff5db7640 (LWP 1082911)]
    [New Thread 0x7ffef53ff640 (LWP 1082913)]
    [Thread 0x7ffef53ff640 (LWP 1082913) exited]
    [New Thread 0x7ffdecb53640 (LWP 1083185)]
    [New Thread 0x7ffff54bf640 (LWP 1083186)]
    [Thread 0x7ffdecb53640 (LWP 1083185) exited]
    [Switching to AMDGPU Wave 2:2:1:1 (0,0,0)/0]

    Thread 6 hit Breakpoint 1, do_an_addition (a=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        b=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        out=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>) at simple.cpp:24
    24        *out = a + b;
    (gdb) info inferiors
      Num  Description       Connection           Executable
    * 1    process 1082907   1 (native)           /home/smarchi/build/binutils-gdb-amdgpu/gdb/simple
    (gdb) info threads
      Id   Target Id                                    Frame
      1    Thread 0x7ffff5dc9240 (LWP 1082907) "simple" 0x00007ffff5e9410b in ?? () from /opt/rocm-5.4.0/lib/libhsa-runtime64.so.1
      2    Thread 0x7ffff5db7640 (LWP 1082911) "simple" __GI___ioctl (fd=3, request=3222817548) at ../sysdeps/unix/sysv/linux/ioctl.c:36
      5    Thread 0x7ffff54bf640 (LWP 1083186) "simple" __GI___ioctl (fd=3, request=3222817548) at ../sysdeps/unix/sysv/linux/ioctl.c:36
    * 6    AMDGPU Wave 2:2:1:1 (0,0,0)/0                do_an_addition (
        a=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        b=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        out=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>) at simple.cpp:24
    (gdb) bt
    Python Exception <class 'gdb.error'>: Unhandled dwarf expression opcode 0xe1
    #0  do_an_addition (a=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        b=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>,
        out=<error reading variable: DWARF-2 expression error: `DW_OP_regx' operations must be used either alone or in conjunction with DW_OP_piece or DW_OP_bit_piece.>) at simple.cpp:24
    (gdb) continue
    Continuing.
    result is 3
    warning: Temporarily disabling breakpoints for unloaded shared library "file:///home/smarchi/build/binutils-gdb-amdgpu/gdb/simple#offset=8192&size=67208"
    [Thread 0x7ffff54bf640 (LWP 1083186) exited]
    [Thread 0x7ffff5db7640 (LWP 1082911) exited]
    [Inferior 1 (process 1082907) exited normally]

One thing to notice is the host and GPU threads appearing under
the same inferior.  This is a design goal for us, as programmers tend to
think of the threads running on the GPU as part of the same program as
the host threads, so showing them in the same inferior in GDB seems
natural.  Also, the host and GPU threads share a global memory space,
which fits the inferior model.

Another thing to notice is the error messages when trying to read
variables or print a backtrace.  This is expected for the moment, since
the AMD GPU compiler produces DWARF that uses some non-standard
extensions:

  https://llvm.org/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.html

There were already some patches posted by Zoran Zaric earlier to make
GDB support these extensions:

  https://inbox.sourceware.org/gdb-patches/20211105113849.118800-1-zoran.zaric@amd.com/

We think it's better to get the basic support for AMD GPU in first,
which will then give a better justification for GDB to support these
extensions.

GPU threads are named `AMDGPU Wave`: a wave is essentially a hardware
thread using the SIMT (single-instruction, multiple-threads) [3]
execution model.

GDB uses the amd-dbgapi library [4], included in the ROCm platform, for
a few things related to AMD GPU thread debugging.  Different components
talk to the library, as shown in the following diagram:

    +---------------------------+     +-------------+     +------------------+
    | GDB   | amd-dbgapi target | <-> |     AMD     |     |    Linux kernel  |
    |       +-------------------+     |   Debugger  |     +--------+         |
    |       | amdgcn gdbarch    | <-> |     API     | <=> | AMDGPU |         |
    |       +-------------------+     |             |     | driver |         |
    |       | solib-rocm        | <-> | (dbgapi.so) |     +--------+---------+
    +---------------------------+     +-------------+

  - The amd-dbgapi target is a target_ops implementation used to control
    execution of GPU threads.  While the debugging of host threads works
    by using the ptrace / wait Linux kernel interface (as usual), control
    of GPU threads is done through a special interface (dubbed `kfd`)
    exposed by the `amdgpu` Linux kernel module.  GDB doesn't interact
    directly with `kfd`, but instead goes through the amd-dbgapi library
    (AMD Debugger API on the diagram).

    Since it provides execution control, the amd-dbgapi target should
    normally be a process_stratum_target, not just a target_ops.  More
    on that later.

  - The amdgcn gdbarch (describing the hardware architecture of the GPU
    execution units) offloads some requests to the amd-dbgapi library,
    so that knowledge about the various architectures doesn't need to be
    duplicated and baked into GDB.  This is the case, for example, for
    things like the list of registers.

  - The solib-rocm component is an solib provider that fetches the list of
    code objects loaded on the device from the amd-dbgapi library, and
    makes GDB read their symbols.  This is very similar to other solib
    providers that handle shared libraries, except that here the shared
    libraries are the pieces of code loaded on the device.

Given that Linux host threads are managed by the linux-nat target, and
the GPU threads are managed by the amd-dbgapi target, having all threads
appear in the same inferior requires the two targets to be in that
inferior's target stack.  However, there can only be one
process_stratum_target in a given target stack, since there can be only
one target per slot.  To achieve this, we therefore resort to the
hack^W solution of placing the amd-dbgapi target in the arch_stratum
slot of the target stack, on top of the linux-nat target.  Doing so
allows the amd-dbgapi target to intercept target calls and handle them
if they concern GPU threads, and to defer to the target beneath
otherwise.  See
amd_dbgapi_target::fetch_registers for a simple example:

    void
    amd_dbgapi_target::fetch_registers (struct regcache *regcache, int regno)
    {
      if (!ptid_is_gpu (regcache->ptid ()))
        {
          beneath ()->fetch_registers (regcache, regno);
          return;
        }

      // handle it
    }

ptids of GPU threads are crafted with the following pattern:

  (pid, 1, wave id)

Where pid is the inferior's pid and "wave id" is the wave handle handed
to us by the amd-dbgapi library (in practice, a monotonically
incrementing integer).  The idea is that on Linux systems, the
combination (pid != 1, lwp == 1) is not possible.  lwp == 1 would always
belong to the init process, which would also have pid == 1 (and it's
improbable for the init process to offload work to the GPU, let alone
for the user to debug it).  We can therefore differentiate GPU and
non-GPU ptids this way.  See ptid_is_gpu for more details.
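
To make this concrete, here is a rough sketch of what such a check can
look like.  This is not the patch's actual ptid_is_gpu implementation,
just an illustration that only relies on GDB's ptid_t accessors and the
ptid layout described above:

    /* Sketch only: decide whether PTID designates a GPU wave, based on
       the (pid, 1, wave id) encoding described above.  */

    static bool
    ptid_is_gpu_sketch (ptid_t ptid)
    {
      /* On Linux, lwp == 1 can only belong to the init process, which
         also has pid == 1, so (pid != 1, lwp == 1) is never used by a
         regular host thread and is free for GPU waves.  */
      return ptid.pid () != 1 && ptid.lwp () == 1;
    }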

Note that we believe that this scheme could break down in the context of
containers, where the initial process executed in a container has pid 1
(in its own pid namespace).  For instance, if you were to execute a ROCm
program in a container, then spawn a GDB in that container and attach to
the process, it would likely not work.  This is a known limitation.  A
workaround for this is to have a dummy process (like a shell) fork and
execute the program of interest.
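
For illustration only (hypothetical file names), such a dummy process
could be a small wrapper script used as the container's entry point, so
that the program of interest never ends up with pid 1:

    #!/bin/sh
    # Hypothetical wrapper: the shell keeps pid 1 in the container's pid
    # namespace, and the ROCm program runs as a child with pid != 1.
    ./simple
    exit $?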

The amd-dbgapi target watches native inferiors, and "attaches" to them
using amd_dbgapi_process_attach, which gives it a notifier fd that is
registered in the event loop (see enable_amd_dbgapi).  Note that this
isn't the same "attach" as in PTRACE_ATTACH, but being ptrace-attached
is a precondition for amd_dbgapi_process_attach to work.  When the
debugged process enables the ROCm runtime, the amd-dbgapi target gets
notified through that fd, and pushes itself on the target stack of the
inferior.  The amd-dbgapi target is then able to intercept target_ops
calls.  If the debugged process disables the ROCm runtime, the
amd-dbgapi target unpushes itself from the target stack.

This way, the amd-dbgapi target's footprint stays minimal when debugging
a process that doesn't use the AMD ROCm platform: it does not intercept
target calls.
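
As an illustration (not the patch's actual code), the push/unpush
behavior boils down to something like the following sketch, where
`the_amd_dbgapi_target' is assumed to be the global instance of the
target and `runtime_enabled' stands for the state reported through the
notifier fd:

    /* Sketch only: react to the ROCm runtime being enabled or disabled
       for INF.  */

    static void
    update_amd_dbgapi_target (inferior *inf, bool runtime_enabled)
    {
      if (runtime_enabled)
        /* Start intercepting target calls for this inferior.  */
        inf->push_target (&the_amd_dbgapi_target);
      else
        /* Get out of the way again, back to a minimal footprint.  */
        inf->unpush_target (&the_amd_dbgapi_target);
    }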

The amd-dbgapi library is found using pkg-config.  Since enabling
support for the amdgpu architecture (amdgpu-tdep.c) depends on the
amd-dbgapi library being present, we have the following logic for
the interaction with --target and --enable-targets:

 - if the user explicitly asks for amdgcn support with
   --target=amdgcn-*-* or --enable-targets=amdgcn-*-*, we probe for
   the amd-dbgapi and fail if not found

 - if the user uses --enable-targets=all, we probe for amd-dbgapi,
   enable amdgcn support if found, disable amdgcn support if not found

 - if the user uses --enable-targets=all and --with-amd-dbgapi=yes,
   we probe for amd-dbgapi, enable amdgcn if found and fail if not found

 - if the user uses --enable-targets=all and --with-amd-dbgapi=no,
   we do not probe for amd-dbgapi, disable amdgcn support

 - otherwise, amd-dbgapi is not probed for and support for amdgcn is not
   enabled
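
As an example (hypothetical invocation, using only the flags described
above), a build that enables all targets but requires ROCm support to
be present would be configured with:

    $ ./configure --enable-targets=all --with-amd-dbgapi=yes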

Finally, a simple test is included.  It only tests hitting a breakpoint
in device code and resuming execution, pretty much like the example
shown above.

[1] https://docs.amd.com/category/ROCm_v5.4
[2] https://docs.amd.com/bundle/HIP-Programming-Guide-v5.4
[3] https://en.wikipedia.org/wiki/Single_instruction,_multiple_threads
[4] https://docs.amd.com/bundle/ROCDebugger-API-Guide-v5.4

Change-Id: I591edca98b8927b1e49e4b0abe4e304765fed9ee
Co-Authored-By: Zoran Zaric <zoran.zaric@amd.com>
Co-Authored-By: Laurent Morichetti <laurent.morichetti@amd.com>
Co-Authored-By: Tony Tye <Tony.Tye@amd.com>
Co-Authored-By: Lancelot SIX <lancelot.six@amd.com>
Co-Authored-By: Pedro Alves <pedro@palves.net>
---
 gdb/Makefile.in                   |   17 +-
 gdb/NEWS                          |    7 +
 gdb/README                        |   15 +
 gdb/amd-dbgapi-target.c           | 1966 +++++++++++++++++++++++++++++
 gdb/amd-dbgapi-target.h           |  116 ++
 gdb/amdgpu-tdep.c                 | 1367 ++++++++++++++++++++
 gdb/amdgpu-tdep.h                 |   93 ++
 gdb/configure                     |  425 +++++--
 gdb/configure.ac                  |   52 +
 gdb/configure.tgt                 |   23 +-
 gdb/doc/gdb.texinfo               |  758 +++++++++++
 gdb/regcache.c                    |    3 +-
 gdb/solib-rocm.c                  |  679 ++++++++++
 gdb/testsuite/gdb.rocm/simple.cpp |   48 +
 gdb/testsuite/gdb.rocm/simple.exp |   52 +
 gdb/testsuite/lib/future.exp      |   38 +
 gdb/testsuite/lib/gdb.exp         |    7 +
 gdb/testsuite/lib/rocm.exp        |   94 ++
 18 files changed, 5622 insertions(+), 138 deletions(-)
 create mode 100644 gdb/amd-dbgapi-target.c
 create mode 100644 gdb/amd-dbgapi-target.h
 create mode 100644 gdb/amdgpu-tdep.c
 create mode 100644 gdb/amdgpu-tdep.h
 create mode 100644 gdb/solib-rocm.c
 create mode 100644 gdb/testsuite/gdb.rocm/simple.cpp
 create mode 100644 gdb/testsuite/gdb.rocm/simple.exp
 create mode 100644 gdb/testsuite/lib/rocm.exp
  

Comments

Eli Zaretskii Dec. 6, 2022, 3 p.m. UTC | #1
> Cc: Simon Marchi <simon.marchi@efficios.com>,
>  Zoran Zaric <zoran.zaric@amd.com>,
>  Laurent Morichetti <laurent.morichetti@amd.com>,
>  Tony Tye <Tony.Tye@amd.com>, Lancelot SIX <lancelot.six@amd.com>,
>  Pedro Alves <pedro@palves.net>
> Date: Tue,  6 Dec 2022 08:57:29 -0500
> From: Simon Marchi via Gdb-patches <gdb-patches@sourceware.org>
> 
> This patch adds the foundation for GDB to be able to debug programs
> offloaded to AMD GPUs using the AMD ROCm platform [1].  The latest
> public release of the ROCm release at the time of writing is 5.4, so
> this is what this patch targets.

Thanks, the NEWS part is OK.
  
Simon Marchi Dec. 6, 2022, 3:10 p.m. UTC | #2
On 12/6/22 10:00, Eli Zaretskii via Gdb-patches wrote:
>> Cc: Simon Marchi <simon.marchi@efficios.com>,
>>  Zoran Zaric <zoran.zaric@amd.com>,
>>  Laurent Morichetti <laurent.morichetti@amd.com>,
>>  Tony Tye <Tony.Tye@amd.com>, Lancelot SIX <lancelot.six@amd.com>,
>>  Pedro Alves <pedro@palves.net>
>> Date: Tue,  6 Dec 2022 08:57:29 -0500
>> From: Simon Marchi via Gdb-patches <gdb-patches@sourceware.org>
>>
>> This patch adds the foundation for GDB to be able to debug programs
>> offloaded to AMD GPUs using the AMD ROCm platform [1].  The latest
>> public release of the ROCm release at the time of writing is 5.4, so
>> this is what this patch targets.
> 
> Thanks, the NEWS part is OK.

Thanks.  There are also changes to doc/gdb.texinfo, do you intend to
look at these as well?

Simon
  
Eli Zaretskii Dec. 6, 2022, 3:42 p.m. UTC | #3
> Cc: Simon Marchi <simon.marchi@efficios.com>,
>  Zoran Zaric <zoran.zaric@amd.com>,
>  Laurent Morichetti <laurent.morichetti@amd.com>,
>  Tony Tye <Tony.Tye@amd.com>, Lancelot SIX <lancelot.six@amd.com>,
>  Pedro Alves <pedro@palves.net>
> Date: Tue,  6 Dec 2022 08:57:29 -0500
> From: Simon Marchi via Gdb-patches <gdb-patches@sourceware.org>
> 
> --- a/gdb/doc/gdb.texinfo
> +++ b/gdb/doc/gdb.texinfo
> @@ -7021,6 +7021,8 @@ signal happened.  @value{GDBN} alerts you to the context switch with a
>  message such as @samp{[Switching to Thread @var{n}]} to identify the
>  thread.  
>  
> +@node set scheduler-locking

This @node is without any @chapter/@section, and does not appear in any
@menu.  That doesn't look right; did you actually succeed in building the
manual with these changes?

> +The following @acronym{AMD GPU} architectures are supported:
> +
> +@table @emph
> +
> +@item @samp{gfx900}
> +AMD Vega 10 devices, displayed as @samp{vega10} by @value{GDBN}.
> +
> +@item @samp{gfx906}
> +AMD Vega 7nm devices, displayed as @samp{vega20} by @value{GDBN}.

Do we really need this long list of architectures in the GDB manual?  It
sounds like an ad for AMD...

> +@smallexample
> +hipcc -O0 -g --offload-arch=gfx900 --offload-arch=gfx906 bit_extract.cpp -o bit_extract
> +@end smallexample

This and other @smallexamples in the patch have too long lines; please break
them into two or more, to avoid problems with the printed format of the
manual.

> +@subsubsection @acronym{AMD GPU} Wavefronts

I think a @cindex about wavefronts would be useful here.

> +@item file_path
> +The file's path specified as a URI encoded UTF-8 string.  In URI
> +encoding, every character that is not:
> +
> +@itemize
> +@item In the @samp{a-z}, @samp{A-Z}, @samp{0-9} ranges
> +@item @samp{/}, @samp{_}, @samp{.}, @samp{~} or @samp{-}
> +@end itemize
> +
> +is encoded as two uppercase hexadecimal digits proceeded by @samp{%}.

You want @noindent before the last line.

> +Directories in the path are separated by @samp{/}.

GNU coding standards frown on using "path" for anything that is not
PATH-style lists of directories.  Please use "file name" instead.

> +The @acronym{AMD GPU} entities have the following target identifier formats:
> +
> +@table @var

@var is wrong here, since the @item text is not a meta-syntactic variable:
it doesn't stand for something else.  I'd use @asis instead.

> +Make sure the container user is a member of the @var{render} group for
> +Ubuntu 20.04 onward and the @var{video} group for all other

??? Why are we talking about specific Linux distros in the manual?

Finally: maybe it's just me, but isn't this documentation way too detailed?
It weighs in at 800 lines, and includes many details that seem to be more
related to AMD, GPU, and HIP than to GDB.  Would it be reasonable to make
this section shorter by omitting too low-level and unrelated details?

Thanks.
  
Simon Marchi Dec. 7, 2022, 2:17 a.m. UTC | #4
>> --- a/gdb/doc/gdb.texinfo
>> +++ b/gdb/doc/gdb.texinfo
>> @@ -7021,6 +7021,8 @@ signal happened.  @value{GDBN} alerts you to the context switch with a
>>  message such as @samp{[Switching to Thread @var{n}]} to identify the
>>  thread.  
>>  
>> +@node set scheduler-locking
> 
> This @node is without any @chapter/@section, and does not appear in any
> @menu.  That doesn't look right; did you actually succeed in building the
> manual with these changes?
Yes, it builds.  It is used as a destination for a @pxref{set
scheduler-locking} later in the patch; if I remove it, the manual
doesn't build:

    gdb.texinfo:26469: @pxref reference to nonexistent node `set scheduler-locking'

But I see the problem: in the HTML output it introduces a new page
starting at that point, which we don't want.  Perhaps we should use
@anchor instead?

>> +The following @acronym{AMD GPU} architectures are supported:
>> +
>> +@table @emph
>> +
>> +@item @samp{gfx900}
>> +AMD Vega 10 devices, displayed as @samp{vega10} by @value{GDBN}.
>> +
>> +@item @samp{gfx906}
>> +AMD Vega 7nm devices, displayed as @samp{vega20} by @value{GDBN}.
> 
> Do we really need this long list of architectures in the GDB manual?  It
> sounds like an ad for AMD...

We found it useful, as people often ask which devices / models the
debugger supports.  AMD produces a lot of GPU models.  A subset of that
can run ROCm programs.  And a subset of that support debugging.  I think
it's useful to tell users which devices GDB is expected to work with.

And it would not be a very good ad either, as most of these devices are
far from the latest and greatest :).

>> +@smallexample
>> +hipcc -O0 -g --offload-arch=gfx900 --offload-arch=gfx906 bit_extract.cpp -o bit_extract
>> +@end smallexample
> 
> This and other @smallexamples in the patch have too long lines; please break
> them into two or more, to avoid problems with the printed format of the
> manual.

What is the maximum line length for this?

I'll try, but it's a bit difficult when quoting actual GDB output.  For
instance, how would you do this one?

@smallexample
(@value{GDBP}) info sharedlibrary
From                To                  Syms Read   Shared Object Library
0x00007fd120664ac0  0x00007fd120682790  Yes (*)     /lib64/ld-linux-x86-64.so.2
...
0x00007fd0125d8ec0  0x00007fd015f21630  Yes (*)     /opt/rocm-3.5.0/hip/lib/../../lib/libamd_comgr.so
0x00007fd11d74e870  0x00007fd11d75a868  Yes (*)     /lib/x86_64-linux-gnu/libtinfo.so.5
0x00007fd11d001000  0x00007fd11d00173c  Yes         file:///home/rocm/examples/bit_extract#offset=6477&size=10832
0x00007fd11d008000  0x00007fd11d00adc0  Yes (*)     memory://95557/mem#offset=0x7fd0083e7f60&size=41416
(*): Shared library is missing debugging information.
(@value{GDBP})
@end smallexample

> 
>> +@subsubsection @acronym{AMD GPU} Wavefronts
> 
> I think a @cindex about wavefronts would be useful here.

I just add a `@cindex Wavefronts` under the line quoted above?

> 
>> +@item file_path
>> +The file's path specified as a URI encoded UTF-8 string.  In URI
>> +encoding, every character that is not:
>> +
>> +@itemize
>> +@item In the @samp{a-z}, @samp{A-Z}, @samp{0-9} ranges
>> +@item @samp{/}, @samp{_}, @samp{.}, @samp{~} or @samp{-}
>> +@end itemize
>> +
>> +is encoded as two uppercase hexadecimal digits proceeded by @samp{%}.
> 
> You want @noindent before the last line.

I can add it, but I don't see the difference (at least in the HTML
and PDF outputs).

>> +Directories in the path are separated by @samp{/}.
> 
> GNU coding standards frown on using "path" for anything that is not
> PATH-style lists of directories.  Please use "file name" instead.

Done.

>> +The @acronym{AMD GPU} entities have the following target identifier formats:
>> +
>> +@table @var
> 
> @var is wrong here, since the @item text is not a meta-syntactic variable:
> it doesn't stand for something else.  I'd use @asis instead.

Done.

>> +Make sure the container user is a member of the @var{render} group for
>> +Ubuntu 20.04 onward and the @var{video} group for all other
> 
> ??? Why are we talking about specific Linux distros in the manual?

Hmm, indeed, that probably doesn't belong here.  The downstream ROCm GDB
manual contains a lot of information about how it integrates in the rest
of the ROCm ecosystem.  Some of this information is probably not
relevant to upstream GDB, where we should stick to information
specifically about using GDB itself.  I removed a bunch of it already,
but it looks like there are still things we should remove.

> Finally: maybe it's just me, but isn't this documentation way too detailed?
> It weighs in at 800 lines, and includes many details that seem to be more
> related to AMD, GPU, and HIP than to GDB.  Would it be reasonable to make
> this section shorter by omitting too low-level and unrelated details?

I went over the page, and while I agree it's very thorough and detailed,
my impression is that it's all information that is one way or another
related to how GDB interacts with ROCm / HIP / AMD GPUs.  So, all
information that could be useful to someone with good knowledge of ROCm
/ HIP / AMD GPUs, if they wanted to use GDB to debug their program.  For
instance, the description of when GDB reports a SIGABRT is useful, as
the mapping between target debug events and Unix signals in GDB is kind
of arbitrary.

If you can point out specific parts that you think are not relevant, we
can discuss them specifically.

Simon
  
Eli Zaretskii Dec. 7, 2022, 1:29 p.m. UTC | #5
> Date: Tue, 6 Dec 2022 21:17:25 -0500
> Cc: gdb-patches@sourceware.org, zoran.zaric@amd.com,
>  laurent.morichetti@amd.com, Tony.Tye@amd.com, lancelot.six@amd.com,
>  pedro@palves.net
> From: Simon Marchi <simark@simark.ca>
> 
> >> +@node set scheduler-locking
> > 
> > This @node is without any @chapter/@section, and does not appear in any
> > @menu.  That doesn't look right; did you actually succeed in building the
> > manual with these changes?
> Yes, it builds.  It is used as a destination for a @pxref{set
> scheduler-locking} later in the patch, if I remove it doesn't build:
> 
>     gdb.texinfo:26469: @pxref reference to nonexistent node `set scheduler-locking'
> 
> But I see the problem, in the HTML it introduces a new page starting at
> that point, we don't want that.  Perhaps we should use @anchor instead?

You could use @anchor if all you want is a place to direct an @xref.

> >> +The following @acronym{AMD GPU} architectures are supported:
> >> +
> >> +@table @emph
> >> +
> >> +@item @samp{gfx900}
> >> +AMD Vega 10 devices, displayed as @samp{vega10} by @value{GDBN}.
> >> +
> >> +@item @samp{gfx906}
> >> +AMD Vega 7nm devices, displayed as @samp{vega20} by @value{GDBN}.
> > 
> > Do we really need this long list of architectures in the GDB manual?  It
> > sounds like an ad for AMD...
> 
> We found it useful, as people often ask which devices / models the
> debugger supports.  AMD produces a lot of GPU models.  A subset of that
> can run ROCm programs.  And a subset of that support debugging.  I think
> it's useful to tell users which devices GDB is expected to work with.
> 
> And it would not be a very good ad either, as most of these devices are
> far from the latest and greatest :).

I understand, but this is a _GDB_ manual, not a manual for debugging
AMD GPU programs.  We need to draw the line at some point.  Why cannot
these details be in some README somewhere, or on the Wiki?

> > This and other @smallexamples in the patch have too long lines; please break
> > them into two or more, to avoid problems with the printed format of the
> > manual.
> 
> What is the maximum line length for this?

I think 70 or 72.

> I'll try, but it's a bit difficult when quoting actual GDB output.  For
> instance, how would you do this one?
> 
> @smallexample
> (@value{GDBP}) info sharedlibrary
> From                To                  Syms Read   Shared Object Library
> 0x00007fd120664ac0  0x00007fd120682790  Yes (*)     /lib64/ld-linux-x86-64.so.2

Use shorter addresses and directory names: they are immaterial for
your purposes here.

> >> +@subsubsection @acronym{AMD GPU} Wavefronts
> > 
> > I think a @cindex about wavefronts would be useful here.
> 
> I just add a `@cindex Wavefronts` under the line quoted above?

Yes, but "wavefronts", lower-case.  In general, index entries should
not use upper-case unless really necessary, because the sorting order
of mixed-case text depends on the locale and the underlying C library.

> >> +@item file_path
> >> +The file's path specified as a URI encoded UTF-8 string.  In URI
> >> +encoding, every character that is not:
> >> +
> >> +@itemize
> >> +@item In the @samp{a-z}, @samp{A-Z}, @samp{0-9} ranges
> >> +@item @samp{/}, @samp{_}, @samp{.}, @samp{~} or @samp{-}
> >> +@end itemize
> >> +
> >> +is encoded as two uppercase hexadecimal digits proceeded by @samp{%}.
> > 
> > You want @noindent before the last line.
> 
> I can add it, but I don't see the difference (at least in the HTML
> and PDF outputs).

It's unreliable to rely on this to produce un-indented lines.
Depending on the global settings such as @paragraphindent you can get
something you don't want.

> > Finally: maybe it's just me, but isn't this documentation way too detailed?
> > It weighs in at 800 lines, and includes many details that seem to be more
> > related to AMD, GPU, and HIP than to GDB.  Would it be reasonable to make
> > this section shorter by omitting too low-level and unrelated details?
> 
> I went over the page, and while I agree it's very thorough and detailed,
> my impression is that it's all information that is one way or another
> related to how GDB interacts with ROCm / HIP / AMD GPUs.  So, all
> information that could be useful to someone with good knowledge of ROCm
> / HIP / AMD GPUs, if they wanted to use GDB to debug their program.  For
> instance, the description of when GDB reports a SIGABRT is useful, as
> the mapping between target debug events and Unix signals in GDB is kind
> of arbitrary.

But you also describe in detail what each signal means, for example,
which is either redundant or belongs to the documentation of GPU
programming.  E.g., we don't explain in the manual what kind of signal
is SIGBUS or SIGFPE in GP CPUs, so why should we have this spelled out
for GPU programs?

> If you can point out specific parts that you think are not relevant, we
> can discuss them specifically.

For example, this sounds like a description of the GPU, not of GDB
features:

> +@acronym{AMD GPU} supports the following @var{reggroup} values for the
> +@samp{info registers @var{reggroup} @dots{}} command:
> +
> +@itemize @bullet
> +
> +@item
> +general
> +
> +@item
> +vector
> +
> +@item
> +scalar
> +
> +@item
> +system
> +
> +@end itemize
> +
> +The number of scalar and vector registers is configured when a
> +wavefront is created.  Only allocated registers are displayed.

Or why do we need this in our manual:

> +The code object path for @acronym{AMD GPU} code objects is shown as a
> +@acronym{URI, Universal Location Identifier} with a syntax defined by
> +the following BNF syntax:
> +
> +@smallexample
> +code_object_uri ::== file_uri | memory_uri
> +file_uri        ::== "file://" file_path [ range_specifier ]
> +memory_uri      ::== "memory://" process_id range_specifier
> +range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
> +file_path       ::== URI_ENCODED_OS_FILE_PATH
> +process_id      ::== DECIMAL_NUMBER
> +number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
> +@end smallexample

(followed by a longish legend of what each atom means above).

And this paragraph seems to describe the GPU, not what GDB does:

> +If any of these signals are delivered to the wavefront, it will cause
> +the wavefront to enter the halt state and cause the @acronym{AMD ROCm}
> +runtime to put the associated queue into the queue error state.  All
> +wavefronts associated with a queue that is in the queue error state
> +are inhibited from executing further instructions even if they are not
> +in the halt state.  In addition, when the @acronym{AMD ROCm} runtime
> +puts a queue into the queue error state it may invoke an application
> +registered callback that could either abort the application or delete
> +the queue which will delete any wavefronts associated with the queue.

There's also a lot of stuff only very remotely related to GDB, which
basically reads like a large number of tips and tricks for someone who
needs this mode.  For example:

> +@item
> +By default, for some architectures, the @acronym{AMD GPU} device
> +driver causes all @acronym{AMD GPU} wavefronts created when
> +@value{GDBN} is not attached to be unable to report the dispatch
> +associated with the wavefront, or the wavefront's work-group
> +position.  The @samp{info threads} command will display this
> +missing information with a @samp{?}.
> +
> +For example,
> +
> +@smallexample
> +(gdb) info threads
> +  Id   Target Id                                       Frame
> +* 1    Thread 0x7ffff6987840 (LWP 62056) "bit_extract" 0x00007ffff6da489b in sched_yield () at ../sysdeps/unix/syscall-template.S:78
> +  2    Thread 0x7ffff6986700 (LWP 62064) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
> +  3    Thread 0x7ffff5f7f700 (LWP 62066) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
> +  4    Thread 0x7ffff597f700 (LWP 62067) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
> +  5    AMDGPU Wave 1:2:?:1 (?,?,?)/? "bit_extract"     bit_extract_kernel (C_d=<optimized out>, A_d=<optimized out>, N=<optimized out>) at bit_extract.cpp:41
> +@end smallexample
> +
> +This does not affect wavefronts created while @value{GDBN} is attached
> +which are always capable of reporting this information.
> +
> +If the @env{HSA_ENABLE_DEBUG} environment variable is set to @samp{1}
> +when the @acronym{AMD ROCm} runtime is initialized, then this
> +information will be available for all architectures even for wavefronts
> +created when @value{GDBN} was not attached.  Setting this environment
> +variable may very marginally increase wavefront launch latency for some
> +architectures for very short lived wavefronts.
> +
> +@item
> +If an @acronym{AMD GPU} wavefront has the @code{DX10_CLAMP} bit set in
> +the @code{MODE} register, enabled arithmetic exceptions will not be
> +reported as @code{SIGFPE} signals.  This happens if the
> +@code{DX10_CLAMP} kernel descriptor field is enabled.

The last paragraph in particular reads like something from the GPU
programming manual.  And even if this kind of info is useful and
should be in the GDB manual, why does it need to be so wordy, with so
many detailed examples?

I understand the urge to document all the potentially useful stuff
about this mode of GDB, but the result looks disproportionally long
and full of low-level information only tangentially related to GDB.

That said, if you feel strongly about the need to include all this,
and I'm the only one who raises the brow, feel free to install this, I
won't object anymore.

Thanks.
  
Simon Marchi Dec. 16, 2022, 5:37 p.m. UTC | #6
On 12/7/22 08:29, Eli Zaretskii wrote:
>> Date: Tue, 6 Dec 2022 21:17:25 -0500
>> Cc: gdb-patches@sourceware.org, zoran.zaric@amd.com,
>>  laurent.morichetti@amd.com, Tony.Tye@amd.com, lancelot.six@amd.com,
>>  pedro@palves.net
>> From: Simon Marchi <simark@simark.ca>
>>
>>>> +@node set scheduler-locking
>>>
>>> This @node is without any @chapter/@section, and does not appear in any
>>> @menu.  That doesn't look right; did you actually succeed in building the
>>> manual with these changes?
>> Yes, it builds.  It is used as a destination for a @pxref{set
>> scheduler-locking} later in the patch, if I remove it doesn't build:
>>
>>     gdb.texinfo:26469: @pxref reference to nonexistent node `set scheduler-locking'
>>
>> But I see the problem, in the HTML it introduces a new page starting at
>> that point, we don't want that.  Perhaps we should use @anchor instead?
> 
> You could use @anchor if all you want is a place to direct an @xref.

Ok, made that change.

> 
>>>> +The following @acronym{AMD GPU} architectures are supported:
>>>> +
>>>> +@table @emph
>>>> +
>>>> +@item @samp{gfx900}
>>>> +AMD Vega 10 devices, displayed as @samp{vega10} by @value{GDBN}.
>>>> +
>>>> +@item @samp{gfx906}
>>>> +AMD Vega 7nm devices, displayed as @samp{vega20} by @value{GDBN}.
>>>
>>> Do we really need this long list of architectures in the GDB manual?  It
>>> sounds like an ad for AMD...
>>
>> We found it useful, as people often ask which devices / models the
>> debugger supports.  AMD produces a lot of GPU models.  A subset of that
>> can run ROCm programs.  And a subset of that support debugging.  I think
>> it's useful to tell users which devices GDB is expected to work with.
>>
>> And it would not be a very good ad either, as most of these devices are
>> far from the latest and greatest :).
> 
> I understand, but this is a _GDB_ manual, not a manual for debugging
> AMD GPU programs.  We need to draw the line at some point.  Why cannot
> these details be in some README somewhere, or on the Wiki?

After discussing with the original author, we came to a conclusion that
agrees with you.  The supported devices in fact mostly depend on the
version of the amd-dbgapi library that GDB uses to debug ROCm programs.
An upgrade of that library, without changing / upgrading / rebuilding
GDB, can change the list of supported devices.  Therefore, it would be
wrong to have a list of supported devices in the manual for a given
version of GDB.

If the device support were baked into GDB itself, then I would argue
that it would be useful to have a list of supported devices in the GDB
manual, but that's not the case.

So, I'm replacing the contents of that subsubsection with:

  The list of @acronym{AMD GPU} architectures supported by @value{GDBN}
  depends on the version of the AMD Debugger API library installed.  See
  its @uref{https://docs.amd.com/bundle/ROCDebugger_User_and_API,
  documentation} for more details.

>>> This and other @smallexamples in the patch have too long lines; please break
>>> them into two or more, to avoid problems with the printed format of the
>>> manual.
>>
>> What is the maximum line length for this?
> 
> I think 70 or 72.

Ack, will try to use 72 then.

> 
>> I'll try, but it's a bit difficult when quoting actual GDB output.  For
>> instance, how would you do this one?
>>
>> @smallexample
>> (@value{GDBP}) info sharedlibrary
>> From                To                  Syms Read   Shared Object Library
>> 0x00007fd120664ac0  0x00007fd120682790  Yes (*)     /lib64/ld-linux-x86-64.so.2
> 
> Use shorter addresses and directory names: they are immaterial for
> your purposes here.

Ok.

>>>> +@subsubsection @acronym{AMD GPU} Wavefronts
>>>
>>> I think a @cindex about wavefronts would be useful here.
>>
>> I just add a `@cindex Wavefronts` under the line quoted above?
> 
> Yes, but "wavefronts", lower-case.  In general, index entries should
> not use upper-case unless really necessary, because the sorting order
> of mixed-case text depends on the locale and the underlying C library.

Ok.

> 
>>>> +@item file_path
>>>> +The file's path specified as a URI encoded UTF-8 string.  In URI
>>>> +encoding, every character that is not:
>>>> +
>>>> +@itemize
>>>> +@item In the @samp{a-z}, @samp{A-Z}, @samp{0-9} ranges
>>>> +@item @samp{/}, @samp{_}, @samp{.}, @samp{~} or @samp{-}
>>>> +@end itemize
>>>> +
>>>> +is encoded as two uppercase hexadecimal digits proceeded by @samp{%}.
>>>
>>> You want @noindent before the last line.
>>
>> I can add it, but I don't see the difference (at least in the HTML
>> and PDF outputs).
> 
> It's unreliable to rely on this to produce un-indented lines.
> Depending on the global settings such as @paragraphindent you can get
> something you don't want.

Note: I ended up deleting this part.

>>> Finally: maybe it's just me, but isn't this documentation way too detailed?
>>> It weighs in at 800 lines, and includes many details that seem to be more
>>> related to AMD, GPU, and HIP than to GDB.  Would it be reasonable to make
>>> this section shorter by omitting too low-level and unrelated details?
>>
>> I went over the page, and while I agree it's very thorough and detailed,
>> my impression is that it's all information that is one way or another
>> related to how GDB interacts with ROCm / HIP / AMD GPUs.  So, all
>> information that could be useful to someone with good knowledge of ROCm
>> / HIP / AMD GPUs, if they wanted to use GDB to debug their program.  For
>> instance, the description of when GDB reports a SIGABRT is useful, as
>> the mapping between target debug events and Unix signals in GDB is kind
>> of arbitrary.
> 
> But you also describe in detail what each signal means, for example,
> which is either redundant or belongs to the documentation of GPU
> programming.  E.g., we don't explain in the manual what kind of signal
> is SIGBUS or SIGFPE in GP CPUs, so why should we have this spelled out
> for GPU programs?

I think it is relevant, because GDB makes some arbitrary mappings
between target events and Unix signals, in order to present stops to the
user as Unix signals.  The text says "AMD GPU wavefronts can raise the
following signals when executing instructions", but I think it's a bit
misleading.  The wavefronts don't raise signals.  They encounter some
events or conditions that GDB translates to these signals.  Perhaps
that's not the greatest user experience, for GDB to shoehorn everything
into Unix signals, but that's what it is.  We don't really need to
document what signals mean for Unix-based platforms, because there is
no translation: the signals shown in GDB mean what they mean on the
platform.  But for non-Unix platforms, GDB makes some arbitrary
decisions that I think are relevant to document.  If an AMD GPU user
sees their program stop with SIGBUS, what does it mean for them?  SIGBUS
is not a thing on a GPU.

> 
>> If you can point out specific parts that you think are not relevant, we
>> can discuss them specifically.
> 
> For example, this sounds like a description of the GPU, not of GDB
> features:
> 
>> +@acronym{AMD GPU} supports the following @var{reggroup} values for the
>> +@samp{info registers @var{reggroup} @dots{}} command:
>> +
>> +@itemize @bullet
>> +
>> +@item
>> +general
>> +
>> +@item
>> +vector
>> +
>> +@item
>> +scalar
>> +
>> +@item
>> +system
>> +
>> +@end itemize
>> +
>> +The number of scalar and vector registers is configured when a
>> +wavefront is created.  Only allocated registers are displayed.

Well, the first part describes what register groups the user can use
when debugging an AMD GPU.  It sounds to me like it describes the
behavior of GDB, when debugging an AMD GPU (different arches support
different reggroups).

The second part explains that GDB will not necessarily show all hardware
registers.  I feel like this describes how GDB behaves when debugging
an AMD GPU.

I agree that none of this is super critical; we can always let the users
figure it out by themselves, but... isn't the goal of the doc to avoid
users having to figure things out the hard way?  I don't understand
where to draw the line between what's relevant and what is not.

> 
> Or why do we need this in our manual:
> 
>> +The code object path for @acronym{AMD GPU} code objects is shown as a
>> +@acronym{URI, Universal Location Identifier} with a syntax defined by
>> +the following BNF syntax:
>> +
>> +@smallexample
>> +code_object_uri ::== file_uri | memory_uri
>> +file_uri        ::== "file://" file_path [ range_specifier ]
>> +memory_uri      ::== "memory://" process_id range_specifier
>> +range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
>> +file_path       ::== URI_ENCODED_OS_FILE_PATH
>> +process_id      ::== DECIMAL_NUMBER
>> +number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
>> +@end smallexample
> 
> (followed by a longish legend of what each atom means above).

I agree that this is a bit too much.  I think the example, plus a less
formal description, is sufficient for a reasonably informed human to
understand.  I will change that part to be less long and dry.

However, one reason I see why we would want this is for MI.  If there
are people writing frontends for this (and there are), they'll need to
know precisely the format to expect, if they want to parse it.  Of
course, they can work something out of the example or go look at the
code, but I think it's bad to have poorly / loosely documented machine
interfaces.

> 
> And this paragraph seems to describe the GPU, not what GDB does:
> 
>> +If any of these signals are delivered to the wavefront, it will cause
>> +the wavefront to enter the halt state and cause the @acronym{AMD ROCm}
>> +runtime to put the associated queue into the queue error state.  All
>> +wavefronts associated with a queue that is in the queue error state
>> +are inhibited from executing further instructions even if they are not
>> +in the halt state.  In addition, when the @acronym{AMD ROCm} runtime
>> +puts a queue into the queue error state it may invoke an application
>> +registered callback that could either abort the application or delete
>> +the queue which will delete any wavefronts associated with the queue.

Ok, I'll delete that.

> 
> There's also a lot of stuff only very remotely related to GDB, which
> basically reads like a large number of tips and tricks for someone who
> needs this mode.  For example:
> 
>> +@item
>> +By default, for some architectures, the @acronym{AMD GPU} device
>> +driver causes all @acronym{AMD GPU} wavefronts created when
>> +@value{GDBN} is not attached to be unable to report the dispatch
>> +associated with the wavefront, or the wavefront's work-group
>> +position.  The @samp{info threads} command will display this
>> +missing information with a @samp{?}.
>> +
>> +For example,
>> +
>> +@smallexample
>> +(gdb) info threads
>> +  Id   Target Id                                       Frame
>> +* 1    Thread 0x7ffff6987840 (LWP 62056) "bit_extract" 0x00007ffff6da489b in sched_yield () at ../sysdeps/unix/syscall-template.S:78
>> +  2    Thread 0x7ffff6986700 (LWP 62064) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
>> +  3    Thread 0x7ffff5f7f700 (LWP 62066) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
>> +  4    Thread 0x7ffff597f700 (LWP 62067) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
>> +  5    AMDGPU Wave 1:2:?:1 (?,?,?)/? "bit_extract"     bit_extract_kernel (C_d=<optimized out>, A_d=<optimized out>, N=<optimized out>) at bit_extract.cpp:41
>> +@end smallexample
>> +
>> +This does not affect wavefronts created while @value{GDBN} is attached
>> +which are always capable of reporting this information.
>> +
>> +If the @env{HSA_ENABLE_DEBUG} environment variable is set to @samp{1}
>> +when the @acronym{AMD ROCm} runtime is initialized, then this
>> +information will be available for all architectures even for wavefronts
>> +created when @value{GDBN} was not attached.  Setting this environment
>> +variable may very marginally increase wavefront launch latency for some
>> +architectures for very short lived wavefronts.

I can imagine a user being in this situation and asking "When I
attach to my program, why do I see question marks in the thread / wave
ids?".  It seems relevant to me to mention it here so we can refer them
to the doc.  And then we give them a workaround, if they want to have
that information.  Seems useful to me.

>> +
>> +@item
>> +If an @acronym{AMD GPU} wavefront has the @code{DX10_CLAMP} bit set in
>> +the @code{MODE} register, enabled arithmetic exceptions will not be
>> +reported as @code{SIGFPE} signals.  This happens if the
>> +@code{DX10_CLAMP} kernel descriptor field is enabled.
> 
> The last paragraph in particular reads like something from the GPU
> programming manual.

Ok, I can agree with that.  We already describe above which condition
leads to GDB reporting a SIGFPE (some arithmetic error).  The fact that
setting this bit makes the GPU not report the arithmetic error is indeed
behavior specific to the GPU.

> And even if this kind of info is useful and
> should be in the GDB manual, why does it need to be so wordy, with so
> many detailed examples?

This is the writing style of Tony Tye, the original author; he's used
to writing very formally and unambiguously.

Some info is there because users asked these questions, and then we
thought that we might as well document it, since others may have the
same question.

> 
> I understand the urge to document all the potentially useful stuff
> about this mode of GDB, but the result looks disproportionally long
> and full of low-level information only tangentially related to GDB.
> 
> That said, if you feel strongly about the need to include all this,
> and I'm the only one who raises the brow, feel free to install this, I
> won't object anymore.

I think you raise some good points.  So far we have been working in the
ROCm bubble, where the line between ROCm-GDB and the rest of the
ecosystem is sometimes blurry.  So it's easy to leak in some details
that are not really relevant for upstream GDB.  We may not agree
completely on what is relevant and what isn't, but there are definitely
things that don't belong here.  I already pruned a bunch of stuff.  I
will send a v2 with an updated doc, but it will likely be after the new
year.

Thanks,

Simon
  

Patch

diff --git a/gdb/Makefile.in b/gdb/Makefile.in
index fb4d42c7baa..f4e498c98c5 100644
--- a/gdb/Makefile.in
+++ b/gdb/Makefile.in
@@ -229,6 +229,9 @@  PTHREAD_LIBS = @PTHREAD_LIBS@
 DEBUGINFOD_CFLAGS = @DEBUGINFOD_CFLAGS@
 DEBUGINFOD_LIBS = @DEBUGINFOD_LIBS@
 
+AMD_DBGAPI_CFLAGS = @AMD_DBGAPI_CFLAGS@
+AMD_DBGAPI_LIBS = @AMD_DBGAPI_LIBS@
+
 RDYNAMIC = @RDYNAMIC@
 
 # Where is the INTL library?  Typically in ../intl.
@@ -633,7 +636,8 @@  INTERNAL_CFLAGS_BASE = \
 	$(ZSTD_CFLAGS) $(BFD_CFLAGS) $(INCLUDE_CFLAGS) $(LIBDECNUMBER_CFLAGS) \
 	$(INTL_CFLAGS) $(INCGNU) $(INCSUPPORT) $(LIBBACKTRACE_INC) \
 	$(ENABLE_CFLAGS) $(INTERNAL_CPPFLAGS) $(SRCHIGH_CFLAGS) \
-	$(TOP_CFLAGS) $(PTHREAD_CFLAGS) $(DEBUGINFOD_CFLAGS)
+	$(TOP_CFLAGS) $(PTHREAD_CFLAGS) $(DEBUGINFOD_CFLAGS) \
+	$(AMD_DBGAPI_CFLAGS)
 INTERNAL_WARN_CFLAGS = $(INTERNAL_CFLAGS_BASE) $(GDB_WARN_CFLAGS)
 INTERNAL_CFLAGS = $(INTERNAL_WARN_CFLAGS) $(GDB_WERROR_CFLAGS)
 
@@ -655,7 +659,7 @@  INTERNAL_LDFLAGS = \
 CLIBS = $(SIM) $(READLINE) $(OPCODES) $(LIBCTF) $(BFD) $(ZLIB) $(ZSTD_LIBS) \
         $(LIBSUPPORT) $(INTL) $(LIBIBERTY) $(LIBDECNUMBER) \
 	$(XM_CLIBS) $(GDBTKLIBS)  $(LIBBACKTRACE_LIB) \
-	@LIBS@ @GUILE_LIBS@ @PYTHON_LIBS@ \
+	@LIBS@ @GUILE_LIBS@ @PYTHON_LIBS@ $(AMD_DBGAPI_LIBS) \
 	$(LIBEXPAT) $(LIBLZMA) $(LIBBABELTRACE) $(LIBIPT) \
 	$(WIN32LIBS) $(LIBGNU) $(LIBGNU_EXTRA_LIBS) $(LIBICONV) \
 	$(LIBMPFR) $(LIBGMP) $(SRCHIGH_LIBS) $(LIBXXHASH) $(PTHREAD_LIBS) \
@@ -693,6 +697,12 @@  SIM_OBS = @SIM_OBS@
 # Target-dependent object files.
 TARGET_OBS = @TARGET_OBS@
 
+# All target-dependent object files that require the amd-dbgapi
+# target to be available (used with --enable-targets=all).
+ALL_AMD_DBGAPI_TARGET_OBS = \
+	amdgpu-tdep.o \
+	solib-rocm.o
+
 # All target-dependent objects files that require 64-bit CORE_ADDR
 # (used with --enable-targets=all --enable-64-bit-bfd).
 ALL_64_TARGET_OBS = \
@@ -1638,6 +1648,7 @@  ALLDEPFILES = \
 	alpha-netbsd-tdep.c \
 	alpha-obsd-tdep.c \
 	alpha-tdep.c \
+	amd-dbgapi-target.c \
 	amd64-bsd-nat.c \
 	amd64-darwin-tdep.c \
 	amd64-dicos-tdep.c \
@@ -1653,6 +1664,7 @@  ALLDEPFILES = \
 	amd64-ravenscar-thread.c \
 	amd64-sol2-tdep.c \
 	amd64-tdep.c \
+	amdgpu-tdep.c \
 	arc-linux-nat.c \
 	arc-tdep.c \
 	arm-bsd-tdep.c \
@@ -1794,6 +1806,7 @@  ALLDEPFILES = \
 	sh-tdep.c \
 	sol2-tdep.c \
 	solib-aix.c \
+	solib-rocm.c \
 	solib-svr4.c \
 	sparc-linux-nat.c \
 	sparc-linux-tdep.c \
diff --git a/gdb/NEWS b/gdb/NEWS
index c4ccfcc9e32..12acd0c3800 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -184,6 +184,8 @@  GNU/Linux/LoongArch (gdbserver)	loongarch*-*-linux*
 
 GNU/Linux/CSKY (gdbserver) csky*-*linux*
 
+AMDGPU amdgcn-*-*
+
 * MI changes
 
  ** The async record stating the stopped reason 'breakpoint-hit' now
@@ -278,6 +280,11 @@  GNU/Linux/CSKY (gdbserver) csky*-*linux*
 
 GDB now supports floating-point on LoongArch GNU/Linux.
 
+* AMD GPU ROCm debugging support
+
+GDB now supports debugging programs offloaded to AMD GPUs using the ROCm
+platform.
+
 *** Changes in GDB 12
 
 * DBX mode is deprecated, and will be removed in GDB 13
diff --git a/gdb/README b/gdb/README
index 406df046053..514093e3bc8 100644
--- a/gdb/README
+++ b/gdb/README
@@ -531,6 +531,21 @@  more obscure GDB `configure' options are not listed here.
      speeds up various GDB operations such as symbol loading.  Enabled
      by default if libxxhash is found.
 
+`--with-amd-dbgapi=[auto,yes,no]'
+     Whether to use the amd-dbgapi library to support local debugging of
+     AMD GCN architecture GPUs.
+
+     When explicitly requesting support for an AMD GCN architecture through
+     `--enable-targets' or `--target', there is no need to use
+     `--with-amd-dbgapi': `configure' will automatically look for the
+     amd-dbgapi library and fail if not found.
+
+     When using --enable-targets=all, support for the AMD GCN architecture will
+     only be included if the amd-dbgapi is found.  `--with-amd-dbgapi=yes' can
+     be used to make it a failure if the amd-dbgapi library is not found.
+     `--with-amd-dbgapi=no' can be used to prevent looking for the amd-dbgapi
+     library altogether.
+
 `--without-included-regex'
      Don't use the regex library included with GDB (as part of the
      libiberty library).  This is the default on hosts with version 2
diff --git a/gdb/amd-dbgapi-target.c b/gdb/amd-dbgapi-target.c
new file mode 100644
index 00000000000..abd8b4b3a3f
--- /dev/null
+++ b/gdb/amd-dbgapi-target.c
@@ -0,0 +1,1966 @@ 
+/* Target used to communicate with the AMD Debugger API.
+
+   Copyright (C) 2019-2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+
+#include "amd-dbgapi-target.h"
+#include "amdgpu-tdep.h"
+#include "async-event.h"
+#include "cli/cli-cmds.h"
+#include "cli/cli-style.h"
+#include "inf-loop.h"
+#include "inferior.h"
+#include "objfiles.h"
+#include "observable.h"
+#include "registry.h"
+#include "solib.h"
+#include "target.h"
+
+/* When true, print debug messages relating to the amd-dbgapi target.  */
+
+static bool debug_amd_dbgapi = false;
+
+/* Make a copy of S styled in green.  */
+
+static std::string
+make_green (const char *s)
+{
+  cli_style_option style (nullptr, ui_file_style::GREEN);
+  string_file sf (true);
+  gdb_printf (&sf, "%ps", styled_string (style.style(), s));
+  return sf.release ();
+}
+
+/* Debug module names.  "amd-dbgapi" is for the target debug messages (this
+   file), whereas "amd-dbgapi-lib" is for logging messages output by the
+   amd-dbgapi library.  */
+
+static const char *amd_dbgapi_debug_module_unstyled = "amd-dbgapi";
+static const char *amd_dbgapi_lib_debug_module_unstyled
+  = "amd-dbgapi-lib";
+
+/* Styled variants of the above.  */
+
+static const std::string amd_dbgapi_debug_module_styled
+  = make_green (amd_dbgapi_debug_module_unstyled);
+static const std::string amd_dbgapi_lib_debug_module_styled
+  = make_green (amd_dbgapi_lib_debug_module_unstyled);
+
+/* Return the styled or unstyled variant of the amd-dbgapi module name,
+   depending on whether gdb_stdlog can emit colors.  */
+
+static const char *
+amd_dbgapi_debug_module ()
+{
+  if (gdb_stdlog->can_emit_style_escape ())
+    return amd_dbgapi_debug_module_styled.c_str ();
+  else
+    return amd_dbgapi_debug_module_unstyled;
+}
+
+/* Same as the above, but for the amd-dbgapi-lib module name.  */
+
+static const char *
+amd_dbgapi_lib_debug_module ()
+{
+  if (gdb_stdlog->can_emit_style_escape ())
+    return amd_dbgapi_lib_debug_module_styled.c_str ();
+  else
+    return amd_dbgapi_lib_debug_module_unstyled;
+}
+
+/* Print an amd-dbgapi debug statement.  */
+
+#define amd_dbgapi_debug_printf(fmt, ...) \
+  debug_prefixed_printf_cond (debug_amd_dbgapi, \
+			      amd_dbgapi_debug_module (), \
+			      fmt, ##__VA_ARGS__)
+
+/* Print amd-dbgapi start/end debug statements.  */
+
+#define AMD_DBGAPI_SCOPED_DEBUG_START_END(fmt, ...) \
+    scoped_debug_start_end (debug_amd_dbgapi, amd_dbgapi_debug_module (), \
+			    fmt, ##__VA_ARGS__)
+
+/* inferior_created observer token.  */
+
+static gdb::observers::token amd_dbgapi_target_inferior_created_observer_token;
+
+const gdb::observers::token &
+get_amd_dbgapi_target_inferior_created_observer_token ()
+{
+  return amd_dbgapi_target_inferior_created_observer_token;
+}
+
+
+/* Big enough to hold the size of the largest register in bytes.  */
+#define AMDGPU_MAX_REGISTER_SIZE 256
+
+/* amd-dbgapi-specific inferior data.  */
+
+struct amd_dbgapi_inferior_info
+{
+  explicit amd_dbgapi_inferior_info (inferior *inf)
+    : inf (inf)
+  {}
+
+  /* Backlink to inferior.  */
+  inferior *inf;
+
+  /* The amd_dbgapi_process_id for this inferior.  */
+  amd_dbgapi_process_id_t process_id = AMD_DBGAPI_PROCESS_NONE;
+
+  /* The amd_dbgapi_notifier_t for this inferior.  */
+  amd_dbgapi_notifier_t notifier = -1;
+
+  /* The status of the inferior's runtime support.  */
+  amd_dbgapi_runtime_state_t runtime_state = AMD_DBGAPI_RUNTIME_STATE_UNLOADED;
+
+  /* This value mirrors the current "forward progress needed" value for this
+     process in amd-dbgapi.  It is used to avoid unnecessary calls to
+     amd_dbgapi_process_set_progress, to reduce the noise in the logs.
+
+     Initialized to true, since that's the default in amd-dbgapi too.  */
+  bool forward_progress_required = true;
+
+  std::unordered_map<decltype (amd_dbgapi_breakpoint_id_t::handle),
+		     struct breakpoint *>
+    breakpoint_map;
+
+  /* List of pending events the amd-dbgapi target retrieved from the dbgapi.  */
+  std::list<std::pair<ptid_t, target_waitstatus>> wave_events;
+};
+
+static amd_dbgapi_event_id_t process_event_queue
+  (amd_dbgapi_process_id_t process_id = AMD_DBGAPI_PROCESS_NONE,
+   amd_dbgapi_event_kind_t until_event_kind = AMD_DBGAPI_EVENT_KIND_NONE);
+
+static const target_info amd_dbgapi_target_info = {
+  "amd-dbgapi",
+  N_("AMD Debugger API"),
+  N_("GPU debugging using the AMD Debugger API")
+};
+
+static amd_dbgapi_log_level_t get_debug_amd_dbgapi_lib_log_level ();
+
+struct amd_dbgapi_target final : public target_ops
+{
+  const target_info &
+  info () const override
+  {
+    return amd_dbgapi_target_info;
+  }
+  strata
+  stratum () const override
+  {
+    return arch_stratum;
+  }
+
+  void close () override;
+  void mourn_inferior () override;
+  void detach (inferior *inf, int from_tty) override;
+
+  void async (bool enable) override;
+
+  bool has_pending_events () override;
+  ptid_t wait (ptid_t, struct target_waitstatus *, target_wait_flags) override;
+  void resume (ptid_t, int, enum gdb_signal) override;
+  void commit_resumed () override;
+  void stop (ptid_t ptid) override;
+
+  void fetch_registers (struct regcache *, int) override;
+  void store_registers (struct regcache *, int) override;
+
+  void update_thread_list () override;
+
+  struct gdbarch *thread_architecture (ptid_t) override;
+
+  void thread_events (int enable) override;
+
+  std::string pid_to_str (ptid_t ptid) override;
+
+  const char *thread_name (thread_info *tp) override;
+
+  const char *extra_thread_info (thread_info *tp) override;
+
+  bool thread_alive (ptid_t ptid) override;
+
+  enum target_xfer_status xfer_partial (enum target_object object,
+					const char *annex, gdb_byte *readbuf,
+					const gdb_byte *writebuf,
+					ULONGEST offset, ULONGEST len,
+					ULONGEST *xfered_len) override;
+
+  bool stopped_by_watchpoint () override;
+
+  bool stopped_by_sw_breakpoint () override;
+  bool stopped_by_hw_breakpoint () override;
+
+private:
+  /* True if we must report thread events.  */
+  bool m_report_thread_events = false;
+
+  /* Cache for the last value returned by thread_architecture.  */
+  gdbarch *m_cached_arch = nullptr;
+  ptid_t::tid_type m_cached_arch_tid = 0;
+};
+
+static struct amd_dbgapi_target the_amd_dbgapi_target;
+
+/* Per-inferior data key.  */
+
+static const registry<inferior>::key<amd_dbgapi_inferior_info>
+  amd_dbgapi_inferior_data;
+
+/* The async event handler registered with the event loop, indicating that we
+   might have events to report to the core and that we'd like our wait method
+   to be called.
+
+   This is nullptr when async is disabled and non-nullptr when async is
+   enabled.
+
+   It is marked when a notifier fd tells us there's an event available.  The
+   callback triggers handle_inferior_event in order to pull the event from
+   amd-dbgapi and handle it.  */
+
+static async_event_handler *amd_dbgapi_async_event_handler = nullptr;
+
+/* Return the target id string for a given wave.  */
+
+static std::string
+wave_target_id_string (amd_dbgapi_wave_id_t wave_id)
+{
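+  /* The resulting string has the form
+     "AMDGPU Wave <agent>:<queue>:<dispatch>:<wave> (x,y,z)/<n>", where "?"
+     stands in for any piece of information that could not be fetched.  */
+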
+  amd_dbgapi_dispatch_id_t dispatch_id;
+  amd_dbgapi_queue_id_t queue_id;
+  amd_dbgapi_agent_id_t agent_id;
+  uint32_t group_ids[3], wave_in_group;
+  std::string str = "AMDGPU Wave";
+
+  amd_dbgapi_status_t status
+    = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_AGENT,
+				sizeof (agent_id), &agent_id);
+  str += (status == AMD_DBGAPI_STATUS_SUCCESS
+	  ? string_printf (" %ld", agent_id.handle)
+	  : " ?");
+
+  status = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_QUEUE,
+				     sizeof (queue_id), &queue_id);
+  str += (status == AMD_DBGAPI_STATUS_SUCCESS
+	  ? string_printf (":%ld", queue_id.handle)
+	  : ":?");
+
+  status = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_DISPATCH,
+				     sizeof (dispatch_id), &dispatch_id);
+  str += (status == AMD_DBGAPI_STATUS_SUCCESS
+	  ? string_printf (":%ld", dispatch_id.handle)
+	  : ":?");
+
+  str += string_printf (":%ld", wave_id.handle);
+
+  status = amd_dbgapi_wave_get_info (wave_id,
+				     AMD_DBGAPI_WAVE_INFO_WORKGROUP_COORD,
+				     sizeof (group_ids), &group_ids);
+  str += (status == AMD_DBGAPI_STATUS_SUCCESS
+	  ? string_printf (" (%d,%d,%d)", group_ids[0], group_ids[1],
+			   group_ids[2])
+	  : " (?,?,?)");
+
+  status = amd_dbgapi_wave_get_info
+    (wave_id, AMD_DBGAPI_WAVE_INFO_WAVE_NUMBER_IN_WORKGROUP,
+     sizeof (wave_in_group), &wave_in_group);
+  str += (status == AMD_DBGAPI_STATUS_SUCCESS
+	  ? string_printf ("/%d", wave_in_group)
+	  : "/?");
+
+  return str;
+}
+
+/* Clear our async event handler.  */
+
+static void
+async_event_handler_clear ()
+{
+  gdb_assert (amd_dbgapi_async_event_handler != nullptr);
+  clear_async_event_handler (amd_dbgapi_async_event_handler);
+}
+
+/* Mark our async event handler.  */
+
+static void
+async_event_handler_mark ()
+{
+  gdb_assert (amd_dbgapi_async_event_handler != nullptr);
+  mark_async_event_handler (amd_dbgapi_async_event_handler);
+}
+
+/* Fetch the amd_dbgapi_inferior_info data for the given inferior.  */
+
+static struct amd_dbgapi_inferior_info *
+get_amd_dbgapi_inferior_info (struct inferior *inferior)
+{
+  amd_dbgapi_inferior_info *info = amd_dbgapi_inferior_data.get (inferior);
+
+  if (info == nullptr)
+    info = amd_dbgapi_inferior_data.emplace (inferior, inferior);
+
+  return info;
+}
+
+/* Set forward progress requirement to REQUIRE for all processes of PROC_TARGET
+   matching PTID.  */
+
+static void
+require_forward_progress (ptid_t ptid, process_stratum_target *proc_target,
+			  bool require)
+{
+  for (inferior *inf : all_inferiors (proc_target))
+    {
+      if (ptid != minus_one_ptid && inf->pid != ptid.pid ())
+	continue;
+
+      amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+      if (info->process_id == AMD_DBGAPI_PROCESS_NONE)
+	continue;
+
+      /* Don't do unnecessary calls to amd-dbgapi to avoid polluting the logs.  */
+      if (info->forward_progress_required == require)
+	continue;
+
+      amd_dbgapi_status_t status
+	= amd_dbgapi_process_set_progress
+	    (info->process_id, (require
+				? AMD_DBGAPI_PROGRESS_NORMAL
+				: AMD_DBGAPI_PROGRESS_NO_FORWARD));
+      gdb_assert (status == AMD_DBGAPI_STATUS_SUCCESS);
+
+      info->forward_progress_required = require;
+
+      /* If ptid targets a single inferior and we have found it, no need to
+         continue.  */
+      if (ptid != minus_one_ptid)
+	break;
+    }
+}
+
+/* See amd-dbgapi-target.h.  */
+
+amd_dbgapi_process_id_t
+get_amd_dbgapi_process_id (inferior *inf)
+{
+  return get_amd_dbgapi_inferior_info (inf)->process_id;
+}
+
+/* A breakpoint that dbgapi wants us to insert, to handle shared library
+   loading/unloading.  */
+
+struct amd_dbgapi_target_breakpoint : public code_breakpoint
+{
+  amd_dbgapi_target_breakpoint (struct gdbarch *gdbarch, CORE_ADDR address)
+    : code_breakpoint (gdbarch, bp_breakpoint)
+  {
+    symtab_and_line sal;
+    sal.pc = address;
+    sal.section = find_pc_overlay (sal.pc);
+    sal.pspace = current_program_space;
+    add_location (sal);
+
+    pspace = current_program_space;
+    disposition = disp_donttouch;
+  }
+
+  void re_set () override;
+  void check_status (struct bpstat *bs) override;
+};
+
+void
+amd_dbgapi_target_breakpoint::re_set ()
+{
+  /* Nothing.  */
+}
+
+void
+amd_dbgapi_target_breakpoint::check_status (struct bpstat *bs)
+{
+  inferior *inf = current_inferior ();
+  amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+  amd_dbgapi_status_t status;
+
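+  /* This breakpoint is internal to the GDB <-> amd-dbgapi interaction; never
+     stop and never print anything to the user when it is hit.  */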
+  bs->stop = 0;
+  bs->print_it = print_it_noop;
+
+  /* Find the address the breakpoint is set at.  */
+  auto match_breakpoint
+    = [bs] (const decltype (info->breakpoint_map)::value_type &value)
+      { return value.second == bs->breakpoint_at; };
+  auto it
+    = std::find_if (info->breakpoint_map.begin (), info->breakpoint_map.end (),
+		    match_breakpoint);
+
+  if (it == info->breakpoint_map.end ())
+    error (_("Could not find breakpoint_id for breakpoint at %s"),
+	   paddress (inf->gdbarch, bs->bp_location_at->address));
+
+  amd_dbgapi_breakpoint_id_t breakpoint_id { it->first };
+  amd_dbgapi_breakpoint_action_t action;
+
+  status = amd_dbgapi_report_breakpoint_hit
+    (breakpoint_id,
+     reinterpret_cast<amd_dbgapi_client_thread_id_t> (inferior_thread ()),
+     &action);
+
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_report_breakpoint_hit failed for breakpoint %ld "
+	     "at %s (%s)"),
+	   breakpoint_id.handle,
+	   paddress (inf->gdbarch, bs->bp_location_at->address),
+	   get_status_string (status));
+
+  if (action == AMD_DBGAPI_BREAKPOINT_ACTION_RESUME)
+    return;
+
+  /* If the action is AMD_DBGAPI_BREAKPOINT_ACTION_HALT, we need to wait until
+     a breakpoint resume event for this breakpoint_id is seen.  */
+  amd_dbgapi_event_id_t resume_event_id
+    = process_event_queue (info->process_id,
+			   AMD_DBGAPI_EVENT_KIND_BREAKPOINT_RESUME);
+
+  /* We should always get a breakpoint_resume event after processing all
+     events generated by reporting the breakpoint hit.  */
+  gdb_assert (resume_event_id != AMD_DBGAPI_EVENT_NONE);
+
+  amd_dbgapi_breakpoint_id_t resume_breakpoint_id;
+  status = amd_dbgapi_event_get_info (resume_event_id,
+				      AMD_DBGAPI_EVENT_INFO_BREAKPOINT,
+				      sizeof (resume_breakpoint_id),
+				      &resume_breakpoint_id);
+
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_event_get_info failed (%s)"), get_status_string (status));
+
+  /* The debugger API guarantees that [breakpoint_hit...resume_breakpoint]
+     sequences cannot interleave, so this breakpoint resume event must be
+     for our breakpoint_id.  */
+  if (resume_breakpoint_id != breakpoint_id)
+    error (_("breakpoint resume event is not for this breakpoint. "
+	      "Expected breakpoint_%ld, got breakpoint_%ld"),
+	   breakpoint_id.handle, resume_breakpoint_id.handle);
+
+  amd_dbgapi_event_processed (resume_event_id);
+}
+
+bool
+amd_dbgapi_target::thread_alive (ptid_t ptid)
+{
+  if (!ptid_is_gpu (ptid))
+    return beneath ()->thread_alive (ptid);
+
+  /* Check that the wave_id is valid.  */
+
+  amd_dbgapi_wave_state_t state;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_wave_get_info (get_amd_dbgapi_wave_id (ptid),
+				AMD_DBGAPI_WAVE_INFO_STATE, sizeof (state),
+				&state);
+  return status == AMD_DBGAPI_STATUS_SUCCESS;
+}
+
+const char *
+amd_dbgapi_target::thread_name (thread_info *tp)
+{
+  if (!ptid_is_gpu (tp->ptid))
+    return beneath ()->thread_name (tp);
+
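+  /* GPU waves do not have a name.  */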
+  return nullptr;
+}
+
+std::string
+amd_dbgapi_target::pid_to_str (ptid_t ptid)
+{
+  if (!ptid_is_gpu (ptid))
+    return beneath ()->pid_to_str (ptid);
+
+  return wave_target_id_string (get_amd_dbgapi_wave_id (ptid));
+}
+
+const char *
+amd_dbgapi_target::extra_thread_info (thread_info *tp)
+{
+  if (!ptid_is_gpu (tp->ptid))
+    return beneath ()->extra_thread_info (tp);
+
+  return nullptr;
+}
+
+target_xfer_status
+amd_dbgapi_target::xfer_partial (enum target_object object, const char *annex,
+			       gdb_byte *readbuf, const gdb_byte *writebuf,
+			       ULONGEST offset, ULONGEST requested_len,
+			       ULONGEST *xfered_len)
+{
+  gdb::optional<scoped_restore_current_thread> maybe_restore_thread;
+
+  if (!ptid_is_gpu (inferior_ptid))
+    return beneath ()->xfer_partial (object, annex, readbuf, writebuf, offset,
+				     requested_len, xfered_len);
+
+  gdb_assert (requested_len > 0);
+  gdb_assert (xfered_len != nullptr);
+
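+  /* Only memory accesses (in the global address space) are handled for GPU
+     waves.  */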
+  if (object != TARGET_OBJECT_MEMORY)
+    return TARGET_XFER_E_IO;
+
+  amd_dbgapi_process_id_t process_id
+    = get_amd_dbgapi_process_id (current_inferior ());
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (inferior_ptid);
+
+  size_t len = requested_len;
+  amd_dbgapi_status_t status;
+
+  if (readbuf != nullptr)
+    status = amd_dbgapi_read_memory (process_id, wave_id, 0,
+				     AMD_DBGAPI_ADDRESS_SPACE_GLOBAL,
+				     offset, &len, readbuf);
+  else
+    status = amd_dbgapi_write_memory (process_id, wave_id, 0,
+				      AMD_DBGAPI_ADDRESS_SPACE_GLOBAL,
+				      offset, &len, writebuf);
+
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    return TARGET_XFER_E_IO;
+
+  *xfered_len = len;
+  return TARGET_XFER_OK;
+}
+
+bool
+amd_dbgapi_target::stopped_by_watchpoint ()
+{
+  if (!ptid_is_gpu (inferior_ptid))
+    return beneath ()->stopped_by_watchpoint ();
+
+  return false;
+}
+
+void
+amd_dbgapi_target::resume (ptid_t scope_ptid, int step, enum gdb_signal signo)
+{
+  amd_dbgapi_debug_printf ("scope_ptid = %s", scope_ptid.to_string ().c_str ());
+
+  /* The amd_dbgapi_exceptions_t matching SIGNO will only be used if the
+     thread which is the target of the signal SIGNO is a GPU thread.  If so,
+     make sure that there is a corresponding amd_dbgapi_exceptions_t for SIGNO
+     before we try to resume any thread.  */
+  amd_dbgapi_exceptions_t exception = AMD_DBGAPI_EXCEPTION_NONE;
+  if (ptid_is_gpu (inferior_ptid))
+    {
+      switch (signo)
+	{
+	case GDB_SIGNAL_BUS:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_APERTURE_VIOLATION;
+	  break;
+	case GDB_SIGNAL_SEGV:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_MEMORY_VIOLATION;
+	  break;
+	case GDB_SIGNAL_ILL:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_ILLEGAL_INSTRUCTION;
+	  break;
+	case GDB_SIGNAL_FPE:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_MATH_ERROR;
+	  break;
+	case GDB_SIGNAL_ABRT:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_ABORT;
+	  break;
+	case GDB_SIGNAL_TRAP:
+	  exception = AMD_DBGAPI_EXCEPTION_WAVE_TRAP;
+	  break;
+	case GDB_SIGNAL_0:
+	  exception = AMD_DBGAPI_EXCEPTION_NONE;
+	  break;
+	default:
+	  error (_("Resuming with signal %s is not supported by this agent."),
+		 gdb_signal_to_name (signo));
+	}
+    }
+
+  if (!ptid_is_gpu (inferior_ptid) || scope_ptid != inferior_ptid)
+    {
+      beneath ()->resume (scope_ptid, step, signo);
+
+      /* If the request is for a single thread, we are done.  */
+      if (scope_ptid == inferior_ptid)
+	return;
+    }
+
+  process_stratum_target *proc_target = current_inferior ()->process_target ();
+
+  /* Disable forward progress requirement.  */
+  require_forward_progress (scope_ptid, proc_target, false);
+
+  for (thread_info *thread : all_non_exited_threads (proc_target, scope_ptid))
+    {
+      if (!ptid_is_gpu (thread->ptid))
+	continue;
+
+      amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (thread->ptid);
+      amd_dbgapi_status_t status;
+      if (thread->ptid == inferior_ptid)
+	status = amd_dbgapi_wave_resume (wave_id,
+					 (step
+					  ? AMD_DBGAPI_RESUME_MODE_SINGLE_STEP
+					  : AMD_DBGAPI_RESUME_MODE_NORMAL),
+					 exception);
+      else
+	status = amd_dbgapi_wave_resume (wave_id, AMD_DBGAPI_RESUME_MODE_NORMAL,
+					 AMD_DBGAPI_EXCEPTION_NONE);
+
+      if (status != AMD_DBGAPI_STATUS_SUCCESS
+	  /* Ignore the error that the wave is no longer valid as that could
+	     indicate that the process has exited.  GDB treats resuming a
+	     thread that no longer exists as being successful.  */
+	  && status != AMD_DBGAPI_STATUS_ERROR_INVALID_WAVE_ID)
+	error (_("wave_resume for wave_%ld failed (%s)"), wave_id.handle,
+	       get_status_string (status));
+    }
+}
+
+void
+amd_dbgapi_target::commit_resumed ()
+{
+  amd_dbgapi_debug_printf ("called");
+
+  beneath ()->commit_resumed ();
+
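+  /* Now that the resumptions have been committed to the target beneath,
+     forward progress can be required again.  */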
+  process_stratum_target *proc_target = current_inferior ()->process_target ();
+  require_forward_progress (minus_one_ptid, proc_target, true);
+}
+
+void
+amd_dbgapi_target::stop (ptid_t ptid)
+{
+  amd_dbgapi_debug_printf ("ptid = %s", ptid.to_string ().c_str ());
+
+  bool many_threads = ptid == minus_one_ptid || ptid.is_pid ();
+
+  if (!ptid_is_gpu (ptid) || many_threads)
+    {
+      beneath ()->stop (ptid);
+
+      /* If the request is for a single thread, we are done.  */
+      if (!many_threads)
+	return;
+    }
+
+  auto stop_one_thread = [this] (thread_info *thread)
+    {
+      gdb_assert (thread != nullptr);
+
+      amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (thread->ptid);
+      amd_dbgapi_wave_state_t state;
+      amd_dbgapi_status_t status
+	= amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_STATE,
+				    sizeof (state), &state);
+      if (status == AMD_DBGAPI_STATUS_SUCCESS)
+	{
+	  /* If the wave is already known to be stopped then do nothing.  */
+	  if (state == AMD_DBGAPI_WAVE_STATE_STOP)
+	    return;
+
+	  status = amd_dbgapi_wave_stop (wave_id);
+	  if (status == AMD_DBGAPI_STATUS_SUCCESS)
+	    return;
+
+	  if (status != AMD_DBGAPI_STATUS_ERROR_INVALID_WAVE_ID)
+	    error (_("wave_stop for wave_%ld failed (%s)"), wave_id.handle,
+		   get_status_string (status));
+	}
+      else if (status != AMD_DBGAPI_STATUS_ERROR_INVALID_WAVE_ID)
+	error (_("wave_get_info for wave_%ld failed (%s)"), wave_id.handle,
+	       get_status_string (status));
+
+      /* The status is AMD_DBGAPI_STATUS_ERROR_INVALID_WAVE_ID.  The wave
+	 could have terminated since the last time the wave list was
+	 refreshed.  */
+
+      if (m_report_thread_events)
+	{
+	  get_amd_dbgapi_inferior_info (thread->inf)->wave_events.emplace_back
+	    (thread->ptid, target_waitstatus ().set_thread_exited (0));
+
+	  if (target_is_async_p ())
+	    async_event_handler_mark ();
+	}
+
+      delete_thread_silent (thread);
+    };
+
+  process_stratum_target *proc_target = current_inferior ()->process_target ();
+
+  /* Disable forward progress requirement.  */
+  require_forward_progress (ptid, proc_target, false);
+
+  if (!many_threads)
+    {
+      /* No need to iterate all non-exited threads if the request is to stop a
+	 specific thread.  */
+      stop_one_thread (find_thread_ptid (proc_target, ptid));
+      return;
+    }
+
+  for (auto *inf : all_inferiors (proc_target))
+    /* Use the threads_safe iterator since stop_one_thread may delete the
+       thread if it has exited.  */
+    for (auto *thread : inf->threads_safe ())
+      if (thread->state != THREAD_EXITED && thread->ptid.matches (ptid)
+	  && ptid_is_gpu (thread->ptid))
+	stop_one_thread (thread);
+}
+
+/* Callback for our async event handler.  */
+
+static void
+handle_target_event (gdb_client_data client_data)
+{
+  inferior_event_handler (INF_REG_EVENT);
+}
+
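+/* RAII type to mark an amd-dbgapi event as processed when it goes out of
+   scope.  */
+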
+struct scoped_amd_dbgapi_event_processed
+{
+  scoped_amd_dbgapi_event_processed (amd_dbgapi_event_id_t event_id)
+    : m_event_id (event_id)
+  {
+    gdb_assert (event_id != AMD_DBGAPI_EVENT_NONE);
+  }
+
+  ~scoped_amd_dbgapi_event_processed ()
+  {
+    amd_dbgapi_status_t status = amd_dbgapi_event_processed (m_event_id);
+    if (status != AMD_DBGAPI_STATUS_SUCCESS)
+      warning (_("Failed to acknowledge amd-dbgapi event %" PRIu64),
+	       m_event_id.handle);
+  }
+
+  DISABLE_COPY_AND_ASSIGN (scoped_amd_dbgapi_event_processed);
+
+private:
+  amd_dbgapi_event_id_t m_event_id;
+};
+
+/* Called when a dbgapi notifier fd is readable.  CLIENT_DATA is the
+   amd_dbgapi_inferior_info object corresponding to the notifier.  */
+
+static void
+dbgapi_notifier_handler (int err, gdb_client_data client_data)
+{
+  amd_dbgapi_inferior_info *info = (amd_dbgapi_inferior_info *) client_data;
+  int ret;
+
+  /* Drain the notifier pipe.  */
+  do
+    {
+      char buf;
+      ret = read (info->notifier, &buf, 1);
+    }
+  while (ret >= 0 || (ret == -1 && errno == EINTR));
+
+  if (info->inf->target_is_pushed (&the_amd_dbgapi_target))
+    {
+      /* The amd-dbgapi target is pushed: signal our async handler, the event
+	 will be consumed through our wait method.  */
+
+      async_event_handler_mark ();
+    }
+  else
+    {
+      /* The amd-dbgapi target is not pushed: if there's an event, the only
+	 expected one is one of the RUNTIME kind.  If the event tells us the
+	 inferior has activated the ROCm runtime, push the amd-dbgapi
+	 target.  */
+
+      amd_dbgapi_event_id_t event_id;
+      amd_dbgapi_event_kind_t event_kind;
+      amd_dbgapi_status_t status
+	= amd_dbgapi_process_next_pending_event (info->process_id, &event_id,
+						 &event_kind);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("next_pending_event failed (%s)"), get_status_string (status));
+
+      if (event_id == AMD_DBGAPI_EVENT_NONE)
+	return;
+
+      gdb_assert (event_kind == AMD_DBGAPI_EVENT_KIND_RUNTIME);
+
+      scoped_amd_dbgapi_event_processed mark_event_processed (event_id);
+
+      amd_dbgapi_runtime_state_t runtime_state;
+      status = amd_dbgapi_event_get_info (event_id,
+					  AMD_DBGAPI_EVENT_INFO_RUNTIME_STATE,
+					  sizeof (runtime_state),
+					  &runtime_state);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("event_get_info for event_%ld failed (%s)"),
+	       event_id.handle, get_status_string (status));
+
+      switch (runtime_state)
+	{
+	case AMD_DBGAPI_RUNTIME_STATE_LOADED_SUCCESS:
+	  gdb_assert (info->runtime_state == AMD_DBGAPI_RUNTIME_STATE_UNLOADED);
+	  info->runtime_state = runtime_state;
+	  amd_dbgapi_debug_printf ("pushing amd-dbgapi target");
+	  info->inf->push_target (&the_amd_dbgapi_target);
+
+	  /* The underlying target will already be async if we are running,
+	     but not if we are attaching.  */
+	  if (info->inf->process_target ()->is_async_p ())
+	    {
+	      scoped_restore_current_thread restore_thread;
+	      switch_to_inferior_no_thread (info->inf);
+
+	      /* Make sure our async event handler is created.  */
+	      target_async (true);
+	    }
+	  break;
+
+	case AMD_DBGAPI_RUNTIME_STATE_UNLOADED:
+	  gdb_assert (info->runtime_state
+		      == AMD_DBGAPI_RUNTIME_STATE_LOADED_ERROR_RESTRICTION);
+	  info->runtime_state = runtime_state;
+	  break;
+
+	case AMD_DBGAPI_RUNTIME_STATE_LOADED_ERROR_RESTRICTION:
+	  gdb_assert (info->runtime_state == AMD_DBGAPI_RUNTIME_STATE_UNLOADED);
+	  info->runtime_state = runtime_state;
+	  warning (_("amd-dbgapi: unable to enable GPU debugging "
+		     "due to a restriction error"));
+	  break;
+	}
+    }
+}
+
+void
+amd_dbgapi_target::async (bool enable)
+{
+  beneath ()->async (enable);
+
+  if (enable)
+    {
+      if (amd_dbgapi_async_event_handler != nullptr)
+	{
+	  /* Already enabled.  */
+	  return;
+	}
+
+      /* The library gives us one notifier file descriptor per inferior (even
+	 the ones that have not yet loaded their runtime).  Register them
+	 all with the event loop.  */
+      process_stratum_target *proc_target
+	= current_inferior ()->process_target ();
+
+      for (inferior *inf : all_non_exited_inferiors (proc_target))
+	{
+	  amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+	  if (info->notifier != -1)
+	    add_file_handler (info->notifier, dbgapi_notifier_handler, info,
+			      string_printf ("amd-dbgapi notifier for pid %d",
+					     inf->pid));
+	}
+
+      amd_dbgapi_async_event_handler
+	= create_async_event_handler (handle_target_event, nullptr,
+				      "amd-dbgapi");
+
+      /* There may be pending events to handle.  Tell the event loop to poll
+	 them.  */
+      async_event_handler_mark ();
+    }
+  else
+    {
+      if (amd_dbgapi_async_event_handler == nullptr)
+	return;
+
+      for (inferior *inf : all_inferiors ())
+	{
+	  amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+	  if (info->notifier != -1)
+	    delete_file_handler (info->notifier);
+	}
+
+      delete_async_event_handler (&amd_dbgapi_async_event_handler);
+    }
+}
+
+/* Make a ptid for a GPU wave.  See comment on ptid_is_gpu for more details.  */
+
+static ptid_t
+make_gpu_ptid (ptid_t::pid_type pid, amd_dbgapi_wave_id_t wave_id)
+{
+  return ptid_t (pid, 1, wave_id.handle);
+}
+
+/* Process an event that was just pulled out of the amd-dbgapi library.  */
+
+static void
+process_one_event (amd_dbgapi_event_id_t event_id,
+		   amd_dbgapi_event_kind_t event_kind)
+{
+  /* Automatically mark this event processed when going out of scope.  */
+  scoped_amd_dbgapi_event_processed mark_event_processed (event_id);
+
+  amd_dbgapi_process_id_t process_id;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_event_get_info (event_id, AMD_DBGAPI_EVENT_INFO_PROCESS,
+				 sizeof (process_id), &process_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("event_get_info for event_%ld failed (%s)"), event_id.handle,
+	   get_status_string (status));
+
+  amd_dbgapi_os_process_id_t pid;
+  status = amd_dbgapi_process_get_info (process_id,
+					AMD_DBGAPI_PROCESS_INFO_OS_ID,
+				        sizeof (pid), &pid);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("process_get_info for process_%ld failed (%s)"),
+	   process_id.handle, get_status_string (status));
+
+  auto *proc_target = current_inferior ()->process_target ();
+  inferior *inf = find_inferior_pid (proc_target, pid);
+  gdb_assert (inf != nullptr);
+  amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+  switch (event_kind)
+    {
+    case AMD_DBGAPI_EVENT_KIND_WAVE_COMMAND_TERMINATED:
+    case AMD_DBGAPI_EVENT_KIND_WAVE_STOP:
+      {
+	amd_dbgapi_wave_id_t wave_id;
+	status
+	  = amd_dbgapi_event_get_info (event_id, AMD_DBGAPI_EVENT_INFO_WAVE,
+				       sizeof (wave_id), &wave_id);
+	if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	  error (_("event_get_info for event_%ld failed (%s)"),
+		 event_id.handle, get_status_string (status));
+
+	ptid_t event_ptid = make_gpu_ptid (pid, wave_id);
+	target_waitstatus ws;
+
+	amd_dbgapi_wave_stop_reasons_t stop_reason;
+	status = amd_dbgapi_wave_get_info (wave_id,
+					   AMD_DBGAPI_WAVE_INFO_STOP_REASON,
+					   sizeof (stop_reason), &stop_reason);
+	if (status == AMD_DBGAPI_STATUS_ERROR_INVALID_WAVE_ID
+	    && event_kind == AMD_DBGAPI_EVENT_KIND_WAVE_COMMAND_TERMINATED)
+	  ws.set_thread_exited (0);
+	else if (status == AMD_DBGAPI_STATUS_SUCCESS)
+	  {
+	    if (stop_reason & AMD_DBGAPI_WAVE_STOP_REASON_APERTURE_VIOLATION)
+	      ws.set_stopped (GDB_SIGNAL_BUS);
+	    else if (stop_reason
+		     & AMD_DBGAPI_WAVE_STOP_REASON_MEMORY_VIOLATION)
+	      ws.set_stopped (GDB_SIGNAL_SEGV);
+	    else if (stop_reason
+		     & AMD_DBGAPI_WAVE_STOP_REASON_ILLEGAL_INSTRUCTION)
+	      ws.set_stopped (GDB_SIGNAL_ILL);
+	    else if (stop_reason
+		     & (AMD_DBGAPI_WAVE_STOP_REASON_FP_INPUT_DENORMAL
+			| AMD_DBGAPI_WAVE_STOP_REASON_FP_DIVIDE_BY_0
+			| AMD_DBGAPI_WAVE_STOP_REASON_FP_OVERFLOW
+			| AMD_DBGAPI_WAVE_STOP_REASON_FP_UNDERFLOW
+			| AMD_DBGAPI_WAVE_STOP_REASON_FP_INEXACT
+			| AMD_DBGAPI_WAVE_STOP_REASON_FP_INVALID_OPERATION
+			| AMD_DBGAPI_WAVE_STOP_REASON_INT_DIVIDE_BY_0))
+	      ws.set_stopped (GDB_SIGNAL_FPE);
+	    else if (stop_reason
+		     & (AMD_DBGAPI_WAVE_STOP_REASON_BREAKPOINT
+			| AMD_DBGAPI_WAVE_STOP_REASON_WATCHPOINT
+			| AMD_DBGAPI_WAVE_STOP_REASON_SINGLE_STEP
+			| AMD_DBGAPI_WAVE_STOP_REASON_DEBUG_TRAP
+			| AMD_DBGAPI_WAVE_STOP_REASON_TRAP))
+	      ws.set_stopped (GDB_SIGNAL_TRAP);
+	    else if (stop_reason & AMD_DBGAPI_WAVE_STOP_REASON_ASSERT_TRAP)
+	      ws.set_stopped (GDB_SIGNAL_ABRT);
+	    else
+	      ws.set_stopped (GDB_SIGNAL_0);
+
+	    thread_info *thread = find_thread_ptid (proc_target, event_ptid);
+	    if (thread == nullptr)
+	      {
+		/* Silently create new GPU threads to avoid spamming the
+		   terminal with thousands of "[New Thread ...]" messages.  */
+		thread = add_thread_silent (proc_target, event_ptid);
+		set_running (proc_target, event_ptid, true);
+		set_executing (proc_target, event_ptid, true);
+	      }
+
+	    /* If the wave is stopped because of a software breakpoint, the
+	       program counter needs to be adjusted so that it points to the
+	       breakpoint instruction.  */
+	    if ((stop_reason & AMD_DBGAPI_WAVE_STOP_REASON_BREAKPOINT) != 0)
+	      {
+		regcache *regcache = get_thread_regcache (thread);
+		gdbarch *gdbarch = regcache->arch ();
+
+		CORE_ADDR pc = regcache_read_pc (regcache);
+		CORE_ADDR adjusted_pc
+		  = pc - gdbarch_decr_pc_after_break (gdbarch);
+
+		if (adjusted_pc != pc)
+		  regcache_write_pc (regcache, adjusted_pc);
+	      }
+	  }
+	else
+	  error (_("wave_get_info for wave_%ld failed (%s)"),
+		 wave_id.handle, get_status_string (status));
+
+	info->wave_events.emplace_back (event_ptid, ws);
+	break;
+      }
+
+    case AMD_DBGAPI_EVENT_KIND_CODE_OBJECT_LIST_UPDATED:
+      /* We get here when the following sequence of events happens:
+
+	   - the inferior hits the amd-dbgapi "r_brk" internal breakpoint
+	   - amd_dbgapi_target_breakpoint::check_status calls
+	     amd_dbgapi_report_breakpoint_hit, which queues an event of this
+	     kind in dbgapi
+	   - amd_dbgapi_target_breakpoint::check_status calls
+	     process_event_queue, which pulls the event out of dbgapi, and
+	     gets us here
+
+	 When amd_dbgapi_target_breakpoint::check_status is called, the current
+	 inferior is the inferior that hit the breakpoint, which should still be
+	 the case now.  */
+      gdb_assert (inf == current_inferior ());
+      handle_solib_event ();
+      break;
+
+    case AMD_DBGAPI_EVENT_KIND_BREAKPOINT_RESUME:
+      /* Breakpoint resume events should be handled by the breakpoint
+	 action (in check_status above), so this code should never be
+	 reached.  */
+      gdb_assert_not_reached ("unhandled event kind");
+      break;
+
+    case AMD_DBGAPI_EVENT_KIND_RUNTIME:
+      {
+	amd_dbgapi_runtime_state_t runtime_state;
+
+	status = amd_dbgapi_event_get_info (event_id,
+					    AMD_DBGAPI_EVENT_INFO_RUNTIME_STATE,
+					    sizeof (runtime_state),
+					    &runtime_state);
+	if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	  error (_("event_get_info for event_%ld failed (%s)"),
+		 event_id.handle, get_status_string (status));
+
+	gdb_assert (runtime_state == AMD_DBGAPI_RUNTIME_STATE_UNLOADED);
+	gdb_assert
+	  (info->runtime_state == AMD_DBGAPI_RUNTIME_STATE_LOADED_SUCCESS);
+
+	info->runtime_state = runtime_state;
+
+	gdb_assert (inf->target_is_pushed (&the_amd_dbgapi_target));
+	inf->unpush_target (&the_amd_dbgapi_target);
+      }
+      break;
+
+    default:
+      error (_("event kind (%d) not supported"), event_kind);
+    }
+}
+
+/* Return a textual version of KIND.  */
+
+static const char *
+event_kind_str (amd_dbgapi_event_kind_t kind)
+{
+  switch (kind)
+    {
+    case AMD_DBGAPI_EVENT_KIND_NONE:
+      return "NONE";
+
+    case AMD_DBGAPI_EVENT_KIND_WAVE_STOP:
+      return "WAVE_STOP";
+
+    case AMD_DBGAPI_EVENT_KIND_WAVE_COMMAND_TERMINATED:
+      return "WAVE_COMMAND_TERMINATED";
+
+    case AMD_DBGAPI_EVENT_KIND_CODE_OBJECT_LIST_UPDATED:
+      return "CODE_OBJECT_LIST_UPDATED";
+
+    case AMD_DBGAPI_EVENT_KIND_BREAKPOINT_RESUME:
+      return "BREAKPOINT_RESUME";
+
+    case AMD_DBGAPI_EVENT_KIND_RUNTIME:
+      return "RUNTIME";
+
+    case AMD_DBGAPI_EVENT_KIND_QUEUE_ERROR:
+      return "QUEUE_ERROR";
+    }
+
+  gdb_assert_not_reached ("unhandled amd_dbgapi_event_kind_t value");
+}
+
+/* Drain the dbgapi event queue of PROCESS_ID, or of all processes if
+   PROCESS_ID is AMD_DBGAPI_PROCESS_NONE.  If UNTIL_EVENT_KIND is not
+   AMD_DBGAPI_EVENT_KIND_NONE, stop draining when an event of that kind is
+   pulled out, and return its event id.  Wave stop events that are not
+   returned are queued into their inferior's amd_dbgapi_inferior_info pending
+   wave events.  */
+
+static amd_dbgapi_event_id_t
+process_event_queue (amd_dbgapi_process_id_t process_id,
+		     amd_dbgapi_event_kind_t until_event_kind)
+{
+  /* An event of a given type can only be requested from a single
+     process_id.  */
+  gdb_assert (until_event_kind == AMD_DBGAPI_EVENT_KIND_NONE
+	      || process_id != AMD_DBGAPI_PROCESS_NONE);
+
+  while (true)
+    {
+      amd_dbgapi_event_id_t event_id;
+      amd_dbgapi_event_kind_t event_kind;
+
+      amd_dbgapi_status_t status
+	= amd_dbgapi_process_next_pending_event (process_id, &event_id,
+						 &event_kind);
+
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("next_pending_event failed (%s)"), get_status_string (status));
+
+      if (event_kind != AMD_DBGAPI_EVENT_KIND_NONE)
+	amd_dbgapi_debug_printf ("Pulled event from dbgapi: "
+				 "event_id.handle = %" PRIu64 ", "
+				 "event_kind = %s",
+				 event_id.handle,
+				 event_kind_str (event_kind));
+
+      if (event_id == AMD_DBGAPI_EVENT_NONE || event_kind == until_event_kind)
+	return event_id;
+
+      process_one_event (event_id, event_kind);
+    }
+}
+
+bool
+amd_dbgapi_target::has_pending_events ()
+{
+  if (amd_dbgapi_async_event_handler != nullptr
+      && async_event_handler_marked (amd_dbgapi_async_event_handler))
+    return true;
+
+  return beneath ()->has_pending_events ();
+}
+
+/* Pop one pending event from the per-inferior structures.
+
+   If PID is not -1, restrict the search to the inferior with that pid.  */
+
+static std::pair<ptid_t, target_waitstatus>
+consume_one_event (int pid)
+{
+  auto *target = current_inferior ()->process_target ();
+  struct amd_dbgapi_inferior_info *info = nullptr;
+
+  if (pid == -1)
+    {
+      for (inferior *inf : all_inferiors (target))
+	{
+	  info = get_amd_dbgapi_inferior_info (inf);
+	  if (!info->wave_events.empty ())
+	    break;
+	}
+
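+      /* INFO is nullptr only if there are no inferiors at all, which is not
+	 expected here.  If no inferior had a pending event, INFO simply
+	 points to the last inferior visited and the emptiness check below
+	 yields a "no event" result.  */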
+      gdb_assert (info != nullptr);
+    }
+  else
+    {
+      inferior *inf = find_inferior_pid (target, pid);
+
+      gdb_assert (inf != nullptr);
+      info = get_amd_dbgapi_inferior_info (inf);
+    }
+
+  if (info->wave_events.empty ())
+    return { minus_one_ptid, {} };
+
+  auto event = info->wave_events.front ();
+  info->wave_events.pop_front ();
+
+  return event;
+}
+
+ptid_t
+amd_dbgapi_target::wait (ptid_t ptid, struct target_waitstatus *ws,
+		       target_wait_flags target_options)
+{
+  gdb_assert (!current_inferior ()->process_target ()->commit_resumed_state);
+  gdb_assert (ptid == minus_one_ptid || ptid.is_pid ());
+
+  amd_dbgapi_debug_printf ("ptid = %s", ptid.to_string ().c_str ());
+
+  ptid_t event_ptid = beneath ()->wait (ptid, ws, target_options);
+  if (event_ptid != minus_one_ptid)
+    {
+      if (ws->kind () == TARGET_WAITKIND_EXITED
+	  || ws->kind () == TARGET_WAITKIND_SIGNALLED)
+	{
+	  /* This inferior has exited so drain its dbgapi event queue.  */
+	  while (consume_one_event (event_ptid.pid ()).first
+		 != minus_one_ptid)
+	    ;
+	}
+      return event_ptid;
+    }
+
+  gdb_assert (ws->kind () == TARGET_WAITKIND_NO_RESUMED
+	      || ws->kind () == TARGET_WAITKIND_IGNORE);
+
+  /* Flush the async handler first.  */
+  if (target_is_async_p ())
+    async_event_handler_clear ();
+
+  /* There may be more events to process (either already in `wave_events` or
+     still to be fetched from dbgapi).  Mark the async event handler so that
+     amd_dbgapi_target::wait gets called again and again, until it eventually
+     returns minus_one_ptid.  */
+  auto more_events = make_scope_exit ([] ()
+    {
+      if (target_is_async_p ())
+	async_event_handler_mark ();
+    });
+
+  auto *proc_target = current_inferior ()->process_target ();
+
+  /* Disable forward progress for the specified pid in ptid if it isn't
+     minus_one_ptid, or for all attached processes if ptid is
+     minus_one_ptid.  */
+  require_forward_progress (ptid, proc_target, false);
+
+  target_waitstatus gpu_waitstatus;
+  std::tie (event_ptid, gpu_waitstatus) = consume_one_event (ptid.pid ());
+  if (event_ptid == minus_one_ptid)
+    {
+      /* Drain the events from the amd_dbgapi and preserve the ordering.  */
+      process_event_queue ();
+
+      std::tie (event_ptid, gpu_waitstatus) = consume_one_event (ptid.pid ());
+      if (event_ptid == minus_one_ptid)
+	{
+	  /* If we requested a specific ptid, and nothing came out, assume
+	     another ptid may have more events, otherwise, keep the
+	     async_event_handler flushed.  */
+	  if (ptid == minus_one_ptid)
+	    more_events.release ();
+
+	  if (ws->kind () == TARGET_WAITKIND_NO_RESUMED)
+	    {
+	      /* We can't easily check that all GPU waves are stopped, and no
+		 new waves can be created (the GPU has fixed function hardware
+		 to create new threads), so even if the target beneath returns
+		 waitkind_no_resumed, we have to report waitkind_ignore if GPU
+		 debugging is enabled for at least one resumed inferior handled
+		 by the amd-dbgapi target.  */
+
+	      for (inferior *inf : all_inferiors ())
+		if (inf->target_at (arch_stratum) == &the_amd_dbgapi_target
+		    && get_amd_dbgapi_inferior_info (inf)->runtime_state
+			 == AMD_DBGAPI_RUNTIME_STATE_LOADED_SUCCESS)
+		  {
+		    ws->set_ignore ();
+		    break;
+		  }
+	    }
+
+	  /* There are no events to report, return the target beneath's
+	     waitstatus (either IGNORE or NO_RESUMED).  */
+	  return minus_one_ptid;
+	}
+    }
+
+  *ws = gpu_waitstatus;
+  return event_ptid;
+}
+
+bool
+amd_dbgapi_target::stopped_by_sw_breakpoint ()
+{
+  if (!ptid_is_gpu (inferior_ptid))
+    return beneath ()->stopped_by_sw_breakpoint ();
+
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (inferior_ptid);
+
+  amd_dbgapi_wave_stop_reasons_t stop_reason;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_STOP_REASON,
+				sizeof (stop_reason), &stop_reason);
+
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    return false;
+
+  return (stop_reason & AMD_DBGAPI_WAVE_STOP_REASON_BREAKPOINT) != 0;
+}
+
+bool
+amd_dbgapi_target::stopped_by_hw_breakpoint ()
+{
+  if (!ptid_is_gpu (inferior_ptid))
+    return beneath ()->stopped_by_hw_breakpoint ();
+
+  return false;
+}
+
+/* Make the amd-dbgapi library attach to the process behind INF.
+
+   Note that this is unrelated to the "attach" GDB concept / command.
+
+   By attaching to the process, we get a notifier fd that tells us when it
+   activates the ROCm runtime and when there are subsequent debug events.  */
+
+static void
+attach_amd_dbgapi (inferior *inf)
+{
+  AMD_DBGAPI_SCOPED_DEBUG_START_END ("inf num = %d", inf->num);
+
+  if (!target_can_async_p ())
+    {
+      warning (_("The amd-dbgapi target requires the target beneath to be "
+		 "asynchronous, GPU debugging is disabled"));
+      return;
+    }
+
+  auto *info = get_amd_dbgapi_inferior_info (inf);
+
+  /* Are we already attached?  */
+  if (info->process_id != AMD_DBGAPI_PROCESS_NONE)
+    {
+      amd_dbgapi_debug_printf
+	("already attached: process_id = %" PRIu64, info->process_id.handle);
+      return;
+    }
+
+  amd_dbgapi_status_t status
+    = amd_dbgapi_process_attach
+	(reinterpret_cast<amd_dbgapi_client_process_id_t> (inf),
+	 &info->process_id);
+  if (status == AMD_DBGAPI_STATUS_ERROR_RESTRICTION)
+    {
+      warning (_("amd-dbgapi: unable to enable GPU debugging due to a "
+		 "restriction error"));
+      return;
+    }
+  else if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("amd-dbgapi: could not attach to process %d (%s), GPU "
+		 "debugging will not be available."), inf->pid,
+	       get_status_string (status));
+      return;
+    }
+
+  if (amd_dbgapi_process_get_info (info->process_id,
+				   AMD_DBGAPI_PROCESS_INFO_NOTIFIER,
+				   sizeof (info->notifier), &info->notifier)
+      != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      amd_dbgapi_process_detach (info->process_id);
+      info->process_id = AMD_DBGAPI_PROCESS_NONE;
+      warning (_("amd-dbgapi: could not retrieve process %d's notifier, GPU "
+		 "debugging will not be available."), inf->pid);
+      return;
+    }
+
+  amd_dbgapi_debug_printf ("process_id = %" PRIu64 ", notifier fd = %d",
+			   info->process_id.handle, info->notifier);
+
+  /* If GDB is attaching to a process that has the runtime loaded, there will
+     already be a "runtime loaded" event available.  Consume it and push the
+     target.  */
+  dbgapi_notifier_handler (0, info);
+
+  add_file_handler (info->notifier, dbgapi_notifier_handler, info,
+		    "amd-dbgapi notifier");
+}
+
+static void maybe_reset_amd_dbgapi ();
+
+/* Make the amd-dbgapi library detach from INF.
+
+   Note that this is unrelated to the "detach" GDB concept / command.
+
+   This undoes what attach_amd_dbgapi does.  */
+
+static void
+detach_amd_dbgapi (inferior *inf)
+{
+  AMD_DBGAPI_SCOPED_DEBUG_START_END ("inf num = %d", inf->num);
+
+  auto *info = get_amd_dbgapi_inferior_info (inf);
+
+  if (info->process_id == AMD_DBGAPI_PROCESS_NONE)
+    return;
+
+  info->runtime_state = AMD_DBGAPI_RUNTIME_STATE_UNLOADED;
+
+  amd_dbgapi_status_t status = amd_dbgapi_process_detach (info->process_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    warning (_("amd-dbgapi: could not detach from process %d (%s)"),
+	     inf->pid, get_status_string (status));
+
+  gdb_assert (info->notifier != -1);
+  delete_file_handler (info->notifier);
+
+  /* This is a noop if the target is not pushed.  */
+  inf->unpush_target (&the_amd_dbgapi_target);
+
+  /* Delete the breakpoints that are still active.  */
+  for (auto &&value : info->breakpoint_map)
+    delete_breakpoint (value.second);
+
+  /* Reset the amd_dbgapi_inferior_info.  */
+  *info = amd_dbgapi_inferior_info (inf);
+
+  maybe_reset_amd_dbgapi ();
+}
+
+void
+amd_dbgapi_target::mourn_inferior ()
+{
+  detach_amd_dbgapi (current_inferior ());
+  beneath ()->mourn_inferior ();
+}
+
+void
+amd_dbgapi_target::detach (inferior *inf, int from_tty)
+{
+  /* We're about to resume the waves by detaching the dbgapi library from the
+     inferior, so we need to remove all breakpoints that are still inserted.
+
+     Breakpoints may still be inserted because the inferior may be running in
+     non-stop mode, or because GDB changed the default setting to leave all
+     breakpoints inserted in all-stop mode when all threads are stopped.  */
+  remove_breakpoints_inf (current_inferior ());
+
+  detach_amd_dbgapi (inf);
+  beneath ()->detach (inf, from_tty);
+}
+
+void
+amd_dbgapi_target::fetch_registers (struct regcache *regcache, int regno)
+{
+  if (!ptid_is_gpu (regcache->ptid ()))
+    {
+      beneath ()->fetch_registers (regcache, regno);
+      return;
+    }
+
+  struct gdbarch *gdbarch = regcache->arch ();
+  gdb_assert (is_amdgpu_arch (gdbarch));
+
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (regcache->ptid ());
+  gdb_byte raw[AMDGPU_MAX_REGISTER_SIZE];
+  amd_dbgapi_status_t status
+    = amd_dbgapi_read_register (wave_id, tdep->register_ids[regno], 0,
+				register_type (gdbarch, regno)->length (),
+				raw);
+
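+  /* If the register is not available for this wave, no value is supplied and
+     no warning is printed.  */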
+  if (status == AMD_DBGAPI_STATUS_SUCCESS)
+    regcache->raw_supply (regno, raw);
+  else if (status != AMD_DBGAPI_STATUS_ERROR_REGISTER_NOT_AVAILABLE)
+    warning (_("Couldn't read register %s (#%d) (%s)."),
+	     gdbarch_register_name (gdbarch, regno), regno,
+	     get_status_string (status));
+}
+
+void
+amd_dbgapi_target::store_registers (struct regcache *regcache, int regno)
+{
+  if (!ptid_is_gpu (regcache->ptid ()))
+    {
+      beneath ()->store_registers (regcache, regno);
+      return;
+    }
+
+  struct gdbarch *gdbarch = regcache->arch ();
+  gdb_assert (is_amdgpu_arch (gdbarch));
+
+  gdb_byte raw[AMDGPU_MAX_REGISTER_SIZE];
+  regcache->raw_collect (regno, &raw);
+
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+
+  /* If the register has read-only bits, invalidate the value in the regcache
+     as the value actually written may differ.  */
+  if (tdep->register_properties[regno]
+      & AMD_DBGAPI_REGISTER_PROPERTY_READONLY_BITS)
+    regcache->invalidate (regno);
+
+  /* Invalidate all volatile registers if this register has the invalidate
+     volatile property.  For example, writing to VCC may change the content
+     of STATUS.VCCZ.  */
+  if (tdep->register_properties[regno]
+      & AMD_DBGAPI_REGISTER_PROPERTY_INVALIDATE_VOLATILE)
+    {
+      for (size_t r = 0; r < tdep->register_properties.size (); ++r)
+	if (tdep->register_properties[r] & AMD_DBGAPI_REGISTER_PROPERTY_VOLATILE)
+	  regcache->invalidate (r);
+    }
+
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (regcache->ptid ());
+  amd_dbgapi_status_t status
+    = amd_dbgapi_write_register (wave_id, tdep->register_ids[regno], 0,
+				 register_type (gdbarch, regno)->length (),
+				 raw);
+
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    warning (_("Couldn't write register %s (#%d)."),
+	     gdbarch_register_name (gdbarch, regno), regno);
+}
+
+struct gdbarch *
+amd_dbgapi_target::thread_architecture (ptid_t ptid)
+{
+  if (!ptid_is_gpu (ptid))
+    return beneath ()->thread_architecture (ptid);
+
+  /* We can cache the gdbarch for a given wave_id (ptid::tid) because
+     wave IDs are unique, and aren't reused.  */
+  if (ptid.tid () == m_cached_arch_tid)
+    return m_cached_arch;
+
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (ptid);
+  amd_dbgapi_architecture_id_t architecture_id;
+  amd_dbgapi_status_t status;
+
+  status = amd_dbgapi_wave_get_info (wave_id, AMD_DBGAPI_WAVE_INFO_ARCHITECTURE,
+				     sizeof (architecture_id),
+				     &architecture_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("Couldn't get architecture for wave_%ld"), ptid.tid ());
+
+  uint32_t elf_amdgpu_machine;
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id, AMD_DBGAPI_ARCHITECTURE_INFO_ELF_AMDGPU_MACHINE,
+     sizeof (elf_amdgpu_machine), &elf_amdgpu_machine);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("Couldn't get elf_amdgpu_machine for architecture_%ld"),
+	   architecture_id.handle);
+
+  struct gdbarch_info info;
+  info.bfd_arch_info = bfd_lookup_arch (bfd_arch_amdgcn, elf_amdgpu_machine);
+  info.byte_order = BFD_ENDIAN_LITTLE;
+
+  m_cached_arch_tid = ptid.tid ();
+  m_cached_arch = gdbarch_find_by_info (info);
+  if (m_cached_arch == nullptr)
+    error (_("Couldn't get elf_amdgpu_machine (%#x)"), elf_amdgpu_machine);
+
+  return m_cached_arch;
+}
+
+void
+amd_dbgapi_target::thread_events (int enable)
+{
+  m_report_thread_events = enable;
+  beneath ()->thread_events (enable);
+}
+
+void
+amd_dbgapi_target::update_thread_list ()
+{
+  for (inferior *inf : all_inferiors ())
+    {
+      amd_dbgapi_process_id_t process_id
+	= get_amd_dbgapi_process_id (inf);
+      if (process_id == AMD_DBGAPI_PROCESS_NONE)
+	{
+	  /* The inferior may not be attached yet.  */
+	  continue;
+	}
+
+      size_t count;
+      amd_dbgapi_wave_id_t *wave_list;
+      amd_dbgapi_changed_t changed;
+      amd_dbgapi_status_t status
+	= amd_dbgapi_process_wave_list (process_id, &count, &wave_list,
+					&changed);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("amd_dbgapi_wave_list failed (%s)"),
+	       get_status_string (status));
+
+      if (changed == AMD_DBGAPI_CHANGED_NO)
+	continue;
+
+      /* Create a set and free the wave list.  */
+      std::set<ptid_t::tid_type> threads;
+      for (size_t i = 0; i < count; ++i)
+	threads.emplace (wave_list[i].handle);
+
+      xfree (wave_list);
+
+      /* Prune the wave_ids that already have a thread_info.  Any thread_info
+	 which does not have a corresponding wave_id represents a wave which
+	 is gone at this point and should be deleted.  */
+      for (thread_info *tp : inf->threads_safe ())
+	if (ptid_is_gpu (tp->ptid) && tp->state != THREAD_EXITED)
+	  {
+	    auto it = threads.find (tp->ptid.tid ());
+
+	    if (it == threads.end ())
+	      delete_thread (tp);
+	    else
+	      threads.erase (it);
+	  }
+
+      /* The wave_ids that are left require a new thread_info.  */
+      for (ptid_t::tid_type tid : threads)
+	{
+	  ptid_t wave_ptid
+	    = make_gpu_ptid (inf->pid, amd_dbgapi_wave_id_t {tid});
+
+	  add_thread_silent (inf->process_target (), wave_ptid);
+	  set_running (inf->process_target (), wave_ptid, true);
+	  set_executing (inf->process_target (), wave_ptid, true);
+	}
+    }
+
+  /* Give the beneath target a chance to do extra processing.  */
+  this->beneath ()->update_thread_list ();
+}
+
+/* inferior_created observer.  */
+
+static void
+amd_dbgapi_target_inferior_created (inferior *inf)
+{
+  /* If the inferior is not running on the native target (e.g. it is running
+     on a remote target), we don't want to deal with it.  */
+  if (inf->process_target () != get_native_target ())
+    return;
+
+  attach_amd_dbgapi (inf);
+}
+
+/* inferior_exit observer.
+
+   This covers normal exits, but also detached inferiors (including detached
+   fork parents).  */
+
+static void
+amd_dbgapi_inferior_exited (inferior *inf)
+{
+  detach_amd_dbgapi (inf);
+}
+
+/* inferior_pre_detach observer.  */
+
+static void
+amd_dbgapi_inferior_pre_detach (inferior *inf)
+{
+  /* We need to amd-dbgapi-detach before we ptrace-detach.  If the amd-dbgapi
+     target isn't pushed, do that now.  If the amd-dbgapi target is pushed,
+     we'll do it in amd_dbgapi_target::detach.  */
+  if (!inf->target_is_pushed (&the_amd_dbgapi_target))
+    detach_amd_dbgapi (inf);
+}
+
+/* get_os_pid callback.  */
+
+static amd_dbgapi_status_t
+amd_dbgapi_get_os_pid_callback
+  (amd_dbgapi_client_process_id_t client_process_id, pid_t *pid)
+{
+  inferior *inf = reinterpret_cast<inferior *> (client_process_id);
+
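+  /* A pid of 0 means the inferior is not associated with a process anymore,
+     e.g. because it has exited.  */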
+  if (inf->pid == 0)
+    return AMD_DBGAPI_STATUS_ERROR_PROCESS_EXITED;
+
+  *pid = inf->pid;
+  return AMD_DBGAPI_STATUS_SUCCESS;
+}
+
+/* insert_breakpoint callback.  */
+
+static amd_dbgapi_status_t
+amd_dbgapi_insert_breakpoint_callback
+  (amd_dbgapi_client_process_id_t client_process_id,
+   amd_dbgapi_global_address_t address,
+   amd_dbgapi_breakpoint_id_t breakpoint_id)
+{
+  inferior *inf = reinterpret_cast<inferior *> (client_process_id);
+  struct amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+  auto it = info->breakpoint_map.find (breakpoint_id.handle);
+  if (it != info->breakpoint_map.end ())
+    return AMD_DBGAPI_STATUS_ERROR_INVALID_BREAKPOINT_ID;
+
+  /* We need to find the address in the given inferior's program space.  */
+  scoped_restore_current_thread restore_thread;
+  switch_to_inferior_no_thread (inf);
+
+  /* Create a new breakpoint.  */
+  struct obj_section *section = find_pc_section (address);
+  if (section == nullptr || section->objfile == nullptr)
+    return AMD_DBGAPI_STATUS_ERROR;
+
+  std::unique_ptr<breakpoint> bp_up
+    (new amd_dbgapi_target_breakpoint (section->objfile->arch (), address));
+
+  breakpoint *bp = install_breakpoint (true, std::move (bp_up), 1);
+
+  info->breakpoint_map.emplace (breakpoint_id.handle, bp);
+  return AMD_DBGAPI_STATUS_SUCCESS;
+}
+
+/* remove_breakpoint callback.  */
+
+static amd_dbgapi_status_t
+amd_dbgapi_remove_breakpoint_callback
+  (amd_dbgapi_client_process_id_t client_process_id,
+   amd_dbgapi_breakpoint_id_t breakpoint_id)
+{
+  inferior *inf = reinterpret_cast<inferior *> (client_process_id);
+  struct amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+  auto it = info->breakpoint_map.find (breakpoint_id.handle);
+  if (it == info->breakpoint_map.end ())
+    return AMD_DBGAPI_STATUS_ERROR_INVALID_BREAKPOINT_ID;
+
+  delete_breakpoint (it->second);
+  info->breakpoint_map.erase (it);
+
+  return AMD_DBGAPI_STATUS_SUCCESS;
+}
+
+/* Style for some kinds of messages.  */
+
+static cli_style_option fatal_error_style
+  ("amd_dbgapi_fatal_error", ui_file_style::RED);
+static cli_style_option warning_style
+  ("amd_dbgapi_warning", ui_file_style::YELLOW);
+
+/* BLACK + BOLD means dark gray.  */
+static cli_style_option trace_style
+  ("amd_dbgapi_trace", ui_file_style::BLACK, ui_file_style::BOLD);
+
+/* log_message callback.  */
+
+static void
+amd_dbgapi_log_message_callback (amd_dbgapi_log_level_t level,
+				 const char *message)
+{
+  gdb::optional<target_terminal::scoped_restore_terminal_state> tstate;
+
+  if (target_supports_terminal_ours ())
+    {
+      tstate.emplace ();
+      target_terminal::ours_for_output ();
+    }
+
+  /* Error and warning messages are meant to be printed to the user.  */
+  if (level == AMD_DBGAPI_LOG_LEVEL_FATAL_ERROR
+      || level == AMD_DBGAPI_LOG_LEVEL_WARNING)
+    {
+      begin_line ();
+      ui_file_style style = (level == AMD_DBGAPI_LOG_LEVEL_FATAL_ERROR
+			     ? fatal_error_style : warning_style).style ();
+      gdb_printf (gdb_stderr, "%ps\n", styled_string (style, message));
+      return;
+    }
+
+  /* Print other messages as debug logs.  TRACE and VERBOSE messages are
+     very verbose, print them dark grey so it's easier to spot other messages
+     through the flood.  */
+  if (level >= AMD_DBGAPI_LOG_LEVEL_TRACE)
+    {
+      debug_prefixed_printf (amd_dbgapi_lib_debug_module (), nullptr, "%ps",
+			     styled_string (trace_style.style (), message));
+      return;
+    }
+
+  debug_prefixed_printf (amd_dbgapi_lib_debug_module (), nullptr, "%s",
+			 message);
+}
+
+/* Callbacks passed to amd_dbgapi_initialize.  */
+
+static amd_dbgapi_callbacks_t dbgapi_callbacks = {
+  .allocate_memory = malloc,
+  .deallocate_memory = free,
+  .get_os_pid = amd_dbgapi_get_os_pid_callback,
+  .insert_breakpoint = amd_dbgapi_insert_breakpoint_callback,
+  .remove_breakpoint = amd_dbgapi_remove_breakpoint_callback,
+  .log_message = amd_dbgapi_log_message_callback,
+};
+
+void
+amd_dbgapi_target::close ()
+{
+  if (amd_dbgapi_async_event_handler != nullptr)
+    delete_async_event_handler (&amd_dbgapi_async_event_handler);
+}
+
+/* List of set/show debug amd-dbgapi-lib commands.  */
+struct cmd_list_element *set_debug_amd_dbgapi_lib_list;
+struct cmd_list_element *show_debug_amd_dbgapi_lib_list;
+
+/* Mapping from amd-dbgapi log level enum values to text.  */
+
+static constexpr const char *debug_amd_dbgapi_lib_log_level_enums[] =
+{
+  /* [AMD_DBGAPI_LOG_LEVEL_NONE] = */ "off",
+  /* [AMD_DBGAPI_LOG_LEVEL_FATAL_ERROR] = */ "error",
+  /* [AMD_DBGAPI_LOG_LEVEL_WARNING] = */ "warning",
+  /* [AMD_DBGAPI_LOG_LEVEL_INFO] = */ "info",
+  /* [AMD_DBGAPI_LOG_LEVEL_TRACE] = */ "trace",
+  /* [AMD_DBGAPI_LOG_LEVEL_VERBOSE] = */ "verbose",
+  nullptr
+};
+
+/* Storage for "set debug amd-dbgapi-lib log-level".  */
+
+static const char *debug_amd_dbgapi_lib_log_level
+  = debug_amd_dbgapi_lib_log_level_enums[AMD_DBGAPI_LOG_LEVEL_WARNING];
+
+/* Get the amd-dbgapi library log level requested by the user.  */
+
+static amd_dbgapi_log_level_t
+get_debug_amd_dbgapi_lib_log_level ()
+{
+  for (size_t pos = 0;
+       debug_amd_dbgapi_lib_log_level_enums[pos] != nullptr;
+       ++pos)
+    if (debug_amd_dbgapi_lib_log_level
+	== debug_amd_dbgapi_lib_log_level_enums[pos])
+      return static_cast<amd_dbgapi_log_level_t> (pos);
+
+  gdb_assert_not_reached ("invalid log level");
+}
+
+/* Callback for "set debug amd-dbgapi log-level", apply the selected log level
+   to the library.  */
+
+static void
+set_debug_amd_dbgapi_lib_log_level (const char *args, int from_tty,
+				    struct cmd_list_element *c)
+{
+  amd_dbgapi_set_log_level (get_debug_amd_dbgapi_lib_log_level ());
+}
+
+/* Callback for "show debug amd-dbgapi log-level".  */
+
+static void
+show_debug_amd_dbgapi_lib_log_level (struct ui_file *file, int from_tty,
+				     struct cmd_list_element *c,
+				     const char *value)
+{
+  gdb_printf (file, _("The amd-dbgapi library log level is %s.\n"), value);
+}
+
+/* If the amd-dbgapi library is not attached to any process, finalize and
+   re-initialize it so that the handle ID numbers will all start from the
+   beginning again.  This is only for convenience, not essential.  */
+
+static void
+maybe_reset_amd_dbgapi ()
+{
+  for (inferior *inf : all_non_exited_inferiors ())
+    {
+      amd_dbgapi_inferior_info *info = get_amd_dbgapi_inferior_info (inf);
+
+      if (info->process_id != AMD_DBGAPI_PROCESS_NONE)
+	return;
+    }
+
+  amd_dbgapi_status_t status = amd_dbgapi_finalize ();
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd-dbgapi failed to finalize (%s)"),
+	   get_status_string (status));
+
+  status = amd_dbgapi_initialize (&dbgapi_callbacks);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd-dbgapi failed to initialize (%s)"),
+	   get_status_string (status));
+}
+
+extern initialize_file_ftype _initialize_amd_dbgapi_target;
+
+void
+_initialize_amd_dbgapi_target ()
+{
+  /* Make sure the loaded debugger library has the same major version as the
+     one used to build GDB, and a minor version at least as recent.  */
+  uint32_t major, minor, patch;
+  amd_dbgapi_get_version (&major, &minor, &patch);
+  if (major != AMD_DBGAPI_VERSION_MAJOR || minor < AMD_DBGAPI_VERSION_MINOR)
+    error (_("amd-dbgapi library version mismatch, got %d.%d.%d, need %d.%d+"),
+	   major, minor, patch, AMD_DBGAPI_VERSION_MAJOR,
+	   AMD_DBGAPI_VERSION_MINOR);
+
+  /* Initialize the AMD Debugger API.  */
+  amd_dbgapi_status_t status = amd_dbgapi_initialize (&dbgapi_callbacks);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd-dbgapi failed to initialize (%s)"),
+	   get_status_string (status));
+
+  /* Set the initial log level.  */
+  amd_dbgapi_set_log_level (get_debug_amd_dbgapi_lib_log_level ());
+
+  /* Install observers.  */
+  gdb::observers::inferior_created.attach
+    (amd_dbgapi_target_inferior_created,
+     amd_dbgapi_target_inferior_created_observer_token, "amd-dbgapi");
+  gdb::observers::inferior_exit.attach (amd_dbgapi_inferior_exited,
+					"amd-dbgapi");
+  gdb::observers::inferior_pre_detach.attach (amd_dbgapi_inferior_pre_detach,
+					      "amd-dbgapi");
+
+  add_basic_prefix_cmd ("amd-dbgapi-lib", no_class,
+			_("Generic command for setting amd-dbgapi library "
+			  "debugging flags."),
+			&set_debug_amd_dbgapi_lib_list, 0, &setdebuglist);
+
+  add_show_prefix_cmd ("amd-dbgapi-lib", no_class,
+		       _("Generic command for showing amd-dbgapi library "
+			 "debugging flags."),
+		       &show_debug_amd_dbgapi_lib_list, 0, &showdebuglist);
+
+  add_setshow_enum_cmd ("log-level", class_maintenance,
+			debug_amd_dbgapi_lib_log_level_enums,
+			&debug_amd_dbgapi_lib_log_level,
+			_("Set the amd-dbgapi library log level."),
+			_("Show the amd-dbgapi library log level."),
+			_("off     == no logging is enabled\n"
+			  "error   == fatal errors are reported\n"
+			  "warning == fatal errors and warnings are reported\n"
+			  "info    == fatal errors, warnings, and info "
+			  "messages are reported\n"
+			  "trace   == fatal errors, warnings, info, and "
+			  "API tracing messages are reported\n"
+			  "verbose == all messages are reported"),
+			set_debug_amd_dbgapi_lib_log_level,
+			show_debug_amd_dbgapi_lib_log_level,
+			&set_debug_amd_dbgapi_lib_list,
+			&show_debug_amd_dbgapi_lib_list);
+
+  add_setshow_boolean_cmd ("amd-dbgapi", class_maintenance,
+			   &debug_amd_dbgapi,
+			   _("Set debugging of amd-dbgapi target."),
+			   _("Show debugging of amd-dbgapi target."),
+			   _("\
+When on, print debug messages relating to the amd-dbgapi target."),
+			   nullptr, nullptr,
+			   &setdebuglist, &showdebuglist);
+}
diff --git a/gdb/amd-dbgapi-target.h b/gdb/amd-dbgapi-target.h
new file mode 100644
index 00000000000..beff2ad0bed
--- /dev/null
+++ b/gdb/amd-dbgapi-target.h
@@ -0,0 +1,116 @@ 
+/* Target used to communicate with the AMD Debugger API.
+
+   Copyright (C) 2019-2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef AMD_DBGAPI_TARGET_H
+#define AMD_DBGAPI_TARGET_H 1
+
+#include "gdbsupport/observable.h"
+
+#include <amd-dbgapi/amd-dbgapi.h>
+
+struct inferior;
+
+namespace detail
+{
+
+template <typename T>
+using is_amd_dbgapi_handle
+  = gdb::Or<std::is_same<T, amd_dbgapi_address_class_id_t>,
+	    std::is_same<T, amd_dbgapi_address_space_id_t>,
+	    std::is_same<T, amd_dbgapi_architecture_id_t>,
+	    std::is_same<T, amd_dbgapi_agent_id_t>,
+	    std::is_same<T, amd_dbgapi_breakpoint_id_t>,
+	    std::is_same<T, amd_dbgapi_code_object_id_t>,
+	    std::is_same<T, amd_dbgapi_dispatch_id_t>,
+	    std::is_same<T, amd_dbgapi_displaced_stepping_id_t>,
+	    std::is_same<T, amd_dbgapi_event_id_t>,
+	    std::is_same<T, amd_dbgapi_process_id_t>,
+	    std::is_same<T, amd_dbgapi_queue_id_t>,
+	    std::is_same<T, amd_dbgapi_register_class_id_t>,
+	    std::is_same<T, amd_dbgapi_register_id_t>,
+	    std::is_same<T, amd_dbgapi_watchpoint_id_t>,
+	    std::is_same<T, amd_dbgapi_wave_id_t>>;
+
+} /* namespace detail */
+
+/* Get the token of amd-dbgapi's inferior_created observer.  */
+
+const gdb::observers::token &
+  get_amd_dbgapi_target_inferior_created_observer_token ();
+
+/* Comparison operators for amd-dbgapi handle types.  */
+
+template <typename T,
+	  typename = gdb::Requires<detail::is_amd_dbgapi_handle<T>>>
+bool
+operator== (const T &lhs, const T &rhs)
+{
+  return lhs.handle == rhs.handle;
+}
+
+template <typename T,
+	  typename = gdb::Requires<detail::is_amd_dbgapi_handle<T>>>
+bool
+operator!= (const T &lhs, const T &rhs)
+{
+  return !(lhs == rhs);
+}
+
+/* Return true if the given ptid is a GPU thread (wave) ptid.  */
+
+static inline bool
+ptid_is_gpu (ptid_t ptid)
+{
+  /* FIXME: Currently using values that are known not to conflict with other
+     processes to indicate if it is a GPU thread.  ptid.pid 1 is the init
+     process and is the only process that could have a ptid.lwp of 1.  The init
+     process cannot have a GPU.  No other process can have a ptid.lwp of 1.
+     The GPU wave ID is stored in the ptid.tid.  */
+  return ptid.pid () != 1 && ptid.lwp () == 1;
+}
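+
+/* For example (illustrative values only): a wave with amd-dbgapi handle 42
+   in the inferior with host pid 1234 is represented as ptid_t (1234, 1, 42).
+   A regular host thread's lwp is its kernel thread id, which (as the FIXME
+   above notes) can only be 1 for the init process, so the encodings cannot
+   collide.  */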
+
+/* Return INF's amd_dbgapi process id.  */
+
+amd_dbgapi_process_id_t get_amd_dbgapi_process_id (inferior *inf);
+
+/* Get the amd-dbgapi wave id for PTID.  */
+
+static inline amd_dbgapi_wave_id_t
+get_amd_dbgapi_wave_id (ptid_t ptid)
+{
+  gdb_assert (ptid_is_gpu (ptid));
+  return amd_dbgapi_wave_id_t {
+    static_cast<decltype (amd_dbgapi_wave_id_t::handle)> (ptid.tid ())
+  };
+}
+
+/* Get the textual version of STATUS.
+
+   Always returns non-nullptr, and asserts that STATUS has a valid value.  */
+
+static inline const char *
+get_status_string (amd_dbgapi_status_t status)
+{
+  const char *ret;
+  status = amd_dbgapi_get_status_string (status, &ret);
+  gdb_assert (status == AMD_DBGAPI_STATUS_SUCCESS);
+  return ret;
+}
+
+#endif /* AMD_DBGAPI_TARGET_H */
diff --git a/gdb/amdgpu-tdep.c b/gdb/amdgpu-tdep.c
new file mode 100644
index 00000000000..fc5e2438c7f
--- /dev/null
+++ b/gdb/amdgpu-tdep.c
@@ -0,0 +1,1367 @@ 
+/* Target-dependent code for the AMDGPU architectures.
+
+   Copyright (C) 2019-2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+
+#include "amd-dbgapi-target.h"
+#include "amdgpu-tdep.h"
+#include "arch-utils.h"
+#include "disasm.h"
+#include "dwarf2/frame.h"
+#include "frame-unwind.h"
+#include "gdbarch.h"
+#include "gdbsupport/selftest.h"
+#include "gdbtypes.h"
+#include "inferior.h"
+#include "objfiles.h"
+#include "observable.h"
+#include "producer.h"
+#include "reggroups.h"
+
+/* See amdgpu-tdep.h.  */
+
+bool
+is_amdgpu_arch (struct gdbarch *arch)
+{
+  gdb_assert (arch != nullptr);
+  return gdbarch_bfd_arch_info (arch)->arch == bfd_arch_amdgcn;
+}
+
+/* See amdgpu-tdep.h.  */
+
+amdgpu_gdbarch_tdep *
+get_amdgpu_gdbarch_tdep (gdbarch *arch)
+{
+  return gdbarch_tdep<amdgpu_gdbarch_tdep> (arch);
+}
+
+/* Return the name of register REGNUM.  */
+
+static const char *
+amdgpu_register_name (struct gdbarch *gdbarch, int regnum)
+{
+  /* The list of registers reported by amd-dbgapi for a given architecture
+     contains some duplicate names.  For instance, there is an "exec" register
+     for waves in the wave32 mode and one for the waves in the wave64 mode.
+     However, at most one register with a given name is actually allocated for
+     a specific wave.  If INFERIOR_PTID represents a GPU wave, we query
+     amd-dbgapi to know whether the requested register actually exists for the
+     current wave, so there won't be duplicates in the register names we
+     report for that wave.
+
+     But there are two known cases where INFERIOR_PTID doesn't represent a GPU
+     wave:
+
+      - The user does "set arch amdgcn:gfxNNN" followed with "maint print
+	registers"
+      - The "register_name" selftest
+
+     In these cases, we can't query amd-dbgapi to know whether we should hide
+     the register or not.  The "register_name" selftest checks that there aren't
+     duplicates in the register names returned by the gdbarch, so if we simply
+     return all register names, that test will fail.  The other simple option is
+     to never return a register name, which is what we do here.  */
+  if (!ptid_is_gpu (inferior_ptid))
+    return "";
+
+  amd_dbgapi_wave_id_t wave_id = get_amd_dbgapi_wave_id (inferior_ptid);
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+
+  amd_dbgapi_register_exists_t register_exists;
+  if (amd_dbgapi_wave_register_exists (wave_id, tdep->register_ids[regnum],
+				       &register_exists)
+	!= AMD_DBGAPI_STATUS_SUCCESS
+      || register_exists != AMD_DBGAPI_REGISTER_PRESENT)
+    return "";
+
+  return tdep->register_names[regnum].c_str ();
+}
+
+/* Return the internal register number for the DWARF register number DWARF_REG.
+
+   Return -1 if there's no internal register mapping to DWARF_REG.  */
+
+static int
+amdgpu_dwarf_reg_to_regnum (struct gdbarch *gdbarch, int dwarf_reg)
+{
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+
+  if (dwarf_reg < tdep->dwarf_regnum_to_gdb_regnum.size ())
+    return tdep->dwarf_regnum_to_gdb_regnum[dwarf_reg];
+
+  return -1;
+}
+
+/* A hierarchy of classes to represent an amd-dbgapi register type.  */
+
+struct amd_dbgapi_register_type
+{
+  enum class kind
+    {
+      INTEGER,
+      FLOAT,
+      DOUBLE,
+      VECTOR,
+      CODE_PTR,
+      FLAGS,
+      ENUM,
+    };
+
+  amd_dbgapi_register_type (kind kind, std::string lookup_name)
+    : m_kind (kind), m_lookup_name (std::move (lookup_name))
+  {}
+
+  virtual ~amd_dbgapi_register_type () = default;
+
+  /* Return the type's kind.  */
+  kind kind () const
+  { return m_kind; }
+
+  /* Name to use for this type in the existing type map.  */
+  const std::string &lookup_name () const
+  { return m_lookup_name; }
+
+private:
+  enum kind m_kind;
+  std::string m_lookup_name;
+};
+
+using amd_dbgapi_register_type_up = std::unique_ptr<amd_dbgapi_register_type>;
+
+struct amd_dbgapi_register_type_integer : public amd_dbgapi_register_type
+{
+  amd_dbgapi_register_type_integer (bool is_unsigned, unsigned int bit_size)
+    : amd_dbgapi_register_type
+	(kind::INTEGER,
+	 string_printf ("%sint%d", is_unsigned ? "u" : "", bit_size)),
+      m_is_unsigned (is_unsigned),
+      m_bit_size (bit_size)
+  {}
+
+  bool is_unsigned () const
+  { return m_is_unsigned; }
+
+  unsigned int bit_size () const
+  { return m_bit_size; }
+
+private:
+  bool m_is_unsigned;
+  unsigned int m_bit_size;
+};
+
+struct amd_dbgapi_register_type_float : public amd_dbgapi_register_type
+{
+  amd_dbgapi_register_type_float ()
+    : amd_dbgapi_register_type (kind::FLOAT, "float")
+  {}
+};
+
+struct amd_dbgapi_register_type_double : public amd_dbgapi_register_type
+{
+  amd_dbgapi_register_type_double ()
+    : amd_dbgapi_register_type (kind::DOUBLE, "double")
+  {}
+};
+
+struct amd_dbgapi_register_type_vector : public amd_dbgapi_register_type
+{
+  amd_dbgapi_register_type_vector (const amd_dbgapi_register_type &element_type,
+				   unsigned int count)
+    : amd_dbgapi_register_type (kind::VECTOR,
+				make_lookup_name (element_type, count)),
+      m_element_type (element_type),
+      m_count (count)
+  {}
+
+  const amd_dbgapi_register_type &element_type () const
+  { return m_element_type; }
+
+  unsigned int count () const
+  { return m_count; }
+
+  static std::string make_lookup_name
+    (const amd_dbgapi_register_type &element_type, unsigned int count)
+  {
+    return string_printf ("%s[%d]", element_type.lookup_name ().c_str (),
+			  count);
+  }
+
+private:
+  const amd_dbgapi_register_type &m_element_type;
+  unsigned int m_count;
+};
+
+struct amd_dbgapi_register_type_code_ptr : public amd_dbgapi_register_type
+{
+  amd_dbgapi_register_type_code_ptr ()
+    : amd_dbgapi_register_type (kind::CODE_PTR, "void (*)()")
+  {}
+};
+
+struct amd_dbgapi_register_type_flags : public amd_dbgapi_register_type
+{
+  struct field
+  {
+    std::string name;
+    unsigned int bit_pos_start;
+    unsigned int bit_pos_end;
+    const amd_dbgapi_register_type *type;
+  };
+
+  using container_type = std::vector<field>;
+  using const_iterator_type = container_type::const_iterator;
+
+  amd_dbgapi_register_type_flags (unsigned int bit_size, gdb::string_view name)
+    : amd_dbgapi_register_type (kind::FLAGS,
+				make_lookup_name (bit_size, name)),
+      m_bit_size (bit_size),
+      m_name (std::move (name))
+  {}
+
+  unsigned int bit_size () const
+  { return m_bit_size; }
+
+  void add_field (std::string name, unsigned int bit_pos_start,
+		  unsigned int bit_pos_end,
+		  const amd_dbgapi_register_type *type)
+  {
+    m_fields.push_back (field {std::move (name), bit_pos_start,
+			       bit_pos_end, type});
+  }
+
+  container_type::size_type size () const
+  { return m_fields.size (); }
+
+  const field &operator[] (container_type::size_type pos) const
+  { return m_fields[pos]; }
+
+  const_iterator_type begin () const
+  { return m_fields.begin (); }
+
+  const_iterator_type end () const
+  { return m_fields.end (); }
+
+  const std::string &name () const
+  { return m_name; }
+
+  static std::string make_lookup_name (int bits, gdb::string_view name)
+  {
+    std::string res = string_printf ("flags%d_t ", bits);
+    res.append (name.data (), name.size ());
+    return res;
+  }
+
+private:
+  unsigned int m_bit_size;
+  container_type m_fields;
+  std::string m_name;
+};
+
+using amd_dbgapi_register_type_flags_up
+  = std::unique_ptr<amd_dbgapi_register_type_flags>;
+
+struct amd_dbgapi_register_type_enum : public amd_dbgapi_register_type
+{
+  struct enumerator
+  {
+    std::string name;
+    ULONGEST value;
+  };
+
+  using container_type = std::vector<enumerator>;
+  using const_iterator_type = container_type::const_iterator;
+
+  amd_dbgapi_register_type_enum (gdb::string_view name)
+    : amd_dbgapi_register_type (kind::ENUM, make_lookup_name (name)),
+      m_name (name.data (), name.length ())
+  {}
+
+  void set_bit_size (int bit_size)
+  { m_bit_size = bit_size; }
+
+  unsigned int bit_size () const
+  { return m_bit_size; }
+
+  void add_enumerator (std::string name, ULONGEST value)
+  { m_enumerators.push_back (enumerator {std::move (name), value}); }
+
+  container_type::size_type size () const
+  { return m_enumerators.size (); }
+
+  const enumerator &operator[] (container_type::size_type pos) const
+  { return m_enumerators[pos]; }
+
+  const_iterator_type begin () const
+  { return m_enumerators.begin (); }
+
+  const_iterator_type end () const
+  { return m_enumerators.end (); }
+
+  const std::string &name () const
+  { return m_name; }
+
+  static std::string make_lookup_name (gdb::string_view name)
+  {
+    std::string res = "enum ";
+    res.append (name.data (), name.length ());
+    return res;
+  }
+
+private:
+  unsigned int m_bit_size = 32;
+  container_type m_enumerators;
+  std::string m_name;
+};
+
+using amd_dbgapi_register_type_enum_up
+  = std::unique_ptr<amd_dbgapi_register_type_enum>;
+
+/* Map type lookup names to types.  */
+using amd_dbgapi_register_type_map
+  = std::unordered_map<std::string, amd_dbgapi_register_type_up>;
+
+/* Parse S as a ULONGEST; raise an error on overflow.  */
+
+static ULONGEST
+try_strtoulst (gdb::string_view s)
+{
+  errno = 0;
+  ULONGEST value = strtoulst (s.data (), nullptr, 0);
+  if (errno != 0)
+    error (_("Failed to parse integer."));
+
+  return value;
+}
+
+/* Shared regex bits.  */
+#define IDENTIFIER "[A-Za-z0-9_.]+"
+#define WS "[ \t]+"
+#define WSOPT "[ \t]*"
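+
+/* Note that IDENTIFIER accepts '.', since amd-dbgapi field names such as
+   "FP_ROUND.32" or "EXCP_EN.INVALID" (see the selftest below) contain it.  */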
+
+static const amd_dbgapi_register_type &
+parse_amd_dbgapi_register_type (gdb::string_view type_name,
+				amd_dbgapi_register_type_map &type_map);
+
+/* parse_amd_dbgapi_register_type helper for enum types.  */
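+
+/* For instance (illustrative, taken from the selftest below), FIELDS may
+   look like:
+
+     NEAREST_EVEN = 0, PLUS_INF = 1, MINUS_INF = 2, ZERO = 3
+
+   i.e. a comma-separated list of "NAME = VALUE" enumerators.  */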
+
+static void
+parse_amd_dbgapi_register_type_enum_fields
+  (amd_dbgapi_register_type_enum &enum_type, gdb::string_view fields)
+{
+  compiled_regex regex (/* name */
+			"^(" IDENTIFIER ")"
+			WSOPT "=" WSOPT
+			/* value */
+			"([0-9]+)"
+			WSOPT "(," WSOPT ")?",
+			REG_EXTENDED,
+			_("Error in AMDGPU enum register type regex"));
+  regmatch_t matches[4];
+
+  while (!fields.empty ())
+    {
+      int res = regex.exec (fields.data (), ARRAY_SIZE (matches), matches, 0);
+      if (res == REG_NOMATCH)
+	error (_("Failed to parse enum fields"));
+
+      auto sv_from_match = [fields] (const regmatch_t &m)
+	{ return fields.substr (m.rm_so, m.rm_eo - m.rm_so); };
+
+      gdb::string_view name = sv_from_match (matches[1]);
+      gdb::string_view value_str = sv_from_match (matches[2]);
+      ULONGEST value = try_strtoulst (value_str);
+
+      if (value > std::numeric_limits<uint32_t>::max ())
+	enum_type.set_bit_size (64);
+
+      enum_type.add_enumerator (gdb::to_string (name), value);
+
+      fields = fields.substr (matches[0].rm_eo);
+    }
+}
+
+/* parse_amd_dbgapi_register_type helper for flags types.  */
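+
+/* For instance (illustrative, adapted from the selftest below), FIELDS may
+   look like:
+
+     bool IEEE @9; bool DEBUG_EN @11; uint32_t CSP @29-31;
+
+   i.e. a list of "TYPE NAME @START[-END];" fields, where TYPE is bool,
+   uint<BITS>_t or an enum type.  */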
+
+static void
+parse_amd_dbgapi_register_type_flags_fields
+  (amd_dbgapi_register_type_flags &flags_type,
+   int bits, gdb::string_view name, gdb::string_view fields,
+   amd_dbgapi_register_type_map &type_map)
+{
+  gdb_assert (bits == 32 || bits == 64);
+
+  std::string regex_str
+    = string_printf (/* type */
+		     "^(bool|uint%d_t|enum" WS IDENTIFIER WSOPT "(\\{[^}]*})?)"
+		     WS
+		     /* name */
+		     "(" IDENTIFIER ")" WSOPT
+		     /* bit position */
+		     "@([0-9]+)(-[0-9]+)?" WSOPT ";" WSOPT,
+		     bits);
+  compiled_regex regex (regex_str.c_str (), REG_EXTENDED,
+			_("Error in AMDGPU register type flags fields regex"));
+  regmatch_t matches[6];
+
+  while (!fields.empty ())
+    {
+      int res = regex.exec (fields.data (), ARRAY_SIZE (matches), matches, 0);
+      if (res == REG_NOMATCH)
+	error (_("Failed to parse flags type fields string"));
+
+      auto sv_from_match = [fields] (const regmatch_t &m)
+	{ return fields.substr (m.rm_so, m.rm_eo - m.rm_so); };
+
+      gdb::string_view field_type_str = sv_from_match (matches[1]);
+      gdb::string_view field_name = sv_from_match (matches[3]);
+      gdb::string_view pos_begin_str = sv_from_match (matches[4]);
+      ULONGEST pos_begin = try_strtoulst (pos_begin_str);
+
+      if (field_type_str == "bool")
+	flags_type.add_field (gdb::to_string (field_name), pos_begin, pos_begin,
+			      nullptr);
+      else
+	{
+	  if (matches[5].rm_so == -1)
+	    error (_("Missing end bit position"));
+
+	  gdb::string_view pos_end_str = sv_from_match (matches[5]);
+	  ULONGEST pos_end = try_strtoulst (pos_end_str.substr (1));
+	  const amd_dbgapi_register_type &field_type
+	    = parse_amd_dbgapi_register_type (field_type_str, type_map);
+	  flags_type.add_field (gdb::to_string (field_name), pos_begin, pos_end,
+				&field_type);
+	}
+
+      fields = fields.substr (matches[0].rm_eo);
+    }
+}
+
+/* parse_amd_dbgapi_register_type helper for scalars.  */
+
+static const amd_dbgapi_register_type &
+parse_amd_dbgapi_register_type_scalar (gdb::string_view name,
+				       amd_dbgapi_register_type_map &type_map)
+{
+  std::string name_str = gdb::to_string (name);
+  auto it = type_map.find (name_str);
+  if (it != type_map.end ())
+    {
+      enum amd_dbgapi_register_type::kind kind = it->second->kind ();
+      if (kind != amd_dbgapi_register_type::kind::INTEGER
+	  && kind != amd_dbgapi_register_type::kind::FLOAT
+	  && kind != amd_dbgapi_register_type::kind::DOUBLE
+	  && kind != amd_dbgapi_register_type::kind::CODE_PTR)
+	error (_("type mismatch"));
+
+      return *it->second;
+    }
+
+  amd_dbgapi_register_type_up type;
+  if (name == "int32_t")
+    type.reset (new amd_dbgapi_register_type_integer (false, 32));
+  else if (name == "uint32_t")
+    type.reset (new amd_dbgapi_register_type_integer (true, 32));
+  else if (name == "int64_t")
+    type.reset (new amd_dbgapi_register_type_integer (false, 64));
+  else if (name == "uint64_t")
+    type.reset (new amd_dbgapi_register_type_integer (true, 64));
+  else if (name == "float")
+    type.reset (new amd_dbgapi_register_type_float ());
+  else if (name == "double")
+    type.reset (new amd_dbgapi_register_type_double ());
+  else if (name == "void (*)()")
+    type.reset (new amd_dbgapi_register_type_code_ptr ());
+  else
+    error (_("unknown type %s"), name_str.c_str ());
+
+  auto insertion_pair = type_map.emplace (name, std::move (type));
+  return *insertion_pair.first->second;
+}
+
+/* Parse an amd-dbgapi register type string into an amd_dbgapi_register_type
+   object.
+
+   See the documentation of AMD_DBGAPI_REGISTER_INFO_TYPE in amd-dbgapi.h for
+   details about the format.  */
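+
+/* Some illustrative examples of accepted type strings (see also the selftest
+   further down):
+
+     "uint32_t"                - a scalar type
+     "int32_t[64]"             - a vector of 64 int32_t elements
+     "flags32_t mode { ... }"  - a 32-bit flags type definition
+     "enum fp_round { ... }"   - an enum type definition  */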
+
+static const amd_dbgapi_register_type &
+parse_amd_dbgapi_register_type (gdb::string_view type_str,
+				amd_dbgapi_register_type_map &type_map)
+{
+  size_t pos_open_bracket = type_str.find_last_of ('[');
+  auto sv_from_match = [type_str] (const regmatch_t &m)
+    { return type_str.substr (m.rm_so, m.rm_eo - m.rm_so); };
+
+  if (pos_open_bracket != gdb::string_view::npos)
+    {
+      /* Vector types.  */
+      gdb::string_view element_type_str
+	= type_str.substr (0, pos_open_bracket);
+      const amd_dbgapi_register_type &element_type
+	= parse_amd_dbgapi_register_type (element_type_str, type_map);
+
+      size_t pos_close_bracket = type_str.find_last_of (']');
+      gdb_assert (pos_close_bracket != gdb::string_view::npos);
+      gdb::string_view count_str_view
+	= type_str.substr (pos_open_bracket + 1,
+			    pos_close_bracket - pos_open_bracket);
+      std::string count_str = gdb::to_string (count_str_view);
+      unsigned int count = std::stoul (count_str);
+
+      std::string lookup_name
+	= amd_dbgapi_register_type_vector::make_lookup_name (element_type, count);
+      auto existing_type_it = type_map.find (lookup_name);
+      if (existing_type_it != type_map.end ())
+	{
+	  gdb_assert (existing_type_it->second->kind ()
+		      == amd_dbgapi_register_type::kind::VECTOR);
+	  return *existing_type_it->second;
+	}
+
+      amd_dbgapi_register_type_up type
+	(new amd_dbgapi_register_type_vector (element_type, count));
+      auto insertion_pair
+	= type_map.emplace (type->lookup_name (), std::move (type));
+      return *insertion_pair.first->second;
+    }
+
+  if (type_str.find ("flags32_t") == 0 || type_str.find ("flags64_t") == 0)
+    {
+      /* Split 'type_str' into 4 tokens: "(type) (name) ({ (fields) })".  */
+      compiled_regex regex ("^(flags32_t|flags64_t)"
+			    WS "(" IDENTIFIER ")" WSOPT
+			    "(\\{" WSOPT "(.*)})?",
+			    REG_EXTENDED,
+			    _("Error in AMDGPU register type regex"));
+
+      regmatch_t matches[5];
+      int res = regex.exec (type_str.data (), ARRAY_SIZE (matches), matches, 0);
+      if (res == REG_NOMATCH)
+	error (_("Failed to parse flags type string"));
+
+      gdb::string_view flags_keyword = sv_from_match (matches[1]);
+      unsigned int bit_size = flags_keyword == "flags32_t" ? 32 : 64;
+      gdb::string_view name = sv_from_match (matches[2]);
+      std::string lookup_name
+	= amd_dbgapi_register_type_flags::make_lookup_name (bit_size, name);
+      auto existing_type_it = type_map.find (lookup_name);
+
+      if (matches[3].rm_so == -1)
+	{
+	  /* No braces; look up an existing type.  */
+	  if (existing_type_it == type_map.end ())
+	    error (_("reference to unknown type %s."),
+		   gdb::to_string (name).c_str ());
+
+	  if (existing_type_it->second->kind ()
+	      != amd_dbgapi_register_type::kind::FLAGS)
+	    error (_("type mismatch"));
+
+	  return *existing_type_it->second;
+	}
+      else
+	{
+	  /* With braces, it's a definition.  */
+	  if (existing_type_it != type_map.end ())
+	    error (_("re-definition of type %s."),
+		   gdb::to_string (name).c_str ());
+
+	  amd_dbgapi_register_type_flags_up flags_type
+	    (new amd_dbgapi_register_type_flags (bit_size, name));
+	  gdb::string_view fields_without_braces = sv_from_match (matches[4]);
+
+	  parse_amd_dbgapi_register_type_flags_fields
+	    (*flags_type, bit_size, name, fields_without_braces, type_map);
+
+	  auto insertion_pair
+	    = type_map.emplace (flags_type->lookup_name (),
+				std::move (flags_type));
+	  return *insertion_pair.first->second;
+	}
+    }
+
+  if (type_str.find ("enum") == 0)
+    {
+      compiled_regex regex ("^enum" WS "(" IDENTIFIER ")" WSOPT "(\\{" WSOPT "([^}]*)})?",
+			    REG_EXTENDED,
+			    _("Error in AMDGPU register type enum regex"));
+
+      /* Split 'type_str' into 3 tokens: "(name) ( { (fields) } )".  */
+      regmatch_t matches[4];
+      int res = regex.exec (type_str.data (), ARRAY_SIZE (matches), matches, 0);
+      if (res == REG_NOMATCH)
+	error (_("Failed to parse enum type string"));
+
+      gdb::string_view name = sv_from_match (matches[1]);
+
+      std::string lookup_name
+	= amd_dbgapi_register_type_enum::make_lookup_name (name);
+      auto existing_type_it = type_map.find (lookup_name);
+
+      if (matches[2].rm_so == -1)
+	{
+	  /* No braces; look up an existing type.  */
+	  if (existing_type_it == type_map.end ())
+	    error (_("reference to unknown type %s"),
+		   gdb::to_string (name).c_str ());
+
+	  if (existing_type_it->second->kind ()
+	      != amd_dbgapi_register_type::kind::ENUM)
+	    error (_("type mismatch"));
+
+	  return *existing_type_it->second;
+	}
+      else
+	{
+	  /* With braces, it's a definition.  */
+	  if (existing_type_it != type_map.end ())
+	    error (_("re-definition of type %s"),
+		   gdb::to_string (name).c_str ());
+
+	  amd_dbgapi_register_type_enum_up enum_type
+	    (new amd_dbgapi_register_type_enum (name));
+	  gdb::string_view fields_without_braces = sv_from_match (matches[3]);
+
+	  parse_amd_dbgapi_register_type_enum_fields
+	    (*enum_type, fields_without_braces);
+
+	  auto insertion_pair
+	    = type_map.emplace (enum_type->lookup_name (),
+				std::move (enum_type));
+	  return *insertion_pair.first->second;
+	}
+    }
+
+  return parse_amd_dbgapi_register_type_scalar (type_str, type_map);
+}
+
+/* Convert an amd_dbgapi_register_type object to a GDB type.  */
+
+static type *
+amd_dbgapi_register_type_to_gdb_type (const amd_dbgapi_register_type &type,
+				      struct gdbarch *gdbarch)
+{
+  switch (type.kind ())
+    {
+    case amd_dbgapi_register_type::kind::INTEGER:
+      {
+	const auto &integer_type
+	  = static_cast<const amd_dbgapi_register_type_integer &> (type);
+	switch (integer_type.bit_size ())
+	  {
+	  case 32:
+	    if (integer_type.is_unsigned ())
+	      return builtin_type (gdbarch)->builtin_uint32;
+	    else
+	      return builtin_type (gdbarch)->builtin_int32;
+
+	  case 64:
+	    if (integer_type.is_unsigned ())
+	      return builtin_type (gdbarch)->builtin_uint64;
+	    else
+	      return builtin_type (gdbarch)->builtin_int64;
+
+	  default:
+	    gdb_assert_not_reached ("invalid bit size");
+	  }
+      }
+
+    case amd_dbgapi_register_type::kind::VECTOR:
+      {
+	const auto &vector_type
+	  = static_cast<const amd_dbgapi_register_type_vector &> (type);
+	struct type *element_type
+	  = amd_dbgapi_register_type_to_gdb_type (vector_type.element_type (),
+						  gdbarch);
+	return init_vector_type (element_type, vector_type.count ());
+      }
+
+    case amd_dbgapi_register_type::kind::FLOAT:
+      return builtin_type (gdbarch)->builtin_float;
+
+    case amd_dbgapi_register_type::kind::DOUBLE:
+      return builtin_type (gdbarch)->builtin_double;
+
+    case amd_dbgapi_register_type::kind::CODE_PTR:
+      return builtin_type (gdbarch)->builtin_func_ptr;
+
+    case amd_dbgapi_register_type::kind::FLAGS:
+      {
+	const auto &flags_type
+	  = static_cast<const amd_dbgapi_register_type_flags &> (type);
+	struct type *gdb_type
+	  = arch_flags_type (gdbarch, flags_type.name ().c_str (),
+			     flags_type.bit_size ());
+
+	for (const auto &field : flags_type)
+	  {
+	    if (field.type == nullptr)
+	      {
+		gdb_assert (field.bit_pos_start == field.bit_pos_end);
+		append_flags_type_flag (gdb_type, field.bit_pos_start,
+					field.name.c_str ());
+	      }
+	    else
+	      {
+		struct type *field_type
+		  = amd_dbgapi_register_type_to_gdb_type (*field.type, gdbarch);
+		gdb_assert (field_type != nullptr);
+		append_flags_type_field
+		  (gdb_type, field.bit_pos_start,
+		   field.bit_pos_end - field.bit_pos_start + 1,
+		   field_type, field.name.c_str ());
+	      }
+	  }
+
+	return gdb_type;
+      }
+
+    case amd_dbgapi_register_type::kind::ENUM:
+      {
+	const auto &enum_type
+	  = static_cast<const amd_dbgapi_register_type_enum &> (type);
+	struct type *gdb_type
+	  = arch_type (gdbarch, TYPE_CODE_ENUM, enum_type.bit_size (),
+		       enum_type.name ().c_str ());
+
+	gdb_type->set_num_fields (enum_type.size ());
+	gdb_type->set_fields
+	  ((struct field *) TYPE_ZALLOC (gdb_type, (sizeof (struct field)
+						    * enum_type.size ())));
+	gdb_type->set_is_unsigned (true);
+
+	for (size_t i = 0; i < enum_type.size (); ++i)
+	  {
+	    const auto &field = enum_type[i];
+	    gdb_type->field (i).set_name (xstrdup (field.name.c_str ()));
+	    gdb_type->field (i).set_loc_enumval (field.value);
+	  }
+
+	return gdb_type;
+      }
+
+    default:
+      gdb_assert_not_reached ("unhandled amd_dbgapi_register_type kind");
+    }
+}
+
+static type *
+amdgpu_register_type (struct gdbarch *gdbarch, int regnum)
+{
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+
+  if (tdep->register_types[regnum] == nullptr)
+    {
+      /* This is done lazily (not at gdbarch initialization time), because it
+	 requires access to builtin_type, which can't be used while the gdbarch
+	 is not fully initialized.  */
+      char *bytes;
+      amd_dbgapi_status_t status
+	= amd_dbgapi_register_get_info (tdep->register_ids[regnum],
+					AMD_DBGAPI_REGISTER_INFO_TYPE,
+					sizeof (bytes), &bytes);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("Failed to get register type from amd-dbgapi"));
+
+      gdb::unique_xmalloc_ptr<char> bytes_holder (bytes);
+      amd_dbgapi_register_type_map type_map;
+      const amd_dbgapi_register_type &register_type
+	= parse_amd_dbgapi_register_type (bytes, type_map);
+      tdep->register_types[regnum]
+	= amd_dbgapi_register_type_to_gdb_type (register_type, gdbarch);
+      gdb_assert (tdep->register_types[regnum] != nullptr);
+    }
+
+  return tdep->register_types[regnum];
+}
+
+static int
+amdgpu_register_reggroup_p (struct gdbarch *gdbarch, int regnum,
+			    const reggroup *group)
+{
+  amdgpu_gdbarch_tdep *tdep = get_amdgpu_gdbarch_tdep (gdbarch);
+
+  auto it = tdep->register_class_map.find (group->name ());
+  if (it == tdep->register_class_map.end ())
+    return group == all_reggroup;
+
+  amd_dbgapi_register_class_state_t state;
+  if (amd_dbgapi_register_is_in_register_class (it->second,
+						tdep->register_ids[regnum],
+						&state)
+      != AMD_DBGAPI_STATUS_SUCCESS)
+    return group == all_reggroup;
+
+  return (state == AMD_DBGAPI_REGISTER_CLASS_STATE_MEMBER
+	  || group == all_reggroup);
+}
+
+static int
+amdgpu_breakpoint_kind_from_pc (struct gdbarch *gdbarch, CORE_ADDR *)
+{
+  return get_amdgpu_gdbarch_tdep (gdbarch)->breakpoint_instruction_size;
+}
+
+static const gdb_byte *
+amdgpu_sw_breakpoint_from_kind (struct gdbarch *gdbarch, int kind, int *size)
+{
+  *size = kind;
+  return get_amdgpu_gdbarch_tdep (gdbarch)->breakpoint_instruction_bytes.get ();
+}
+
+struct amdgpu_frame_cache
+{
+  CORE_ADDR base;
+  CORE_ADDR pc;
+};
+
+static amdgpu_frame_cache *
+amdgpu_frame_cache (frame_info_ptr this_frame, void **this_cache)
+{
+  if (*this_cache != nullptr)
+    return (struct amdgpu_frame_cache *) *this_cache;
+
+  struct amdgpu_frame_cache *cache
+    = FRAME_OBSTACK_ZALLOC (struct amdgpu_frame_cache);
+  (*this_cache) = cache;
+
+  cache->pc = get_frame_func (this_frame);
+  cache->base = 0;
+
+  return cache;
+}
+
+static void
+amdgpu_frame_this_id (frame_info_ptr this_frame, void **this_cache,
+		      frame_id *this_id)
+{
+  struct amdgpu_frame_cache *cache
+    = amdgpu_frame_cache (this_frame, this_cache);
+
+  if (get_frame_type (this_frame) == INLINE_FRAME)
+    (*this_id) = frame_id_build (cache->base, cache->pc);
+  else
+    (*this_id) = outer_frame_id;
+
+  frame_debug_printf ("this_frame=%d, type=%d, this_id=%s",
+		      frame_relative_level (this_frame),
+		      get_frame_type (this_frame),
+		      this_id->to_string ().c_str ());
+}
+
+static frame_id
+amdgpu_dummy_id (struct gdbarch *gdbarch, frame_info_ptr this_frame)
+{
+  return frame_id_build (0, get_frame_pc (this_frame));
+}
+
+static struct value *
+amdgpu_frame_prev_register (frame_info_ptr this_frame, void **this_cache,
+			    int regnum)
+{
+  return frame_unwind_got_register (this_frame, regnum, regnum);
+}
+
+static const frame_unwind amdgpu_frame_unwind = {
+  "amdgpu",
+  NORMAL_FRAME,
+  default_frame_unwind_stop_reason,
+  amdgpu_frame_this_id,
+  amdgpu_frame_prev_register,
+  nullptr,
+  default_frame_sniffer,
+  nullptr,
+  nullptr,
+};
+
+static int
+print_insn_amdgpu (bfd_vma memaddr, struct disassemble_info *info)
+{
+  gdb_disassemble_info *di
+    = static_cast<gdb_disassemble_info *> (info->application_data);
+
+  /* Try to read at most INSTRUCTION_SIZE bytes.  */
+
+  amd_dbgapi_size_t instruction_size = gdbarch_max_insn_length (di->arch ());
+  gdb::byte_vector buffer (instruction_size);
+
+  /* read_memory_func doesn't support partial reads, so if the read
+     fails, try one byte less, on and on until we manage to read
+     something.  A case where this would happen is if we're trying to
+     read the last instruction at the end of a file section and that
+     instruction is smaller than the largest instruction.  */
+  while (instruction_size > 0)
+    {
+      int ret = info->read_memory_func (memaddr, buffer.data (),
+					instruction_size, info);
+      if (ret == 0)
+	break;
+
+      --instruction_size;
+    }
+
+  if (instruction_size == 0)
+    {
+      info->memory_error_func (-1, memaddr, info);
+      return -1;
+    }
+
+  amd_dbgapi_architecture_id_t architecture_id;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_get_architecture (gdbarch_bfd_arch_info (di->arch ())->mach,
+				   &architecture_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    return -1;
+
+  auto symbolizer = [] (amd_dbgapi_symbolizer_id_t symbolizer_id,
+			amd_dbgapi_global_address_t address,
+			char **symbol_text) -> amd_dbgapi_status_t
+  {
+    gdb_disassemble_info *disasm_info
+      = reinterpret_cast<gdb_disassemble_info *> (symbolizer_id);
+    gdb_printing_disassembler *disasm
+      = dynamic_cast<gdb_printing_disassembler *> (disasm_info);
+    gdb_assert (disasm != nullptr);
+
+    string_file string (disasm->stream ()->can_emit_style_escape ());
+    print_address (disasm->arch (), address, &string);
+    *symbol_text = xstrdup (string.c_str ());
+
+    return AMD_DBGAPI_STATUS_SUCCESS;
+  };
+  auto symbolizer_id = reinterpret_cast<amd_dbgapi_symbolizer_id_t> (di);
+  char *instruction_text = nullptr;
+  status = amd_dbgapi_disassemble_instruction (architecture_id, memaddr,
+					       &instruction_size,
+					       buffer.data (),
+					       &instruction_text,
+					       symbolizer_id,
+					       symbolizer);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      size_t alignment;
+      status = amd_dbgapi_architecture_get_info
+	(architecture_id,
+	 AMD_DBGAPI_ARCHITECTURE_INFO_MINIMUM_INSTRUCTION_ALIGNMENT,
+	 sizeof (alignment), &alignment);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	error (_("amd_dbgapi_architecture_get_info failed"));
+
+      info->fprintf_func (di, "<illegal instruction>");
+
+      /* Skip to the next valid instruction address.  */
+      return align_up (memaddr + 1, alignment) - memaddr;
+    }
+
+  /* Print the instruction.  */
+  info->fprintf_func (di, "%s", instruction_text);
+
+  /* Free the memory allocated by the amd-dbgapi.  */
+  xfree (instruction_text);
+
+  return static_cast<int> (instruction_size);
+}
+
+static CORE_ADDR
+amdgpu_skip_prologue (struct gdbarch *gdbarch, CORE_ADDR start_pc)
+{
+  CORE_ADDR func_addr;
+
+  /* See if we can determine the end of the prologue via the symbol table.
+     If so, then return either PC, or the PC after the prologue, whichever
+     is greater.  */
+  if (find_pc_partial_function (start_pc, nullptr, &func_addr, nullptr))
+    {
+      CORE_ADDR post_prologue_pc
+	= skip_prologue_using_sal (gdbarch, func_addr);
+      struct compunit_symtab *cust = find_pc_compunit_symtab (func_addr);
+
+      /* Clang always emits a line note before the prologue and another
+	 one after.  We trust clang to emit usable line notes.  */
+      if (post_prologue_pc != 0
+	  && cust != nullptr
+	  && cust->producer () != nullptr
+	  && producer_is_llvm (cust->producer ()))
+	return std::max (start_pc, post_prologue_pc);
+    }
+
+  return start_pc;
+}
+
+static bool
+amdgpu_supports_arch_info (const struct bfd_arch_info *info)
+{
+  amd_dbgapi_architecture_id_t architecture_id;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_get_architecture (info->mach, &architecture_id);
+
+  gdb_assert (status != AMD_DBGAPI_STATUS_ERROR_NOT_INITIALIZED);
+  return status == AMD_DBGAPI_STATUS_SUCCESS;
+}
+
+static struct gdbarch *
+amdgpu_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
+{
+  /* If there is already a candidate, use it.  */
+  arches = gdbarch_list_lookup_by_info (arches, &info);
+  if (arches != nullptr)
+    return arches->gdbarch;
+
+  /* Allocate space for the new architecture.  */
+  gdbarch_up gdbarch_u
+    (gdbarch_alloc (&info, gdbarch_tdep_up (new amdgpu_gdbarch_tdep)));
+  gdbarch *gdbarch = gdbarch_u.get ();
+  amdgpu_gdbarch_tdep *tdep = gdbarch_tdep<amdgpu_gdbarch_tdep> (gdbarch);
+
+  /* Data types.  */
+  set_gdbarch_char_signed (gdbarch, 0);
+  set_gdbarch_ptr_bit (gdbarch, 64);
+  set_gdbarch_addr_bit (gdbarch, 64);
+  set_gdbarch_short_bit (gdbarch, 16);
+  set_gdbarch_int_bit (gdbarch, 32);
+  set_gdbarch_long_bit (gdbarch, 64);
+  set_gdbarch_long_long_bit (gdbarch, 64);
+  set_gdbarch_float_bit (gdbarch, 32);
+  set_gdbarch_double_bit (gdbarch, 64);
+  set_gdbarch_long_double_bit (gdbarch, 128);
+  set_gdbarch_half_format (gdbarch, floatformats_ieee_half);
+  set_gdbarch_float_format (gdbarch, floatformats_ieee_single);
+  set_gdbarch_double_format (gdbarch, floatformats_ieee_double);
+  set_gdbarch_long_double_format (gdbarch, floatformats_ieee_double);
+
+  /* Frame interpretation.  */
+  set_gdbarch_skip_prologue (gdbarch, amdgpu_skip_prologue);
+  set_gdbarch_inner_than (gdbarch, core_addr_greaterthan);
+  dwarf2_append_unwinders (gdbarch);
+  frame_unwind_append_unwinder (gdbarch, &amdgpu_frame_unwind);
+  set_gdbarch_dummy_id (gdbarch, amdgpu_dummy_id);
+
+  /* Registers and memory.  */
+  amd_dbgapi_architecture_id_t architecture_id;
+  amd_dbgapi_status_t status
+    = amd_dbgapi_get_architecture (gdbarch_bfd_arch_info (gdbarch)->mach,
+				   &architecture_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("Failed to get architecture from amd-dbgapi"));
+      return nullptr;
+    }
+
+  /* Add register groups.  */
+  size_t register_class_count;
+  amd_dbgapi_register_class_id_t *register_class_ids;
+  status = amd_dbgapi_architecture_register_class_list (architecture_id,
+							&register_class_count,
+							&register_class_ids);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("Failed to get register class list from amd-dbgapi"));
+      return nullptr;
+    }
+
+  gdb::unique_xmalloc_ptr<amd_dbgapi_register_class_id_t>
+    register_class_ids_holder (register_class_ids);
+
+  for (size_t i = 0; i < register_class_count; ++i)
+    {
+      char *bytes;
+      status = amd_dbgapi_architecture_register_class_get_info
+	(register_class_ids[i], AMD_DBGAPI_REGISTER_CLASS_INFO_NAME,
+	 sizeof (bytes), &bytes);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	{
+	  warning (_("Failed to get register class name from amd-dbgapi"));
+	  return nullptr;
+	}
+
+      gdb::unique_xmalloc_ptr<char> name (bytes);
+
+      auto inserted = tdep->register_class_map.emplace (name.get (),
+							register_class_ids[i]);
+      gdb_assert (inserted.second);
+
+      /* Avoid creating a user reggroup with the same name as some built-in
+	 reggroup, such as "general", "system", "vector", etc.  */
+      if (reggroup_find (gdbarch, name.get ()) != nullptr)
+	continue;
+
+      /* Allocate the reggroup in the gdbarch.  */
+      reggroup_add
+	(gdbarch, reggroup_gdbarch_new (gdbarch, name.get (), USER_REGGROUP));
+    }
+
+  /* Add registers. */
+  size_t register_count;
+  amd_dbgapi_register_id_t *register_ids;
+  status = amd_dbgapi_architecture_register_list (architecture_id,
+						  &register_count,
+						  &register_ids);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("Failed to get register list from amd-dbgapi"));
+      return nullptr;
+    }
+
+  gdb::unique_xmalloc_ptr<amd_dbgapi_register_id_t> register_ids_holder
+    (register_ids);
+
+  tdep->register_ids.insert (tdep->register_ids.end (), &register_ids[0],
+			     &register_ids[register_count]);
+
+  tdep->register_properties.resize (register_count,
+				    AMD_DBGAPI_REGISTER_PROPERTY_NONE);
+  for (size_t regnum = 0; regnum < register_count; ++regnum)
+    {
+      auto &register_properties = tdep->register_properties[regnum];
+      if (amd_dbgapi_register_get_info (register_ids[regnum],
+					AMD_DBGAPI_REGISTER_INFO_PROPERTIES,
+					sizeof (register_properties),
+					&register_properties)
+	  != AMD_DBGAPI_STATUS_SUCCESS)
+	{
+	  warning (_("Failed to get register properties from amd-dbgapi"));
+	  return nullptr;
+	}
+    }
+
+  set_gdbarch_num_regs (gdbarch, register_count);
+  set_gdbarch_num_pseudo_regs (gdbarch, 0);
+
+  tdep->register_names.resize (register_count);
+  tdep->register_types.resize (register_count);
+  for (size_t i = 0; i < register_count; ++i)
+    {
+      /* Set amd-dbgapi register id -> gdb regnum mapping.  */
+      tdep->regnum_map.emplace (tdep->register_ids[i], i);
+
+      /* Get register name.  */
+      char *bytes;
+      status = amd_dbgapi_register_get_info (tdep->register_ids[i],
+					     AMD_DBGAPI_REGISTER_INFO_NAME,
+					     sizeof (bytes), &bytes);
+      if (status == AMD_DBGAPI_STATUS_SUCCESS)
+	{
+	  tdep->register_names[i] = bytes;
+	  xfree (bytes);
+	}
+
+      /* Get register DWARF number.  */
+      uint64_t dwarf_num;
+      status = amd_dbgapi_register_get_info (tdep->register_ids[i],
+					     AMD_DBGAPI_REGISTER_INFO_DWARF,
+					     sizeof (dwarf_num), &dwarf_num);
+      if (status == AMD_DBGAPI_STATUS_SUCCESS)
+	{
+	  if (dwarf_num >= tdep->dwarf_regnum_to_gdb_regnum.size ())
+	    tdep->dwarf_regnum_to_gdb_regnum.resize (dwarf_num + 1, -1);
+
+	  tdep->dwarf_regnum_to_gdb_regnum[dwarf_num] = i;
+	}
+    }
+
+  amd_dbgapi_register_id_t pc_register_id;
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id, AMD_DBGAPI_ARCHITECTURE_INFO_PC_REGISTER,
+     sizeof (pc_register_id), &pc_register_id);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("Failed to get PC register from amd-dbgapi"));
+      return nullptr;
+    }
+
+  set_gdbarch_pc_regnum (gdbarch, tdep->regnum_map[pc_register_id]);
+  set_gdbarch_ps_regnum (gdbarch, -1);
+  set_gdbarch_sp_regnum (gdbarch, -1);
+  set_gdbarch_fp0_regnum (gdbarch, -1);
+
+  set_gdbarch_dwarf2_reg_to_regnum (gdbarch, amdgpu_dwarf_reg_to_regnum);
+
+  /* Register representation.  */
+  set_gdbarch_register_name (gdbarch, amdgpu_register_name);
+  set_gdbarch_register_type (gdbarch, amdgpu_register_type);
+  set_gdbarch_register_reggroup_p (gdbarch, amdgpu_register_reggroup_p);
+
+  /* Disassembly.  */
+  set_gdbarch_print_insn (gdbarch, print_insn_amdgpu);
+
+  /* Instructions.  */
+  amd_dbgapi_size_t max_insn_length = 0;
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id, AMD_DBGAPI_ARCHITECTURE_INFO_LARGEST_INSTRUCTION_SIZE,
+     sizeof (max_insn_length), &max_insn_length);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_architecture_get_info failed"));
+
+  set_gdbarch_max_insn_length (gdbarch, max_insn_length);
+
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id, AMD_DBGAPI_ARCHITECTURE_INFO_BREAKPOINT_INSTRUCTION_SIZE,
+     sizeof (tdep->breakpoint_instruction_size),
+     &tdep->breakpoint_instruction_size);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_architecture_get_info failed"));
+
+  gdb_byte *breakpoint_instruction_bytes;
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id, AMD_DBGAPI_ARCHITECTURE_INFO_BREAKPOINT_INSTRUCTION,
+     sizeof (breakpoint_instruction_bytes), &breakpoint_instruction_bytes);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_architecture_get_info failed"));
+
+  tdep->breakpoint_instruction_bytes.reset (breakpoint_instruction_bytes);
+
+  set_gdbarch_breakpoint_kind_from_pc (gdbarch,
+				       amdgpu_breakpoint_kind_from_pc);
+  set_gdbarch_sw_breakpoint_from_kind (gdbarch,
+				       amdgpu_sw_breakpoint_from_kind);
+
+  amd_dbgapi_size_t pc_adjust;
+  status = amd_dbgapi_architecture_get_info
+    (architecture_id,
+     AMD_DBGAPI_ARCHITECTURE_INFO_BREAKPOINT_INSTRUCTION_PC_ADJUST,
+     sizeof (pc_adjust), &pc_adjust);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    error (_("amd_dbgapi_architecture_get_info failed"));
+
+  set_gdbarch_decr_pc_after_break (gdbarch, pc_adjust);
+
+  return gdbarch_u.release ();
+}
+
+#if defined GDB_SELF_TEST
+
+static void
+amdgpu_register_type_parse_test ()
+{
+  {
+    /* A type that exercises flags and enums, in particular looking up an
+       existing enum type by name.  */
+    const char *flags_type_str =
+      "flags32_t mode { \
+	 enum fp_round { \
+	   NEAREST_EVEN = 0, \
+	   PLUS_INF  = 1, \
+	   MINUS_INF = 2, \
+	   ZERO      = 3 \
+	 } FP_ROUND.32 @0-1; \
+	 enum fp_round FP_ROUND.64_16 @2-3; \
+	 enum fp_denorm { \
+	   FLUSH_SRC_DST = 0, \
+	   FLUSH_DST     = 1, \
+	   FLUSH_SRC     = 2, \
+	   FLUSH_NONE    = 3 \
+	 } FP_DENORM.32 @4-5; \
+	 enum fp_denorm FP_DENORM.64_16 @6-7; \
+	 bool DX10_CLAMP @8; \
+	 bool IEEE @9; \
+	 bool LOD_CLAMPED @10; \
+	 bool DEBUG_EN @11; \
+	 bool EXCP_EN.INVALID @12; \
+	 bool EXCP_EN.DENORM @13; \
+	 bool EXCP_EN.DIV0 @14; \
+	 bool EXCP_EN.OVERFLOW @15; \
+	 bool EXCP_EN.UNDERFLOW @16; \
+	 bool EXCP_EN.INEXACT @17; \
+	 bool EXCP_EN.INT_DIV0 @18; \
+	 bool EXCP_EN.ADDR_WATCH @19; \
+	 bool FP16_OVFL @23; \
+	 bool POPS_PACKER0 @24; \
+	 bool POPS_PACKER1 @25; \
+	 bool DISABLE_PERF @26; \
+	 bool GPR_IDX_EN @27; \
+	 bool VSKIP @28; \
+	 uint32_t CSP @29-31; \
+       }";
+    amd_dbgapi_register_type_map type_map;
+    const amd_dbgapi_register_type &type
+      = parse_amd_dbgapi_register_type (flags_type_str, type_map);
+
+    gdb_assert (type.kind () == amd_dbgapi_register_type::kind::FLAGS);
+
+    const auto &f = static_cast<const amd_dbgapi_register_type_flags &> (type);
+    gdb_assert (f.size () == 23);
+
+    /* Check the two "FP_ROUND" fields.  */
+    auto check_fp_round_field
+      = [] (const char *name, const amd_dbgapi_register_type_flags::field &field)
+	{
+	  gdb_assert (field.name == name);
+	  gdb_assert (field.type->kind ()
+		      == amd_dbgapi_register_type::kind::ENUM);
+
+	  const auto &e
+	    = static_cast<const amd_dbgapi_register_type_enum &> (*field.type);
+	  gdb_assert (e.size () == 4);
+	  gdb_assert (e[0].name == "NEAREST_EVEN");
+	  gdb_assert (e[0].value == 0);
+	  gdb_assert (e[3].name == "ZERO");
+	  gdb_assert (e[3].value == 3);
+	};
+
+    check_fp_round_field ("FP_ROUND.32", f[0]);
+    check_fp_round_field ("FP_ROUND.64_16", f[1]);
+
+    /* Check the "CSP" field.  */
+    gdb_assert (f[22].name == "CSP");
+    gdb_assert (f[22].type->kind () == amd_dbgapi_register_type::kind::INTEGER);
+
+    const auto &i
+      = static_cast<const amd_dbgapi_register_type_integer &> (*f[22].type);
+    gdb_assert (i.bit_size () == 32);
+    gdb_assert (i.is_unsigned ());
+  }
+
+  {
+    /* Test the vector type.  */
+    const char *vector_type_str = "int32_t[64]";
+    amd_dbgapi_register_type_map type_map;
+    const amd_dbgapi_register_type &type
+      = parse_amd_dbgapi_register_type (vector_type_str, type_map);
+
+    gdb_assert (type.kind () == amd_dbgapi_register_type::kind::VECTOR);
+
+    const auto &v = static_cast<const amd_dbgapi_register_type_vector &> (type);
+    gdb_assert (v.count () == 64);
+
+    const auto &et = v.element_type ();
+    gdb_assert (et.kind () == amd_dbgapi_register_type::kind::INTEGER);
+
+    const auto &i = static_cast<const amd_dbgapi_register_type_integer &> (et);
+    gdb_assert (i.bit_size () == 32);
+    gdb_assert (!i.is_unsigned ());
+  }
+}
+
+#endif
+
+void _initialize_amdgpu_tdep ();
+
+void
+_initialize_amdgpu_tdep ()
+{
+  gdbarch_register (bfd_arch_amdgcn, amdgpu_gdbarch_init, NULL,
+		    amdgpu_supports_arch_info);
+#if defined GDB_SELF_TEST
+  selftests::register_test ("amdgpu-register-type-parse-flags-fields",
+			    amdgpu_register_type_parse_test);
+#endif
+}
diff --git a/gdb/amdgpu-tdep.h b/gdb/amdgpu-tdep.h
new file mode 100644
index 00000000000..24081ebaf7d
--- /dev/null
+++ b/gdb/amdgpu-tdep.h
@@ -0,0 +1,93 @@ 
+/* Target-dependent code for the AMDGPU architectures.
+
+   Copyright (C) 2019-2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef AMDGPU_TDEP_H
+#define AMDGPU_TDEP_H
+
+#include "gdbarch.h"
+
+#include <amd-dbgapi/amd-dbgapi.h>
+#include <unordered_map>
+
+/* Provide std::unordered_map::Hash for amd_dbgapi_register_id_t.  */
+struct register_id_hash
+{
+  size_t
+  operator() (const amd_dbgapi_register_id_t &register_id) const
+  {
+    return std::hash<decltype (register_id.handle)> () (register_id.handle);
+  }
+};
+
+/* Provide std::unordered_map::Equal for amd_dbgapi_register_id_t.  */
+struct register_id_equal_to
+{
+  bool
+  operator() (const amd_dbgapi_register_id_t &lhs,
+	      const amd_dbgapi_register_id_t &rhs) const
+  {
+    return std::equal_to<decltype (lhs.handle)> () (lhs.handle, rhs.handle);
+  }
+};
+
+/* AMDGPU architecture specific information.  */
+struct amdgpu_gdbarch_tdep : gdbarch_tdep_base
+{
+  /* This architecture's breakpoint instruction.  */
+  gdb::unique_xmalloc_ptr<gdb_byte> breakpoint_instruction_bytes;
+  size_t breakpoint_instruction_size;
+
+  /* A vector of register_ids indexed by their equivalent gdb regnum.  */
+  std::vector<amd_dbgapi_register_id_t> register_ids;
+
+  /* A vector of register_properties indexed by their equivalent gdb regnum.  */
+  std::vector<amd_dbgapi_register_properties_t> register_properties;
+
+  /* A vector of register names indexed by their equivalent gdb regnum.  */
+  std::vector<std::string> register_names;
+
+  /* A vector of register types created from the amd-dbgapi type strings,
+     indexed by their equivalent gdb regnum.  These are computed lazily by
+     amdgpu_register_type; entries that haven't been computed yet are
+     nullptr.  */
+  std::vector<type *> register_types;
+
+  /* A vector of GDB register numbers indexed by DWARF register number.
+
+     Unused DWARF register numbers map to value -1.  */
+  std::vector<int> dwarf_regnum_to_gdb_regnum;
+
+  /* A map of gdb regnums keyed by their equivalent register_id.  */
+  std::unordered_map<amd_dbgapi_register_id_t, int, register_id_hash,
+		     register_id_equal_to>
+    regnum_map;
+
+  /* A map of register_class_ids keyed by their name.  */
+  std::unordered_map<std::string, amd_dbgapi_register_class_id_t>
+    register_class_map;
+};
+
+/* Return true if GDBARCH is of an AMDGPU architecture.  */
+bool is_amdgpu_arch (struct gdbarch *gdbarch);
+
+/* Return the amdgpu-specific data associated to ARCH.  */
+
+amdgpu_gdbarch_tdep *get_amdgpu_gdbarch_tdep (gdbarch *arch);
+
+#endif /* AMDGPU_TDEP_H */
diff --git a/gdb/configure b/gdb/configure
index 1b07e1e798c..2f327c8e236 100755
--- a/gdb/configure
+++ b/gdb/configure
@@ -774,11 +774,10 @@  PKGVERSION
 CODESIGN_CERT
 DEBUGINFOD_LIBS
 DEBUGINFOD_CFLAGS
-PKG_CONFIG_LIBDIR
-PKG_CONFIG_PATH
-PKG_CONFIG
 HAVE_NATIVE_GCORE_TARGET
 TARGET_OBS
+AMD_DBGAPI_LIBS
+AMD_DBGAPI_CFLAGS
 ENABLE_BFD_64_BIT_FALSE
 ENABLE_BFD_64_BIT_TRUE
 subdirs
@@ -800,6 +799,9 @@  INCINTL
 LIBINTL_DEP
 LIBINTL
 USE_NLS
+PKG_CONFIG_LIBDIR
+PKG_CONFIG_PATH
+PKG_CONFIG
 CCDEPMODE
 DEPDIR
 am__leading_dot
@@ -913,6 +915,7 @@  with_auto_load_dir
 with_auto_load_safe_path
 enable_targets
 enable_64_bit_bfd
+with_amd_dbgapi
 enable_gdbmi
 enable_tui
 enable_gdbtk
@@ -984,11 +987,13 @@  CXXFLAGS
 CCC
 CPP
 CXXCPP
-MAKEINFO
-MAKEINFOFLAGS
 PKG_CONFIG
 PKG_CONFIG_PATH
 PKG_CONFIG_LIBDIR
+MAKEINFO
+MAKEINFOFLAGS
+AMD_DBGAPI_CFLAGS
+AMD_DBGAPI_LIBS
 DEBUGINFOD_CFLAGS
 DEBUGINFOD_LIBS
 YACC
@@ -1675,6 +1680,7 @@  Optional Packages:
                           [--with-auto-load-dir]
   --without-auto-load-safe-path
                           do not restrict auto-loaded files locations
+  --with-amd-dbgapi       support for the amd-dbgapi target (yes / no / auto)
   --with-debuginfod       Enable debuginfo lookups with debuginfod
                           (auto/yes/no)
   --with-libunwind-ia64   use libunwind frame unwinding for ia64 targets
@@ -1748,14 +1754,18 @@  Some influential environment variables:
   CXXFLAGS    C++ compiler flags
   CPP         C preprocessor
   CXXCPP      C++ preprocessor
-  MAKEINFO    Parent configure detects if it is of sufficient version.
-  MAKEINFOFLAGS
-              Parameters for MAKEINFO.
   PKG_CONFIG  path to pkg-config utility
   PKG_CONFIG_PATH
               directories to add to pkg-config's search path
   PKG_CONFIG_LIBDIR
               path overriding pkg-config's built-in search path
+  MAKEINFO    Parent configure detects if it is of sufficient version.
+  MAKEINFOFLAGS
+              Parameters for MAKEINFO.
+  AMD_DBGAPI_CFLAGS
+              C compiler flags for AMD_DBGAPI, overriding pkg-config
+  AMD_DBGAPI_LIBS
+              linker flags for AMD_DBGAPI, overriding pkg-config
   DEBUGINFOD_CFLAGS
               C compiler flags for DEBUGINFOD, overriding pkg-config
   DEBUGINFOD_LIBS
@@ -11451,7 +11461,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11454 "configure"
+#line 11464 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11557,7 +11567,7 @@  else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11560 "configure"
+#line 11570 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -17537,6 +17547,130 @@  else CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
 fi
 
 
+# Since the first call to PKG_CHECK_MODULES may not happen (is guarded by
+# a condition), we must call PKG_PROG_PKG_CONFIG explicitly to probe for
+# pkg-config.
+
+
+
+
+
+
+
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
+set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_PKG_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $PKG_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+PKG_CONFIG=$ac_cv_path_PKG_CONFIG
+if test -n "$PKG_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
+$as_echo "$PKG_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_path_PKG_CONFIG"; then
+  ac_pt_PKG_CONFIG=$PKG_CONFIG
+  # Extract the first word of "pkg-config", so it can be a program name with args.
+set dummy pkg-config; ac_word=$2
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+$as_echo_n "checking for $ac_word... " >&6; }
+if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  case $ac_pt_PKG_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
+    ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
+    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  ;;
+esac
+fi
+ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
+if test -n "$ac_pt_PKG_CONFIG"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
+$as_echo "$ac_pt_PKG_CONFIG" >&6; }
+else
+  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+
+  if test "x$ac_pt_PKG_CONFIG" = x; then
+    PKG_CONFIG=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
+$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    PKG_CONFIG=$ac_pt_PKG_CONFIG
+  fi
+else
+  PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
+fi
+
+fi
+if test -n "$PKG_CONFIG"; then
+	_pkg_min_version=0.9.0
+	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
+$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
+	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+	else
+		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+		PKG_CONFIG=""
+	fi
+fi
+
 
 CONFIG_OBS=
 CONFIG_DEPS=
@@ -17997,6 +18131,157 @@  if test x${all_targets} = xtrue; then
   fi
 fi
 
+# AMD debugger API support.
+
+
+# Check whether --with-amd-dbgapi was given.
+if test "${with_amd_dbgapi+set}" = set; then :
+  withval=$with_amd_dbgapi;
+	   case $withval in
+	     yes | no | auto)
+	       ;;
+	     *)
+	       as_fn_error $? "bad value $withval for --with-amd-dbgapi" "$LINENO" 5
+	       ;;
+	   esac
+
+else
+  with_amd_dbgapi=auto
+fi
+
+
+# If the user passes --without-amd-dbgapi but also explicitly enables a target
+# that requires amd-dbgapi, it is an error.
+if test "$with_amd_dbgapi" = no -a "$gdb_require_amd_dbgapi" = true; then
+  as_fn_error $? "an explicitly enabled target requires amd-dbgapi, but amd-dbgapi is explicitly disabled" "$LINENO" 5
+fi
+
+# Look for amd-dbgapi if:
+#
+#   - a target architecture requiring it has explicitly been enabled, or
+#   - --enable-targets=all was provided and the user did not explicitly disable
+#     amd-dbgapi support
+if test "$gdb_require_amd_dbgapi" = true \
+     -o \( "$all_targets" = true -a "$with_amd_dbgapi" != no \); then
+  # amd-dbgapi version 0.68 is part of ROCm 5.4.  There is no guarantee of API
+  # stability until amd-dbgapi hits 1.0, but for convenience, still check for
+  # a version greater than or equal to that one.  This can be handy when
+  # testing with a newer version of the library.
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for amd-dbgapi >= 0.68.0" >&5
+$as_echo_n "checking for amd-dbgapi >= 0.68.0... " >&6; }
+
+if test -n "$AMD_DBGAPI_CFLAGS"; then
+    pkg_cv_AMD_DBGAPI_CFLAGS="$AMD_DBGAPI_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"amd-dbgapi >= 0.68.0\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "amd-dbgapi >= 0.68.0") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_AMD_DBGAPI_CFLAGS=`$PKG_CONFIG --cflags "amd-dbgapi >= 0.68.0" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+if test -n "$AMD_DBGAPI_LIBS"; then
+    pkg_cv_AMD_DBGAPI_LIBS="$AMD_DBGAPI_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"amd-dbgapi >= 0.68.0\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "amd-dbgapi >= 0.68.0") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_AMD_DBGAPI_LIBS=`$PKG_CONFIG --libs "amd-dbgapi >= 0.68.0" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+
+if test $pkg_failed = no; then
+  pkg_save_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS $pkg_cv_AMD_DBGAPI_LIBS"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+
+else
+  pkg_failed=yes
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+  LDFLAGS=$pkg_save_LDFLAGS
+fi
+
+
+
+if test $pkg_failed = yes; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+	        AMD_DBGAPI_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "amd-dbgapi >= 0.68.0" 2>&1`
+        else
+	        AMD_DBGAPI_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "amd-dbgapi >= 0.68.0" 2>&1`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$AMD_DBGAPI_PKG_ERRORS" >&5
+
+	has_amd_dbgapi=no
+elif test $pkg_failed = untried; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+	has_amd_dbgapi=no
+else
+	AMD_DBGAPI_CFLAGS=$pkg_cv_AMD_DBGAPI_CFLAGS
+	AMD_DBGAPI_LIBS=$pkg_cv_AMD_DBGAPI_LIBS
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+	has_amd_dbgapi=yes
+fi
+
+  if test "$has_amd_dbgapi" = "yes"; then
+    TARGET_OBS="$TARGET_OBS amd-dbgapi-target.o"
+
+    # If --enable-targets=all was provided, use the list of all files depending
+    # on amd-dbgapi that is hardcoded in the Makefile.  Else, the appropriate
+    # architecture entry in configure.tgt will have added the files to
+    # gdb_target_obs.
+    if test "$all_targets" = true; then
+      TARGET_OBS="$TARGET_OBS \$(ALL_AMD_DBGAPI_TARGET_OBS)"
+    fi
+  elif test "$gdb_require_amd_dbgapi" = true -o "$with_amd_dbgapi" = yes; then
+    # amd-dbgapi was not found and...
+    #
+    #   - a target requiring it was explicitly enabled, or
+    #   - the user explicitly wants to enable amd-dbgapi
+    as_fn_error $? "amd-dbgapi is required, but cannot find an appropriate version: $AMD_DBGAPI_PKG_ERRORS" "$LINENO" 5
+  fi
+fi
+
 
 
 
@@ -18099,126 +18384,6 @@  esac
 
 # Handle optional debuginfod support
 
-
-
-
-
-
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	if test -n "$ac_tool_prefix"; then
-  # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
-set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_path_PKG_CONFIG+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
-    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-PKG_CONFIG=$ac_cv_path_PKG_CONFIG
-if test -n "$PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
-$as_echo "$PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_path_PKG_CONFIG"; then
-  ac_pt_PKG_CONFIG=$PKG_CONFIG
-  # Extract the first word of "pkg-config", so it can be a program name with args.
-set dummy pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if ${ac_cv_path_ac_pt_PKG_CONFIG+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $ac_pt_PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
-    ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
-if test -n "$ac_pt_PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
-$as_echo "$ac_pt_PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-  if test "x$ac_pt_PKG_CONFIG" = x; then
-    PKG_CONFIG=""
-  else
-    case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
-    PKG_CONFIG=$ac_pt_PKG_CONFIG
-  fi
-else
-  PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
-fi
-
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=0.9.0
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
-$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-	else
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-		PKG_CONFIG=""
-	fi
-fi
-
 # Handle optional debuginfod support
 
 # Check whether --with-debuginfod was given.
diff --git a/gdb/configure.ac b/gdb/configure.ac
index cecf4a46bec..86efe23111a 100644
--- a/gdb/configure.ac
+++ b/gdb/configure.ac
@@ -61,6 +61,11 @@  AX_CXX_COMPILE_STDCXX(11, , mandatory)
 ZW_CREATE_DEPDIR
 ZW_PROG_COMPILER_DEPENDENCIES([CC])
 
+# Since the first call to PKG_CHECK_MODULES may not happen (is guarded by
+# a condition), we must call PKG_PROG_PKG_CONFIG explicitly to probe for
+# pkg-config.
+PKG_PROG_PKG_CONFIG
+
 dnl List of object files and targets accumulated by configure.
 
 CONFIG_OBS=
@@ -241,6 +246,53 @@  if test x${all_targets} = xtrue; then
   fi
 fi
 
+# AMD debugger API support.
+
+AC_ARG_WITH([amd-dbgapi],
+	    [AS_HELP_STRING([--with-amd-dbgapi],
+			    [support for the amd-dbgapi target (yes / no / auto)])],
+	    [GDB_CHECK_YES_NO_AUTO_VAL([$withval], [--with-amd-dbgapi])],
+	    [with_amd_dbgapi=auto])
+
+# If the user passes --without-amd-dbgapi but also explicitly enables a target
+# that requires amd-dbgapi, it is an error.
+if test "$with_amd_dbgapi" = no -a "$gdb_require_amd_dbgapi" = true; then
+  AC_MSG_ERROR([an explicitly enabled target requires amd-dbgapi, but amd-dbgapi is explicitly disabled])
+fi
+
+# Look for amd-dbgapi if:
+#
+#   - a target architecture requiring it has explicitly been enabled, or
+#   - --enable-targets=all was provided and the user did not explicitly disable
+#     amd-dbgapi support
+if test "$gdb_require_amd_dbgapi" = true \
+     -o \( "$all_targets" = true -a "$with_amd_dbgapi" != no \); then
+  # amd-dbgapi version 0.68 is part of ROCm 5.4.  There is no guarantee of API
+  # stability until amd-dbgapi hits 1.0, but for convenience, still check for
+  # a version greater than or equal to that one.  This can be handy when
+  # testing with a newer version of the library.
+  PKG_CHECK_MODULES([AMD_DBGAPI], [amd-dbgapi >= 0.68.0],
+		    [has_amd_dbgapi=yes], [has_amd_dbgapi=no])
+
+  if test "$has_amd_dbgapi" = "yes"; then
+    TARGET_OBS="$TARGET_OBS amd-dbgapi-target.o"
+
+    # If --enable-targets=all was provided, use the list of all files depending
+    # on amd-dbgapi that is hardcoded in the Makefile.  Else, the appropriate
+    # architecture entry in configure.tgt will have added the files to
+    # gdb_target_obs.
+    if test "$all_targets" = true; then
+      TARGET_OBS="$TARGET_OBS \$(ALL_AMD_DBGAPI_TARGET_OBS)"
+    fi
+  elif test "$gdb_require_amd_dbgapi" = true -o "$with_amd_dbgapi" = yes; then
+    # amd-dbgapi was not found and...
+    #
+    #   - a target requiring it was explicitly enabled, or
+    #   - the user explicitly wants to enable amd-dbgapi
+    AC_MSG_ERROR([amd-dbgapi is required, but cannot find an appropriate version: $AMD_DBGAPI_PKG_ERRORS])
+  fi
+fi
+
 AC_SUBST(TARGET_OBS)
 AC_SUBST(HAVE_NATIVE_GCORE_TARGET)
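
As a usage illustration of the logic above, a couple of example configure
invocations (illustrative only; the target triple follows the amdgcn*-*-*
pattern added to configure.tgt below):

    # --enable-targets=all with the default --with-amd-dbgapi=auto:
    # amd-dbgapi is picked up through pkg-config if found, and silently
    # skipped otherwise.
    ./configure --enable-targets=all

    # Explicitly enabling an AMDGPU target makes amd-dbgapi mandatory:
    # configure fails if no amd-dbgapi >= 0.68.0 is found.
    ./configure --enable-targets=amdgcn-amd-amdhsa --with-amd-dbgapi=yes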
 
diff --git a/gdb/configure.tgt b/gdb/configure.tgt
index e84e222ba0d..d5b7dd1e7d7 100644
--- a/gdb/configure.tgt
+++ b/gdb/configure.tgt
@@ -2,13 +2,20 @@ 
 # invoked from the autoconf generated configure script.
 
 # This file sets the following shell variables:
-#  gdb_target_obs	target-specific object files to use
-#  gdb_sim		simulator library for target
-#  gdb_osabi		default OS ABI to use with target
-#  gdb_have_gcore	set to "true"/"false" if this target can run gcore
+#  gdb_target_obs         target-specific object files to use
+#  gdb_sim                simulator library for target
+#  gdb_osabi              default OS ABI to use with target
+#  gdb_have_gcore         set to "true"/"false" if this target can run gcore
+#  gdb_require_amd_dbgapi set to "true" if this target requires the amd-dbgapi
+#                         target
 
 # NOTE: Every file added to a gdb_target_obs variable for any target here
-#       must also be added to either ALL_TARGET_OBS or ALL_64_TARGET_OBS
+#       must also be added to either:
+#
+#         - ALL_TARGET_OBS
+#         - ALL_64_TARGET_OBS
+#         - ALL_AMD_DBGAPI_TARGET_OBS
+#
 #	in Makefile.in!
 
 case $targ in
@@ -161,6 +168,12 @@  alpha*-*-openbsd*)
 			alpha-netbsd-tdep.o alpha-obsd-tdep.o netbsd-tdep.o"
 	;;
 
+amdgcn*-*-*)
+	# Target: AMDGPU
+	gdb_require_amd_dbgapi=true
+	gdb_target_obs="amdgpu-tdep.o solib-rocm.o"
+	;;
+
 am33_2.0*-*-linux*)
 	# Target: Matsushita mn10300 (AM33) running Linux
 	gdb_target_obs="mn10300-tdep.o mn10300-linux-tdep.o linux-tdep.o \
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 5b566669975..820de7d5119 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -7021,6 +7021,8 @@  signal happened.  @value{GDBN} alerts you to the context switch with a
 message such as @samp{[Switching to Thread @var{n}]} to identify the
 thread.  
 
+@node set scheduler-locking
+
 On some OSes, you can modify @value{GDBN}'s default behavior by
 locking the OS scheduler to allow only a single thread to run.
 
@@ -25822,6 +25824,7 @@  all uses of @value{GDBN} with the architecture, both native and cross.
 * Nios II::
 * Sparc64::
 * S12Z::
+* AMD GPU::            @acronym{AMD GPU} architectures
 @end menu
 
 @node AArch64
@@ -26310,6 +26313,721 @@  This command displays the current value of the microprocessor's
 BDCCSR register.
 @end table
 
+@node AMD GPU
+@subsection @acronym{AMD GPU}
+@cindex @acronym{AMD GPU} support
+
+@value{GDBN} supports commercially available @acronym{AMD GPU}
+devices when the @url{https://docs.amd.com/, @acronym{AMD ROCm}}
+platform is installed.
+
+@subsubsection @acronym{AMD GPU} Architectures
+
+The following @acronym{AMD GPU} architectures are supported:
+
+@table @emph
+
+@item @samp{gfx900}
+AMD Vega 10 devices, displayed as @samp{vega10} by @value{GDBN}.
+
+@item @samp{gfx906}
+AMD Vega 7nm devices, displayed as @samp{vega20} by @value{GDBN}.
+
+@item @samp{gfx908}
+AMD Instinct@registeredsymbol{} MI100 accelerator devices, displayed as
+@samp{arcturus} by @value{GDBN}.
+
+@item @samp{gfx90a}
+Aldebaran devices, displayed as @samp{aldebaran} by @value{GDBN}.
+
+@item @samp{gfx1010}
+Navi10 devices, displayed as @samp{navi10} by @value{GDBN}.
+
+@item @samp{gfx1011}
+Navi12 devices, displayed as @samp{navi12} by @value{GDBN}.
+
+@item @samp{gfx1012}
+Navi14 devices, displayed as @samp{navi14} by @value{GDBN}.
+
+@item @samp{gfx1030}
+Sienna Cichlid devices, displayed as @samp{sienna_cichlid} by @value{GDBN}.
+
+@item @samp{gfx1031}
+Navy Flounder devices, displayed as @samp{navy_flounder} by @value{GDBN}.
+
+@item @samp{gfx1032}
+Dimgrey Cavefish devices, displayed as @samp{dimgrey_cavefish} by
+@value{GDBN}.
+
+@end table
+
+@subsubsection @acronym{AMD ROCm} Source Languages
+
+@value{GDBN} supports the following source languages:
+
+@table @emph
+
+@item HIP
+The
+@url{https://github.com/ROCm-Developer-Tools/HIP/blob/master/docs/markdown/hip_kernel_language.md,
+HIP Programming Language} is supported.
+
+When compiling, the @w{@option{-g}} option should be used to produce
+debugging information suitable for use by @value{GDBN}.  The
+@w{@option{--offload-arch}} option is used to specify the @acronym{AMD
+GPU} chips that the executable is required to support.  For example,
+to compile a HIP program that can utilize ``Vega 10'' and ``Vega 7nm''
+@acronym{AMD GPU} devices, with no optimization:
+
+@smallexample
+hipcc -O0 -g --offload-arch=gfx900 --offload-arch=gfx906 bit_extract.cpp -o bit_extract
+@end smallexample
+
+@item Assembly Code
+Assembly code kernels are supported.
+
+@item Other Languages
+Other languages, including OpenCL and Fortran, are currently supported
+as the minimal pseudo-language, provided they are compiled specifying
+at least the @acronym{AMD GPU} Code Object V3 and DWARF 4 formats.
+@xref{Unsupported Languages}.
+
+@end table
+
+@subsubsection @acronym{AMD GPU} Device Driver and @acronym{AMD ROCm} Runtime
+
+@value{GDBN} requires a compatible @acronym{AMD GPU} device driver to
+be installed.  A warning message is displayed if either the device
+driver version or the version of the debug support it implements is
+unsupported.  For example,
+
+@smallexample
+amd-dbgapi: warning: AMD GPU driver's version 1.6 not supported (version 2.x where x >= 1 required)
+amd-dbgapi: warning: AMD GPU driver's debug support version 9.0 not supported (version 10.x where x >= 1 required)
+@end smallexample
+
+@value{GDBN} will continue to function except no @acronym{AMD GPU}
+debugging will be possible.
+
+@value{GDBN} requires each agent to have compatible firmware installed
+by the device driver.  A warning message is displayed if unsupported
+firmware is detected.  For example,
+
+@smallexample
+amd-dbgapi: warning: AMD GPU gpu_id 17619's firmware version 458 not supported (version >= 555 required)
+@end smallexample
+
+@value{GDBN} will continue to function except no @acronym{AMD GPU}
+debugging will be possible on the agent.
+
+@value{GDBN} requires a compatible @acronym{AMD ROCm} runtime to be
+loaded in order to detect @acronym{AMD GPU} code objects and
+wavefronts.  A warning message is displayed if an unsupported
+@acronym{AMD ROCm} runtime is detected, or there is an error or
+restriction that prevents debugging.  For example,
+
+@smallexample
+amd_dbgapi: warning: AMD GPU runtime's r_debug::r_version 5 not supported (r_debug::r_version >= 6 required)
+@end smallexample
+
+@value{GDBN} will continue to function except no @acronym{AMD GPU}
+debugging will be possible.
+
+@subsubsection @acronym{AMD GPU} Wavefronts
+
+An @acronym{AMD GPU} wavefront is represented in @value{GDBN} as a
+thread.
+
+An @acronym{AMD GPU} wavefront can enter the halt state by:
+
+@itemize @bullet{}
+
+@item
+Executing a @code{S_SETHALT 1} instruction.
+
+@item
+Using the @samp{set $status} command to change the bit in the
+@acronym{AMD GPU} wavefront's register that controls the halt state.
+
+@item
+Delivering a signal to the wavefront (@pxref{AMD GPU Signals, ,
+@acronym{AMD GPU} Signals}).
+
+@end itemize
+
+When a wavefront is in the halt state, it executes no further
+instructions.  In addition, a wavefront that is associated with a
+queue that is in the queue error state (@pxref{AMD GPU Signals, ,
+@acronym{AMD GPU} Signals}) is inhibited from executing further
+instructions.  Continuing such wavefronts will not hit any breakpoints
+nor report completion of a single step command.  If necessary,
+@samp{Ctrl-C} can be used to cancel the command.
+
+Note that some @acronym{AMD GPU} architectures may have restrictions
+on providing information about @acronym{AMD GPU} wavefronts created
+when @value{GDBN} is not attached (@pxref{AMD GPU Attaching
+Restrictions, , @acronym{AMD GPU} Attaching Restrictions}).
+
+When scheduler-locking is in effect (@pxref{set scheduler-locking}),
+new wavefronts created by the resumed thread (either CPU thread or GPU
+wavefront) are held in the halt state.
+
+@subsubsection @acronym{AMD GPU} Registers
+
+@acronym{AMD GPU} supports the following @var{reggroup} values for the
+@samp{info registers @var{reggroup} @dots{}} command:
+
+@itemize @bullet
+
+@item
+general
+
+@item
+vector
+
+@item
+scalar
+
+@item
+system
+
+@end itemize
+
+The number of scalar and vector registers is configured when a
+wavefront is created.  Only allocated registers are displayed.
+
+Scalar registers are reported as 32-bit signed integer values.
+
+Vector registers are reported as a wavefront size vector of signed
+32-bit values.
+
+The @code{pc} is reported as a function pointer value.
+
+The @code{exec} register is reported as a wavefront-size-bit unsigned
+integer value.
+
+The @code{vcc} and @code{xnack_mask} pseudo registers are reported as
+wavefront-size-bit unsigned integer values.
+
+The @code{flat_scratch} pseudo register is reported as a 64-bit
+unsigned integer value.
+
+The @code{mode}, @code{status}, and @code{trapsts} registers are
+reported as flag values.  For example,
+
+@smallexample
+(gdb) p $mode
+$1 = [ FP_ROUND.32=NEAREST_EVEN FP_ROUND.64_16=NEAREST_EVEN FP_DENORM.32=FLUSH_NONE FP_DENORM.64_16=FLUSH_NONE DX10_CLAMP IEEE CSP=0 ]
+(gdb) p $status
+$2 = [ SPI_PRIO=0 USER_PRIO=0 TRAP_EN VCCZ VALID ]
+(gdb) p $trapsts
+$3 = [ EXCP_CYCLE=0 DP_RATE=FULL ]
+@end smallexample
+
+Use the @samp{ptype} command to see the type of any register.
+
+@subsubsection @acronym{AMD GPU} Code Objects
+
+The @samp{info sharedlibrary} command will show the @acronym{AMD GPU} code objects
+together with the CPU code objects.  For example:
+
+@smallexample
+(@value{GDBP}) info sharedlibrary
+From                To                  Syms Read   Shared Object Library
+0x00007fd120664ac0  0x00007fd120682790  Yes (*)     /lib64/ld-linux-x86-64.so.2
+...
+0x00007fd0125d8ec0  0x00007fd015f21630  Yes (*)     /opt/rocm-3.5.0/hip/lib/../../lib/libamd_comgr.so
+0x00007fd11d74e870  0x00007fd11d75a868  Yes (*)     /lib/x86_64-linux-gnu/libtinfo.so.5
+0x00007fd11d001000  0x00007fd11d00173c  Yes         file:///home/rocm/examples/bit_extract#offset=6477&size=10832
+0x00007fd11d008000  0x00007fd11d00adc0  Yes (*)     memory://95557/mem#offset=0x7fd0083e7f60&size=41416
+(*): Shared library is missing debugging information.
+(@value{GDBP})
+@end smallexample
+
+The code object path for @acronym{AMD GPU} code objects is shown as a
+@acronym{URI, Uniform Resource Identifier} with a syntax defined by the
+following BNF grammar:
+
+@smallexample
+code_object_uri ::== file_uri | memory_uri
+file_uri        ::== "file://" file_path [ range_specifier ]
+memory_uri      ::== "memory://" process_id range_specifier
+range_specifier ::== [ "#" | "?" ] "offset=" number "&" "size=" number
+file_path       ::== URI_ENCODED_OS_FILE_PATH
+process_id      ::== DECIMAL_NUMBER
+number          ::== HEX_NUMBER | DECIMAL_NUMBER | OCTAL_NUMBER
+@end smallexample
+
+@noindent
+Where:
+
+@table @var
+
+@item number
+A C integral literal where hexadecimal values are prefixed by
+@samp{0x} or @samp{0X}, and octal values by @samp{0}.
+
+@item file_path
+The file's path specified as a URI encoded UTF-8 string.  In URI
+encoding, every character that is not:
+
+@itemize
+@item In the @samp{a-z}, @samp{A-Z}, @samp{0-9} ranges
+@item @samp{/}, @samp{_}, @samp{.}, @samp{~} or @samp{-}
+@end itemize
+
+is encoded as two uppercase hexadecimal digits preceded by @samp{%}.
+Directories in the path are separated by @samp{/}.
+
+@item offset
+A 0-based byte offset to the start of the code object.  For a file
+URI, it is from the start of the file specified by the
+@var{file_path}, and if omitted defaults to 0.  For a memory URI, it is
+the memory address and is required.
+
+@item size
+The number of bytes in the code object.  For a file URI, if omitted it
+defaults to the size of the file.  It is required for a memory URI.
+
+@item process_id
+The identity of the process owning the memory.  For Linux it is the C
+unsigned integral decimal literal for the process @var{pid}.
+
+@end table
+
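As a worked example, the file URI shown in the info sharedlibrary listing
earlier in this section decomposes as follows:

    file:///home/rocm/examples/bit_extract#offset=6477&size=10832

    protocol    file
    file_path   /home/rocm/examples/bit_extract
    offset      6477   (decimal; start of the code object within the file)
    size        10832  (decimal; number of bytes in the code object)

The memory URI in the same listing follows the memory_uri production instead:
the number after memory:// is the process_id, and the offset is the address of
the code object in that process's memory.
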
+@acronym{AMD GPU} code objects are loaded into each @acronym{AMD GPU}
+device separately.  The @samp{info sharedlibrary} command will
+therefore show the same code object loaded multiple times.  As a
+consequence, setting a breakpoint in @acronym{AMD GPU} code will
+result in multiple breakpoint locations if there are multiple
+@acronym{AMD GPU} devices.
+
+If the source language runtime defers loading code objects until
+kernels are launched, then setting breakpoints may result in pending
+breakpoints that will be resolved when the code object is finally loaded.
+
+@subsubsection @acronym{AMD GPU} Entity Target Identifiers and Convenience Variables
+
+The @acronym{AMD GPU} entities have the following target identifier formats:
+
+@table @var
+
+@item Thread Target ID
+The @acronym{AMD GPU} thread target identifier (@var{systag}) string has the
+following format:
+
+@smallexample
+AMDGPU Wave @var{agent-id}:@var{queue-id}:@var{dispatch-id}:@var{wave-id} (@var{work-group-x},@var{work-group-y},@var{work-group-z})/@var{work-group-thread-index}
+@end smallexample
+
+It is used in the @samp{Target ID} column of the @samp{info threads} command.
+
+@end table
+
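For instance, the wavefront shown later in this section as
AMDGPU Wave 1:1:1:1 (0,0,0)/0 is wavefront 1 of dispatch 1, running on queue 1
of agent 1, and is work item 0 of the work-group at position (0,0,0).  Fields
that cannot be determined are displayed as ?, as in
AMDGPU Wave 1:2:?:1 (?,?,?)/? (see the AMD GPU Attaching Restrictions item
below).
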
+@anchor{AMD GPU Signals}
+@subsubsection @acronym{AMD GPU} Signals
+
+@acronym{AMD GPU} wavefronts can raise the following signals when
+executing instructions:
+
+@table @code
+
+@item SIGILL
+Execution of an illegal instruction.
+
+@item SIGTRAP
+Execution of a @code{S_TRAP} instruction other than:
+
+@itemize @bullet{}
+
+@item
+@code{S_TRAP 1} which is used by @value{GDBN} to insert breakpoints.
+
+@item
+@code{S_TRAP 2} which raises @code{SIGABRT}.
+
+@end itemize
+
+Note that @code{S_TRAP 3} only raises a signal when @value{GDBN} is
+attached to the inferior.  Otherwise, it is treated as a no-operation.
+The compiler generates @code{S_TRAP 3} for the @code{llvm.debugtrap}
+intrinsic.
+
+@item SIGABRT
+Execution of a @code{S_TRAP 2} instruction.  The compiler generates
+@code{S_TRAP 2} for the @code{llvm.trap} intrinsic which is used for
+assertions.
+
+@item SIGFPE
+Execution of a floating point or integer instruction detects a
+condition that is enabled to raise a signal.  The conditions include:
+
+@itemize @bullet{}
+
+@item
+Floating point operation is invalid.
+
+@item
+Floating point operation had subnormal input that was rounded to zero.
+
+@item
+Floating point operation performed a division by zero.
+
+@item
+Floating point operation produced an overflow result.  The result was
+rounded to infinity.
+
+@item
+Floating point operation produced an underflow result.  A subnormal
+result was rounded to zero.
+
+@item
+Floating point operation produced an inexact result.
+
+@item
+Integer operation performed a division by zero.
+
+@end itemize
+
+By default, these conditions are not enabled to raise signals.  The
+@samp{set $mode} command can be used to change the @acronym{AMD GPU}
+wavefront's register that has bits controlling which conditions are
+enabled to raise signals.  The @samp{print $trapsts} command can be
+used to inspect which conditions have been detected even if they are
+not enabled to raise a signal.
+
+@item SIGBUS
+Execution of an instruction that accessed global memory using an
+address that is outside the virtual address range.
+
+@item SIGSEGV
+Execution of an instruction that accessed a global memory page that is
+either not mapped or accessed with incompatible permissions.
+
+@end table
+
+If a single instruction raises more than one signal, they will be
+reported one at a time each time the wavefront is continued.
+
+If any of these signals are delivered to the wavefront, it will cause
+the wavefront to enter the halt state and cause the @acronym{AMD ROCm}
+runtime to put the associated queue into the queue error state.  All
+wavefronts associated with a queue that is in the queue error state
+are inhibited from executing further instructions even if they are not
+in the halt state.  In addition, when the @acronym{AMD ROCm} runtime
+puts a queue into the queue error state it may invoke an application
+registered callback that could either abort the application or delete
+the queue which will delete any wavefronts associated with the queue.
+
+The @value{GDBN} signal-related commands (@pxref{Signals}) can be used
+to control when a signal is delivered to the inferior, what signal is
+delivered to the inferior, and even if a signal should not be
+delivered to the inferior.
+
+If the @samp{signal} or @samp{queue-signal} commands are used to
+deliver a signal other than those listed above to an @acronym{AMD GPU}
+wavefront, then the following error will be displayed when the
+wavefront is resumed:
+
+@smallexample
+Resuming with signal @var{signal} is not supported by this agent.
+@end smallexample
+
+The wavefront will not be resumed and no signal will be delivered.
+Use the @samp{signal} or @samp{queue-signal} commands to change the
+signal to deliver, or use @samp{signal 0} or @samp{queue-signal 0} to
+suppress delivering a signal.
+
+Note that some @acronym{AMD GPU} architectures may have restrictions
+on suppressing delivering signals to a wavefront (@pxref{AMD GPU Signal
+Restrictions, , @acronym{AMD GPU} Signal Restrictions}).
+
+@subsubsection @acronym{AMD GPU} Logging
+
+The @samp{set debug amd-dbgapi-lib log-level @var{level}} command can be used
+to enable diagnostic messages from the @samp{amd-dbgapi} library.  The
+@samp{show debug amd-dbgapi-lib log-level} command displays the current
+@samp{amd-dbgapi} library log level.  @xref{set debug amd-dbgapi-lib}.
+
+The @samp{set debug amd-dbgapi} command can be used
+to enable diagnostic messages in the @samp{amd-dbgapi} target.  The
+@samp{show debug amd-dbgapi} command displays the current setting.
+@xref{set debug amd-dbgapi}.
+
+For example, the following will enable information messages and send
+the log to a new file:
+
+@smallexample
+(@value{GDBP}) set debug amd-dbgapi-lib log-level info
+(@value{GDBP}) set debug amd-dbgapi on
+(@value{GDBP}) set logging overwrite
+(@value{GDBP}) set logging file log.out
+(@value{GDBP}) set logging debugredirect on
+(@value{GDBP}) set logging enabled on
+@end smallexample
+
+If you want to print the log to both the console and a file, omit the
+@samp{set logging debugredirect on} command.  @xref{Logging Output}.
+
+@subsubsection @acronym{AMD GPU} Restrictions
+
+@value{GDBN} @acronym{AMD GPU} support is currently a prototype and
+has the following restrictions.  Future releases aim to address these
+restrictions.
+
+@enumerate
+
+@item
+Only @acronym{AMD GPU} Code Object V3 and above are supported.
+
+@item
+No support yet for @acronym{AMD GPU} core dumps.
+
+@item
+When in non-stop mode, wavefronts may not hit breakpoints inserted
+while not stopped, nor see memory updates made while not stopped,
+until the wavefront is next stopped.  Memory updated by non-stopped
+wavefronts may not be visible until the wavefront is next stopped.
+
+@item
+Single-stepping or resuming execution from an illegal instruction may
+execute differently in @value{GDBN} than on real hardware.
+
+@item
+On some @acronym{AMD GPU} devices, halting @acronym{AMD GPU} wavefronts
+in an inferior can result in preventing other processes from executing
+@acronym{AMD GPU} wavefronts.
+
+@item
+Some @acronym{AMD GPU} devices, such as @samp{gfx90a}, can be in use
+by multiple processes that are being debugged by @value{GDBN}.  For
+other devices the following warning message may be displayed.
+
+@smallexample
+amd-dbgapi: warning: At least one agent is busy (debugging may be enabled by another process)
+@end smallexample
+
+@value{GDBN} will continue to function except no @acronym{AMD GPU}
+debugging will be possible.
+
+The Linux @emph{cgroups} facility can be used to limit which
+@acronym{AMD GPU} devices are used by a process.  In order for a
+@value{GDBN} process to access the @acronym{AMD GPU} devices of the
+process it is debugging, the @acronym{AMD GPU} devices must be
+included in the @value{GDBN} process @emph{cgroup}.
+
+Therefore, multiple @value{GDBN} processes can each debug a process
+provided the @emph{cgroups} specify disjoint sets of @acronym{AMD GPU}
+devices.  However, a single @value{GDBN} process cannot debug multiple
+inferiors that use @acronym{AMD GPU} devices even if those inferiors
+have @emph{cgroups} that specify disjoint @acronym{AMD GPU} devices.
+This is because the @value{GDBN} process must have all the
+@acronym{AMD GPU} devices in its @emph{cgroups} and so will attempt to
+enable debugging for all AMD GPU devices for all inferiors it is
+debugging.
+
+It is suggested to use @emph{Docker} rather than @emph{cgroups}
+directly to limit the @acronym{AMD GPU} devices visible inside a
+container:
+
+@enumerate
+
+@item
+@samp{/dev/kfd} must be mapped into the container.
+
+@item
+The @samp{/dev/dri/renderD@var{<render-minor-number>}} and
+@samp{/dev/dri/card@var{<node-number>}} files corresponding to each
+AMD GPU device that is to be visible must be mapped into the
+container.  Note that non-@acronym{AMD GPU} devices may also be
+present.
+
+The @var{render-minor-number} for a device can be obtained by looking
+at the @samp{drm_render_minor} field value from:
+
+@smallexample
+cat /sys/class/kfd/kfd/topology/nodes/@var{<node-number>}/properties
+@end smallexample
+
+@item
+Make sure the container user is a member of the @var{render} group for
+Ubuntu 20.04 onward and the @var{video} group for all other
+distributions.
+
+@item
+Specify the @samp{--cap-add=SYS_PTRACE} and
+@samp{--security-opt seccomp=unconfined} options.
+
+@item
+Install the @acronym{AMD ROCm} packages in the container.  See
+@uref{https://github.com/RadeonOpenCompute/ROCm-docker}.
+
+@end enumerate
+
+All processes running in the container will see the same subset of
+devices.  By having two containers with non-overlapping sets of AMD
+GPUs, it is possible to use @value{GDBN} in both containers at the
+same time since each @acronym{AMD GPU} device will only have one
+@value{GDBN} process accessing it.
+
+For example:
+
+@smallexample
+docker run -it --rm --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
+    --device=/dev/kfd --device=/dev/dri/card0 --device=/dev/dri/renderD128 \
+    --group-add render ubuntu:22.04 /bin/bash
+@end smallexample
+
+@item
+The HIP runtime currently performs deferred code object loading by
+default.  @acronym{AMD GPU} code objects are not loaded until the
+first kernel is launched.  Before then, all breakpoints have to be set
+as pending breakpoints.
+
+If source line positions are used that only correspond to source lines
+in unloaded code objects, then @value{GDBN} may not set pending
+breakpoints, and instead set breakpoints in unpredictable places of
+the loaded code objects if they contain code from the same file.  This
+can result in unexpected breakpoint hits being reported.  When the
+code object containing the source lines is loaded, the incorrect
+breakpoints will be removed and replaced by the correct ones.  This
+problem can be avoided by only setting breakpoints in unloaded code
+objects using symbol or function names.
+
+The @code{HIP_ENABLE_DEFERRED_LOADING} environment variable can be
+used to disable deferred code object loading by the HIP runtime.  This
+ensures all code objects will be loaded when the inferior reaches the
+beginning of the @code{main} function.
+
+For example,
+
+@smallexample
+export HIP_ENABLE_DEFERRED_LOADING=0
+@end smallexample
+
+@emph{Note:}  If deferred code object loading is disabled and the
+application performs a @code{fork}, then the program may crash.
+
+@emph{Note:}  Disabling deferred code object loading can result in
+errors being reported when executing @value{GDBN} due to open file
+limitations when the application contains a large number of embedded
+device code objects.  With deferred code object loading enabled, only
+the device code objects actually invoked are loaded, and so
+@value{GDBN} opens fewer files.
+
+@item
+Watchpoints are not yet supported for @acronym{AMD GPU} devices.
+
+@item
+When single stepping, there can be times when @value{GDBN} appears to
+wait indefinitely for the single step to complete.  This can happen if
+the wavefront being single stepped is context switched off the GPU and
+cannot be restored due to another process or other hardware
+limitations.  If this happens, @samp{Ctrl-C} can be used to cancel the
+single step command, and commands used to switch to another thread or
+to allow other threads to complete.
+
+@item
+If no CPU thread is running, then @samp{Ctrl-C} is not able to stop
+@acronym{AMD GPU} threads.  This can happen, for instance, if you
+enable @code{scheduler-locking} after the whole program has stopped,
+and then resume an @acronym{AMD GPU} thread.  For example:
+
+@smallexample
+Thread 6 hit Breakpoint 1, with lanes [0-63], kernel () at test.cpp:38
+38          size_t l = 0;
+(@value{GDBP}) info threads
+  Id   Target Id                            Frame
+  1    Thread 0x7ffff6493880 (LWP 2222574)  0x00007ffff6cb989b in sched_yield () at ../sysdeps/unix/syscall-template.S:78
+  2    Thread 0x7ffff6492700 (LWP 2222582)  0x00007ffff6ccb50b in ioctl () at ../sysdeps/unix/syscall-template.S:78
+  4    Thread 0x7ffff5aff700 (LWP 2222584)  0x00007ffff6ccb50b in ioctl () at ../sysdeps/unix/syscall-template.S:78
+  5    Thread 0x7ffff515d700 (LWP 2222585)  0x00007ffff6764d81 in rocr::core::InterruptSignal::WaitRelaxed() from /opt/rocm/lib/libhsa-runtime64.so.1
+* 6    AMDGPU Wave 1:1:1:1 (0,0,0)/0        kernel () at test.cpp:38
+(@value{GDBP}) del 1
+(@value{GDBP}) set scheduler-locking on
+(@value{GDBP}) c
+Continuing.
+^C
+@end smallexample
+
+Above, @value{GDBN} does not respond to @samp{Ctrl-C}.  The only way
+to unblock the situation is to kill the @value{GDBN} process.
+
+@item
+@acronym{AMD GPU} target does not currently support calling inferior
+functions.
+
+@item
+@acronym{AMD GPU} debugging is not supported by @code{gdbserver}.
+
+@item
+No language-specific support for Fortran or OpenCL.  No OpenMP
+language extension support for C, C++, or Fortran.
+
+@item
+@value{GDBN} support for @acronym{AMD GPU} devices is not currently
+available under virtualization.
+
+@anchor{AMD GPU Signal Restrictions}
+@item
+Suppressing delivering some signals to a wavefront for some
+@acronym{AMD GPU} architectures may not prevent the @acronym{AMD ROCm}
+runtime putting the associated queue into the queue error state.  For
+example, suppressing the @code{SIGSEGV} signal may prevent the
+wavefront from being put in the halt state, but the @acronym{AMD ROCm}
+runtime may still put the associated queue into the queue error state.
+
+Suppressing delivering some signals, such as @code{SIGSEGV}, for a
+wavefront may also suppress the same signal raised by other
+@acronym{AMD GPU} hardware such as from @acronym{DMA} or from the
+@acronym{packet processor}, preventing the @acronym{AMD ROCm} runtime
+being notified.
+
+@xref{AMD GPU Signals, , @acronym{AMD GPU} Signals}.
+
+@anchor{AMD GPU Attaching Restrictions}
+@item
+By default, for some architectures, the @acronym{AMD GPU} device
+driver causes all @acronym{AMD GPU} wavefronts created when
+@value{GDBN} is not attached to be unable to report the dispatch
+associated with the wavefront, or the wavefront's work-group
+position.  The @samp{info threads} command will display this
+missing information with a @samp{?}.
+
+For example,
+
+@smallexample
+(gdb) info threads
+  Id   Target Id                                       Frame
+* 1    Thread 0x7ffff6987840 (LWP 62056) "bit_extract" 0x00007ffff6da489b in sched_yield () at ../sysdeps/unix/syscall-template.S:78
+  2    Thread 0x7ffff6986700 (LWP 62064) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
+  3    Thread 0x7ffff5f7f700 (LWP 62066) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
+  4    Thread 0x7ffff597f700 (LWP 62067) "bit_extract" 0x00007ffff6db650b in ioctl () at ../sysdeps/unix/syscall-template.S:78
+  5    AMDGPU Wave 1:2:?:1 (?,?,?)/? "bit_extract"     bit_extract_kernel (C_d=<optimized out>, A_d=<optimized out>, N=<optimized out>) at bit_extract.cpp:41
+@end smallexample
+
+This does not affect wavefronts created while @value{GDBN} is attached
+which are always capable of reporting this information.
+
+If the @env{HSA_ENABLE_DEBUG} environment variable is set to @samp{1}
+when the @acronym{AMD ROCm} runtime is initialized, then this
+information will be available for all architectures even for wavefronts
+created when @value{GDBN} was not attached.  Setting this environment
+variable may very marginally increase wavefront launch latency for some
+architectures for very short lived wavefronts.
+
+@item
+If an @acronym{AMD GPU} wavefront has the @code{DX10_CLAMP} bit set in
+the @code{MODE} register, enabled arithmetic exceptions will not be
+reported as @code{SIGFPE} signals.  This happens if the
+@code{DX10_CLAMP} kernel descriptor field is enabled.
+
+@xref{AMD GPU Signals, , @acronym{AMD GPU} Signals}.
+
+@item
+@value{GDBN} does not support single root I/O virtualization (SR-IOV)
+on any AMD GPU architecture that supports it.  That includes
+@samp{gfx1030}, @samp{gfx1031}, and @samp{gfx1032}.
+
+@end enumerate
 
 @node Controlling GDB
 @chapter Controlling @value{GDBN}
@@ -27563,6 +28281,46 @@  module.
 @item show debug aix-thread
 Show the current state of AIX thread debugging info display.
 
+@cindex AMD GPU debugging info
+@anchor{set debug amd-dbgapi-lib}
+@item set debug amd-dbgapi-lib
+@itemx show debug amd-dbgapi-lib
+
+The @code{set debug amd-dbgapi-lib log-level @var{level}} command can be used
+to enable diagnostic messages from the @samp{amd-dbgapi} library, where
+@var{level} can be:
+
+@table @code
+
+@item off
+no logging is enabled
+
+@item error
+fatal errors are reported
+
+@item warning
+fatal errors and warnings are reported
+
+@item info
+fatal errors, warnings, and info messages are reported
+
+@item verbose
+all messages are reported
+
+@end table
+
+The @code{show debug amd-dbgapi-lib log-level} command displays the current
+@samp{amd-dbgapi} library log level.
+
+@anchor{set debug amd-dbgapi}
+@item set debug amd-dbgapi
+@itemx show debug amd-dbgapi
+
+The @samp{set debug amd-dbgapi} command can be used
+to enable diagnostic messages in the @samp{amd-dbgapi} target.  The
+@samp{show debug amd-dbgapi} command displays the current setting.
+@xref{set debug amd-dbgapi}.
+
 @item set debug check-physname
 @cindex physname
 Check the results of the ``physname'' computation.  When reading DWARF
diff --git a/gdb/regcache.c b/gdb/regcache.c
index 02d6bdc271c..2d926fbed36 100644
--- a/gdb/regcache.c
+++ b/gdb/regcache.c
@@ -1915,7 +1915,8 @@  cooked_read_test (struct gdbarch *gdbarch)
 	{
 	  auto bfd_arch = gdbarch_bfd_arch_info (gdbarch)->arch;
 
-	  if (bfd_arch == bfd_arch_frv || bfd_arch == bfd_arch_h8300
+	  if (bfd_arch == bfd_arch_amdgcn
+	      || bfd_arch == bfd_arch_frv || bfd_arch == bfd_arch_h8300
 	      || bfd_arch == bfd_arch_m32c || bfd_arch == bfd_arch_sh
 	      || bfd_arch == bfd_arch_alpha || bfd_arch == bfd_arch_v850
 	      || bfd_arch == bfd_arch_msp430 || bfd_arch == bfd_arch_mep
diff --git a/gdb/solib-rocm.c b/gdb/solib-rocm.c
new file mode 100644
index 00000000000..2b965acc790
--- /dev/null
+++ b/gdb/solib-rocm.c
@@ -0,0 +1,679 @@ 
+/* Handle ROCm Code Objects for GDB, the GNU Debugger.
+
+   Copyright (C) 2019-2022 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "defs.h"
+
+#include "amd-dbgapi-target.h"
+#include "amdgpu-tdep.h"
+#include "arch-utils.h"
+#include "elf-bfd.h"
+#include "elf/amdgpu.h"
+#include "gdbsupport/fileio.h"
+#include "inferior.h"
+#include "observable.h"
+#include "solib.h"
+#include "solib-svr4.h"
+#include "solist.h"
+#include "symfile.h"
+
+/* ROCm-specific inferior data.  */
+
+struct solib_info
+{
+  /* List of code objects loaded into the inferior.  */
+  so_list *solib_list;
+};
+
+/* Per-inferior data key.  */
+static const registry<inferior>::key<solib_info> rocm_solib_data;
+
+static target_so_ops rocm_solib_ops;
+
+/* Free the solib linked list.  */
+
+static void
+rocm_free_solib_list (struct solib_info *info)
+{
+  while (info->solib_list != nullptr)
+    {
+      struct so_list *next = info->solib_list->next;
+
+      free_so (info->solib_list);
+      info->solib_list = next;
+    }
+
+  info->solib_list = nullptr;
+}
+
+
+/* Fetch the solib_info data for INF.  */
+
+static struct solib_info *
+get_solib_info (inferior *inf)
+{
+  solib_info *info = rocm_solib_data.get (inf);
+
+  if (info == nullptr)
+    info = rocm_solib_data.emplace (inf);
+
+  return info;
+}
+
+/* Relocate section addresses.  */
+
+static void
+rocm_solib_relocate_section_addresses (struct so_list *so,
+				       struct target_section *sec)
+{
+  if (!is_amdgpu_arch (gdbarch_from_bfd (so->abfd)))
+    {
+      svr4_so_ops.relocate_section_addresses (so, sec);
+      return;
+    }
+
+  lm_info_svr4 *li = (lm_info_svr4 *) so->lm_info;
+  sec->addr = sec->addr + li->l_addr;
+  sec->endaddr = sec->endaddr + li->l_addr;
+}
+
+static void rocm_update_solib_list ();
+
+static void
+rocm_solib_handle_event ()
+{
+  /* Since we sit on top of svr4_so_ops, we might get called following an event
+     concerning host libraries.  We must therefore forward the call.  If the
+     event was for a ROCm code object, it will be a no-op.  On the other hand,
+     if the event was for host libraries, rocm_update_solib_list will
+     essentially be a no-op (it will reload the same code object list as was
+     previously loaded).  */
+  svr4_so_ops.handle_event ();
+
+  rocm_update_solib_list ();
+}
+
+/* Make a deep copy of the solib linked list.  */
+
+static so_list *
+rocm_solib_copy_list (const so_list *src)
+{
+  struct so_list *dst = nullptr;
+  struct so_list **link = &dst;
+
+  while (src != nullptr)
+    {
+      struct so_list *newobj;
+
+      newobj = XNEW (struct so_list);
+      memcpy (newobj, src, sizeof (struct so_list));
+
+      lm_info_svr4 *src_li = (lm_info_svr4 *) src->lm_info;
+      newobj->lm_info = new lm_info_svr4 (*src_li);
+
+      newobj->next = nullptr;
+      *link = newobj;
+      link = &newobj->next;
+
+      src = src->next;
+    }
+
+  return dst;
+}
+
+/* Build a list of `struct so_list' objects describing the shared
+   objects currently loaded in the inferior.  */
+
+static struct so_list *
+rocm_solib_current_sos ()
+{
+  /* First, retrieve the host-side shared library list.  */
+  so_list *head = svr4_so_ops.current_sos ();
+
+  /* Then, the device-side shared library list.  */
+  so_list *list = get_solib_info (current_inferior ())->solib_list;
+
+  if (list == nullptr)
+    return head;
+
+  list = rocm_solib_copy_list (list);
+
+  if (head == nullptr)
+    return list;
+
+  /* Append our libraries to the end of the list.  */
+  so_list *tail;
+  for (tail = head; tail->next; tail = tail->next)
+    /* Nothing.  */;
+  tail->next = list;
+
+  return head;
+}
+
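The three overrides above only take effect once they are installed in the
rocm_solib_ops structure declared near the top of the file.  That wiring is
done further down, outside the portion quoted here; a minimal sketch of the
usual pattern (the function name and the exact set of overridden hooks are
assumptions):

    /* Sketch only: start from the SVR4 ops so host shared libraries keep
       working, then override the hooks defined above.  Additional hooks
       (e.g. for opening code-object BFDs) are presumably installed here
       too.  */
    static void
    init_rocm_solib_ops_sketch ()
    {
      rocm_solib_ops = svr4_so_ops;
      rocm_solib_ops.relocate_section_addresses
        = rocm_solib_relocate_section_addresses;
      rocm_solib_ops.handle_event = rocm_solib_handle_event;
      rocm_solib_ops.current_sos = rocm_solib_current_sos;
    }
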
+namespace {
+
+/* Interface to interact with a ROCm code object stream.  */
+
+struct rocm_code_object_stream
+{
+  DISABLE_COPY_AND_ASSIGN (rocm_code_object_stream);
+
+  /* Copy SIZE bytes from the underlying objfile storage starting at OFFSET
+     into the user provided buffer BUF.
+
+     Return the number of bytes actually copied (might be less than SIZE if
+     the end of the stream is reached).  */
+  virtual file_ptr read (void *buf, file_ptr size, file_ptr offset) = 0;
+
+  /* Retrieve file information in SB.
+
+     Return 0 on success.  On failure, set the appropriate bfd error number
+     (using bfd_set_error) and return -1.  */
+  int stat (struct stat *sb);
+
+  virtual ~rocm_code_object_stream () = default;
+
+protected:
+  rocm_code_object_stream () = default;
+
+  /* Return the size of the object file, or -1 if the size cannot be
+     determined.
+
+     This is a helper function for stat.  */
+  virtual LONGEST size () = 0;
+};
+
+int
+rocm_code_object_stream::stat (struct stat *sb)
+{
+  const LONGEST size = this->size ();
+  if (size == -1)
+    return -1;
+
+  memset (sb, '\0', sizeof (struct stat));
+  sb->st_size = size;
+  return 0;
+}
+
+/* Interface to a ROCm object stream which is embedded in an ELF file
+   accessible to the debugger.  */
+
+struct rocm_code_object_stream_file final : rocm_code_object_stream
+{
+  DISABLE_COPY_AND_ASSIGN (rocm_code_object_stream_file);
+
+  rocm_code_object_stream_file (int fd, ULONGEST offset, ULONGEST size);
+
+  file_ptr read (void *buf, file_ptr size, file_ptr offset) override;
+
+  LONGEST size () override;
+
+  ~rocm_code_object_stream_file () override;
+
+protected:
+
+  /* The target file descriptor for this stream.  */
+  int m_fd;
+
+  /* The offset of the ELF file image in the target file.  */
+  ULONGEST m_offset;
+
+  /* The size of the ELF file image.  The value 0 means that it was
+     unspecified in the URI descriptor.  */
+  ULONGEST m_size;
+};
+
+rocm_code_object_stream_file::rocm_code_object_stream_file
+  (int fd, ULONGEST offset, ULONGEST size)
+  : m_fd (fd), m_offset (offset), m_size (size)
+{
+}
+
+file_ptr
+rocm_code_object_stream_file::read (void *buf, file_ptr size,
+				    file_ptr offset)
+{
+  fileio_error target_errno;
+  file_ptr nbytes = 0;
+  while (size > 0)
+    {
+      QUIT;
+
+      file_ptr bytes_read
+	= target_fileio_pread (m_fd, static_cast<gdb_byte *> (buf) + nbytes,
+			       size, m_offset + offset + nbytes,
+			       &target_errno);
+
+      if (bytes_read == 0)
+	break;
+
+      if (bytes_read < 0)
+	{
+	  errno = fileio_error_to_host (target_errno);
+	  bfd_set_error (bfd_error_system_call);
+	  return -1;
+	}
+
+      nbytes += bytes_read;
+      size -= bytes_read;
+    }
+
+  return nbytes;
+}
+
+LONGEST
+rocm_code_object_stream_file::size ()
+{
+  if (m_size == 0)
+    {
+      fileio_error target_errno;
+      struct stat stat;
+      if (target_fileio_fstat (m_fd, &stat, &target_errno) < 0)
+	{
+	  errno = fileio_error_to_host (target_errno);
+	  bfd_set_error (bfd_error_system_call);
+	  return -1;
+	}
+
+      /* Check that the offset is valid.  */
+      if (m_offset >= stat.st_size)
+	{
+	  bfd_set_error (bfd_error_bad_value);
+	  return -1;
+	}
+
+      m_size = stat.st_size - m_offset;
+    }
+
+  return m_size;
+}
+
+rocm_code_object_stream_file::~rocm_code_object_stream_file ()
+{
+  fileio_error target_errno;
+  target_fileio_close (m_fd, &target_errno);
+}
+
+/* Interface to a code object which lives in the inferior's memory.  */
+
+struct rocm_code_object_stream_memory final : public rocm_code_object_stream
+{
+  DISABLE_COPY_AND_ASSIGN (rocm_code_object_stream_memory);
+
+  rocm_code_object_stream_memory (gdb::byte_vector buffer);
+
+  file_ptr read (void *buf, file_ptr size, file_ptr offset) override;
+
+protected:
+
+  /* Snapshot of the original ELF image taken during load.  This is done to
+     support the situation where an inferior uses an in-memory image, and
+     releases or re-uses this memory before GDB is done using it.  */
+  gdb::byte_vector m_objfile_image;
+
+  LONGEST size () override
+  {
+    return m_objfile_image.size ();
+  }
+};
+
+rocm_code_object_stream_memory::rocm_code_object_stream_memory
+  (gdb::byte_vector buffer)
+  : m_objfile_image (std::move (buffer))
+{
+}
+
+file_ptr
+rocm_code_object_stream_memory::read (void *buf, file_ptr size,
+				      file_ptr offset)
+{
+  if (size > m_objfile_image.size () - offset)
+    size = m_objfile_image.size () - offset;
+
+  memcpy (buf, m_objfile_image.data () + offset, size);
+  return size;
+}
+
+} /* anonymous namespace */
+
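rocm_bfd_iovec_open below is the `open' half of a BFD iovec-style stream; the
corresponding read and stat callbacks are presumably thin wrappers that cast
the opaque stream pointer back to the class defined above and forward to its
virtual functions.  A hedged sketch (callback names and the exact BFD callback
signatures are assumptions, not taken from the patch):

    /* Sketch only: forward BFD's pread and stat requests to the
       rocm_code_object_stream created by the open callback.  */
    static file_ptr
    rocm_bfd_iovec_pread_sketch (bfd *abfd, void *stream, void *buf,
                                 file_ptr nbytes, file_ptr offset)
    {
      return static_cast<rocm_code_object_stream *> (stream)
        ->read (buf, nbytes, offset);
    }

    static int
    rocm_bfd_iovec_stat_sketch (bfd *abfd, void *stream, struct stat *sb)
    {
      return static_cast<rocm_code_object_stream *> (stream)->stat (sb);
    }
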
+static void *
+rocm_bfd_iovec_open (bfd *abfd, void *inferior_void)
+{
+  gdb::string_view uri (bfd_get_filename (abfd));
+  gdb::string_view protocol_delim = "://";
+  size_t protocol_end = uri.find (protocol_delim);
+  std::string protocol = gdb::to_string (uri.substr (0, protocol_end));
+  protocol_end += protocol_delim.length ();
+
+  std::transform (protocol.begin (), protocol.end (), protocol.begin (),
+		  [] (unsigned char c) { return std::tolower (c); });
+
+  gdb::string_view path;
+  size_t path_end = uri.find_first_of ("#?", protocol_end);
+  if (path_end != std::string::npos)
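+    /* Skip the '#' or '?' separator so that PATH_END marks the start of
+       the query/fragment part.  */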
+    path = uri.substr (protocol_end, path_end++ - protocol_end);
+  else
+    path = uri.substr (protocol_end);
+
+  /* %-decode the string.  */
+  std::string decoded_path;
+  decoded_path.reserve (path.length ());
+  for (size_t i = 0; i < path.length (); ++i)
+    if (path[i] == '%'
+	&& i + 2 < path.length ()
+	&& std::isxdigit (path[i + 1])
+	&& std::isxdigit (path[i + 2]))
+      {
+	gdb::string_view hex_digits = path.substr (i + 1, 2);
+	decoded_path += std::stoi (gdb::to_string (hex_digits), 0, 16);
+	i += 2;
+      }
+    else
+      decoded_path += path[i];
+
+  /* Tokenize the query/fragment.  */
+  std::vector<gdb::string_view> tokens;
+  size_t pos, last = path_end;
+  while ((pos = uri.find ('&', last)) != std::string::npos)
+    {
+      tokens.emplace_back (uri.substr (last, pos - last));
+      last = pos + 1;
+    }
+
+  if (last != std::string::npos)
+    tokens.emplace_back (uri.substr (last));
+
+  /* Create a tag-value map from the tokenized query/fragment.  */
+  std::unordered_map<gdb::string_view, gdb::string_view,
+		     gdb::string_view_hash> params;
+  for (gdb::string_view token : tokens)
+    {
+      size_t delim = token.find ('=');
+      if (delim != std::string::npos)
+	{
+	  gdb::string_view tag = token.substr (0, delim);
+	  gdb::string_view val = token.substr (delim + 1);
+	  params.emplace (tag, val);
+	}
+    }
+
+  try
+    {
+      ULONGEST offset = 0;
+      ULONGEST size = 0;
+      inferior *inferior = static_cast<struct inferior *> (inferior_void);
+
+      auto try_strtoulst = [] (gdb::string_view v)
+	{
+	  errno = 0;
+	  ULONGEST value = strtoulst (v.data (), nullptr, 0);
+	  if (errno != 0)
+	    {
+	      /* The actual message doesn't matter; the exception is caught
+	         below, transformed into a BFD error, and the message is
+	         lost.  */
+	      error (_("Failed to parse integer."));
+	    }
+
+	  return value;
+	};
+
+      auto offset_it = params.find ("offset");
+      if (offset_it != params.end ())
+	offset = try_strtoulst (offset_it->second);
+
+      auto size_it = params.find ("size");
+      if (size_it != params.end ())
+	{
+	  size = try_strtoulst (size_it->second);
+	  if (size == 0)
+	    error (_("Invalid size value"));
+	}
+
+      if (protocol == "file")
+	{
+	  fileio_error target_errno;
+	  int fd = target_fileio_open (inferior, decoded_path.c_str (),
+				       FILEIO_O_RDONLY, false, 0,
+				       &target_errno);
+
+	  if (fd == -1)
+	    {
+	      errno = fileio_error_to_host (target_errno);
+	      bfd_set_error (bfd_error_system_call);
+	      return nullptr;
+	    }
+
+	  return new rocm_code_object_stream_file (fd, offset, size);
+	}
+
+      if (protocol == "memory")
+	{
+	  ULONGEST pid = try_strtoulst (path);
+	  if (pid != inferior->pid)
+	    {
+	      warning (_("`%s': code object is from another inferior"),
+		       gdb::to_string (uri).c_str ());
+	      bfd_set_error (bfd_error_bad_value);
+	      return nullptr;
+	    }
+
+	  gdb::byte_vector buffer (size);
+	  if (target_read_memory (offset, buffer.data (), size) != 0)
+	    {
+	      warning (_("Failed to copy the code object from the inferior"));
+	      bfd_set_error (bfd_error_bad_value);
+	      return nullptr;
+	    }
+
+	  return new rocm_code_object_stream_memory (std::move (buffer));
+	}
+
+      warning (_("`%s': protocol not supported: %s"),
+	       gdb::to_string (uri).c_str (), protocol.c_str ());
+      bfd_set_error (bfd_error_bad_value);
+      return nullptr;
+    }
+  catch (const gdb_exception_quit &ex)
+    {
+      set_quit_flag ();
+      bfd_set_error (bfd_error_bad_value);
+      return nullptr;
+    }
+  catch (const gdb_exception &ex)
+    {
+      bfd_set_error (bfd_error_bad_value);
+      return nullptr;
+    }
+}
+
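+/* Release the code object stream DATA.  Used as the "close" callback for
+   gdb_bfd_openr_iovec.  */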
+static int
+rocm_bfd_iovec_close (bfd *nbfd, void *data)
+{
+  delete static_cast<rocm_code_object_stream *> (data);
+
+  return 0;
+}
+
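+/* Read SIZE bytes at OFFSET from the code object stream DATA into BUF.
+   Used as the "pread" callback for gdb_bfd_openr_iovec.  */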
+static file_ptr
+rocm_bfd_iovec_pread (bfd *abfd, void *data, void *buf, file_ptr size,
+		      file_ptr offset)
+{
+  return static_cast<rocm_code_object_stream *> (data)->read (buf, size,
+							      offset);
+}
+
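+/* Fill SB with status information for the code object stream DATA.  Used
+   as the "stat" callback for gdb_bfd_openr_iovec.  */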
+static int
+rocm_bfd_iovec_stat (bfd *abfd, void *data, struct stat *sb)
+{
+  return static_cast<rocm_code_object_stream *> (data)->stat (sb);
+}
+
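+/* Open the solib designated by PATHNAME.  A regular file name is handled
+   by the SVR4 implementation, while a code object URI is opened using the
+   iovec callbacks above.  Verify that the resulting BFD is an AMDGPU HSA
+   code object with a supported OS ABI version.  */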
+static gdb_bfd_ref_ptr
+rocm_solib_bfd_open (const char *pathname)
+{
+  /* Handle regular files with SVR4 open.  */
+  if (strstr (pathname, "://") == nullptr)
+    return svr4_so_ops.bfd_open (pathname);
+
+  gdb_bfd_ref_ptr abfd
+    = gdb_bfd_openr_iovec (pathname, "elf64-amdgcn", rocm_bfd_iovec_open,
+			   current_inferior (), rocm_bfd_iovec_pread,
+			   rocm_bfd_iovec_close, rocm_bfd_iovec_stat);
+
+  if (abfd == nullptr)
+    error (_("Could not open `%s' as an executable file: %s"), pathname,
+	   bfd_errmsg (bfd_get_error ()));
+
+  /* Check bfd format.  */
+  if (!bfd_check_format (abfd.get (), bfd_object))
+    error (_("`%s': not in executable format: %s"),
+	   bfd_get_filename (abfd.get ()), bfd_errmsg (bfd_get_error ()));
+
+  unsigned char osabi = elf_elfheader (abfd)->e_ident[EI_OSABI];
+  unsigned char osabiversion = elf_elfheader (abfd)->e_ident[EI_ABIVERSION];
+
+  /* Check that the code object is using the HSA OS ABI.  */
+  if (osabi != ELFOSABI_AMDGPU_HSA)
+    error (_("`%s': ELF file OS ABI is not supported (%d)."),
+	   bfd_get_filename (abfd.get ()), osabi);
+
+  /* We support HSA code objects V3 and greater.  */
+  if (osabiversion < ELFABIVERSION_AMDGPU_HSA_V3)
+    error (_("`%s': ELF file HSA OS ABI version is not supported (%d)."),
+	   bfd_get_filename (abfd.get ()), osabiversion);
+
+  return abfd;
+}
+
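+/* The solib_create_inferior_hook implementation for ROCm: forget about the
+   code objects recorded for the previous run, then defer to the SVR4
+   hook.  */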
+static void
+rocm_solib_create_inferior_hook (int from_tty)
+{
+  rocm_free_solib_list (get_solib_info (current_inferior ()));
+
+  svr4_so_ops.solib_create_inferior_hook (from_tty);
+}
+
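+/* Rebuild the current inferior's list of code objects by querying
+   amd-dbgapi and recording each code object's load address and URI.  The
+   first time this runs, install the ROCm so_ops on the inferior's
+   gdbarch.  */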
+static void
+rocm_update_solib_list ()
+{
+  inferior *inf = current_inferior ();
+
+  amd_dbgapi_process_id_t process_id = get_amd_dbgapi_process_id (inf);
+  if (process_id.handle == AMD_DBGAPI_PROCESS_NONE.handle)
+    return;
+
+  solib_info *info = get_solib_info (inf);
+
+  rocm_free_solib_list (info);
+  struct so_list **link = &info->solib_list;
+
+  amd_dbgapi_code_object_id_t *code_object_list;
+  size_t count;
+
+  amd_dbgapi_status_t status
+    = amd_dbgapi_process_code_object_list (process_id, &count,
+					   &code_object_list, nullptr);
+  if (status != AMD_DBGAPI_STATUS_SUCCESS)
+    {
+      warning (_("amd_dbgapi_process_code_object_list failed (%s)"),
+	       get_status_string (status));
+      return;
+    }
+
+  for (size_t i = 0; i < count; ++i)
+    {
+      CORE_ADDR l_addr;
+      char *uri_bytes;
+
+      status = amd_dbgapi_code_object_get_info
+	(code_object_list[i], AMD_DBGAPI_CODE_OBJECT_INFO_LOAD_ADDRESS,
+	 sizeof (l_addr), &l_addr);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	continue;
+
+      status = amd_dbgapi_code_object_get_info
+	(code_object_list[i], AMD_DBGAPI_CODE_OBJECT_INFO_URI_NAME,
+	 sizeof (uri_bytes), &uri_bytes);
+      if (status != AMD_DBGAPI_STATUS_SUCCESS)
+	continue;
+
+      struct so_list *so = XCNEW (struct so_list);
+      lm_info_svr4 *li = new lm_info_svr4;
+      li->l_addr = l_addr;
+      so->lm_info = li;
+
+      strncpy (so->so_name, uri_bytes, sizeof (so->so_name));
+      so->so_name[sizeof (so->so_name) - 1] = '\0';
+      xfree (uri_bytes);
+
+      /* Make so_original_name unique so that code objects with the same URI
+	 but different load addresses are seen by gdb core as different shared
+	 objects.  */
+      xsnprintf (so->so_original_name, sizeof (so->so_original_name),
+		 "code_object_%ld", code_object_list[i].handle);
+
+      so->next = nullptr;
+      *link = so;
+      link = &so->next;
+    }
+
+  xfree (code_object_list);
+
+  if (rocm_solib_ops.current_sos == nullptr)
+    {
+      /* Override what we need to.  */
+      rocm_solib_ops = svr4_so_ops;
+      rocm_solib_ops.current_sos = rocm_solib_current_sos;
+      rocm_solib_ops.solib_create_inferior_hook
+	= rocm_solib_create_inferior_hook;
+      rocm_solib_ops.bfd_open = rocm_solib_bfd_open;
+      rocm_solib_ops.relocate_section_addresses
+	= rocm_solib_relocate_section_addresses;
+      rocm_solib_ops.handle_event = rocm_solib_handle_event;
+
+      /* Engage the ROCm so_ops.  */
+      set_gdbarch_so_ops (current_inferior ()->gdbarch, &rocm_solib_ops);
+    }
+}
+
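+/* The inferior_created observer for solib-rocm: rebuild the code object
+   list and force GDB to reload the solibs.  */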
+static void
+rocm_solib_target_inferior_created (inferior *inf)
+{
+  rocm_free_solib_list (get_solib_info (inf));
+  rocm_update_solib_list ();
+
+  /* Force GDB to reload the solibs.  */
+  current_inferior ()->pspace->clear_solib_cache ();
+  solib_add (nullptr, 0, auto_solib_add);
+}
+
+/* -Wmissing-prototypes */
+extern initialize_file_ftype _initialize_rocm_solib;
+
+void
+_initialize_rocm_solib ()
+{
+  /* The dependency on the amd-dbgapi exists because solib-rocm's
+     inferior_created observer needs amd-dbgapi to have attached the process,
+     which happens in amd_dbgapi_target's inferior_created observer.  */
+  gdb::observers::inferior_created.attach
+    (rocm_solib_target_inferior_created,
+     "solib-rocm",
+     { &get_amd_dbgapi_target_inferior_created_observer_token () });
+}
diff --git a/gdb/testsuite/gdb.rocm/simple.cpp b/gdb/testsuite/gdb.rocm/simple.cpp
new file mode 100644
index 00000000000..31dc56a1d8c
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/simple.cpp
@@ -0,0 +1,48 @@ 
+/* This testcase is part of GDB, the GNU debugger.
+
+   Copyright 2022 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include "hip/hip_runtime.h"
+#include <cassert>
+
+__global__ void
+do_an_addition (int a, int b, int *out)
+{
+  *out = a + b;
+}
+
+int
+main ()
+{
+  int *result_ptr, result;
+
+  /* Allocate memory for the device to write the result to.  */
+  hipError_t error = hipMalloc (&result_ptr, sizeof (int));
+  assert (error == hipSuccess);
+
+  /* Run `do_an_addition` on one workgroup containing one work item.  */
+  do_an_addition<<<dim3(1), dim3(1), 0, 0>>> (1, 2, result_ptr);
+
+  /* Copy result from device to host.  Note that this acts as a synchronization
+     point, waiting for the kernel dispatch to complete.  */
+  error = hipMemcpyDtoH (&result, result_ptr, sizeof (int));
+  assert (error == hipSuccess);
+
+  printf ("result is %d\n", result);
+  assert (result == 3);
+
+  return 0;
+}
diff --git a/gdb/testsuite/gdb.rocm/simple.exp b/gdb/testsuite/gdb.rocm/simple.exp
new file mode 100644
index 00000000000..f84df71414e
--- /dev/null
+++ b/gdb/testsuite/gdb.rocm/simple.exp
@@ -0,0 +1,52 @@ 
+# Copyright 2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# A simple AMD GPU debugging smoke test.  Run to a breakpoint in device code,
+# then continue until the end of the program.
+
+load_lib rocm.exp
+
+standard_testfile .cpp
+
+if [skip_hipcc_tests] {
+    verbose "skipping hip test: ${testfile}"
+    return
+}
+
+if {[build_executable "failed to prepare" $testfile $srcfile {debug hip}]} {
+    return
+}
+
+proc do_test {} {
+    clean_restart $::binfile
+
+    with_rocm_gpu_lock {
+	if ![runto_main] {
+	    return
+	}
+
+	gdb_test "with breakpoint pending on -- break do_an_addition" \
+	    "Breakpoint $::decimal \\(do_an_addition\\) pending."
+
+	gdb_test "continue" \
+	    "Thread $::decimal hit Breakpoint $::decimal, do_an_addition .*"
+
+	gdb_test "continue" \
+	    "Inferior 1 .* exited normally.*" \
+	    "continue to end"
+    }
+}
+
+do_test
diff --git a/gdb/testsuite/lib/future.exp b/gdb/testsuite/lib/future.exp
index 17736ed360a..b35429c15ff 100644
--- a/gdb/testsuite/lib/future.exp
+++ b/gdb/testsuite/lib/future.exp
@@ -121,6 +121,19 @@  proc gdb_find_rustc {} {
     return $rustc
 }
 
+proc gdb_find_hipcc {} {
+    global tool_root_dir
+    if {![is_remote host]} {
+	set hipcc [lookfor_file $tool_root_dir hipcc]
+	if {$hipcc == ""} {
+	    set hipcc [lookfor_file /opt/rocm/bin hipcc]
+	}
+    } else {
+	set hipcc ""
+    }
+    return $hipcc
+}
+
 proc gdb_find_ldd {} {
     global LDD_FOR_TARGET
     if [info exists LDD_FOR_TARGET] {
@@ -290,6 +303,18 @@  proc gdb_default_target_compile_1 {source destfile type options} {
 	    }
 	}
 
+	if { $i == "hip" } {
+	    set compiler_type "hip"
+	    if {[board_info $dest exists hipflags]} {
+		append add_flags " [target_info hipflags]"
+	    }
+	    if {[board_info $dest exists hipcompiler]} {
+		set compiler [target_info hipcompiler]
+	    } else {
+		set compiler [find_hipcc]
+	    }
+	}
+
 	if {[regexp "^dest=" $i]} {
 	    regsub "^dest=" $i "" tmp
 	    if {[board_info $tmp exists name]} {
@@ -352,6 +377,7 @@  proc gdb_default_target_compile_1 {source destfile type options} {
     global GO_FOR_TARGET
     global GO_LD_FOR_TARGET
     global RUSTC_FOR_TARGET
+    global HIPCC_FOR_TARGET
 
     if {[info exists GNATMAKE_FOR_TARGET]} {
 	if { $compiler_type == "ada" } {
@@ -398,6 +424,12 @@  proc gdb_default_target_compile_1 {source destfile type options} {
 	}
     }
 
+    if {[info exists HIPCC_FOR_TARGET]} {
+	if {$compiler_type == "hip"} {
+	    set compiler $HIPCC_FOR_TARGET
+	}
+    }
+
     if { $type == "executable" && $linker != "" } {
 	set compiler $linker
     }
@@ -687,6 +719,12 @@  if {[info procs find_rustc] == ""} {
     gdb_note [join [list $note_prefix "Rust" $note_suffix] ""]
 }
 
+if {[info procs find_hipcc] == ""} {
+    rename gdb_find_hipcc find_hipcc
+    set use_gdb_compile(hip) 1
+    gdb_note [join [list $note_prefix "HIP" $note_suffix] ""]
+}
+
 # If dejagnu's default_target_compile is missing support for any language,
 # override it.
 if { [array size use_gdb_compile] != 0 } {
diff --git a/gdb/testsuite/lib/gdb.exp b/gdb/testsuite/lib/gdb.exp
index e4ce3c30c2f..f51de435d17 100644
--- a/gdb/testsuite/lib/gdb.exp
+++ b/gdb/testsuite/lib/gdb.exp
@@ -4847,6 +4847,13 @@  proc gdb_compile {source dest type options} {
         lappend new_options "early_flags=-fno-stack-protector"
     }
 
+    # hipcc defaults to -O2, so add -O0 to early flags for the hip language.
+    # If "optimize" is also requested, another -O flag (e.g. -O2) will be added
+    # to the flags, overriding this -O0.
+    if {[lsearch -exact $options hip] != -1} {
+	lappend new_options "early_flags=-O0"
+    }
+
     # Because we link with libraries using their basename, we may need
     # (depending on the platform) to set a special rpath value, to allow
     # the executable to find the libraries it depends on.
diff --git a/gdb/testsuite/lib/rocm.exp b/gdb/testsuite/lib/rocm.exp
new file mode 100644
index 00000000000..e22f392deb1
--- /dev/null
+++ b/gdb/testsuite/lib/rocm.exp
@@ -0,0 +1,94 @@ 
+# Copyright (C) 2019-2022 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Support library for testing ROCm (AMD GPU) GDB features.
+
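+# Return 1 if ROCm/HIP tests should be skipped with the current test
+# configuration, otherwise 0.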
+proc skip_hipcc_tests { } {
+    # Only the native target supports ROCm debugging.  E.g., when
+    # testing against GDBserver, there's no point in running the ROCm
+    # tests.
+    if {[target_info gdb_protocol] != ""} {
+        return 1
+    }
+    return 0
+}
+
+# The lock file used to ensure that only one GDB has access to the GPU
+# at a time.
+set gpu_lock_filename $objdir/gpu-parallel.lock
+
+# Acquire lock file LOCKFILE.  Keeps trying until the lock file is
+# successfully created.
+
+proc lock_file_acquire {lockfile} {
+    verbose -log "acquiring lock file: $::subdir/${::gdb_test_file_name}.exp"
+    while {true} {
+	if {![catch {open $lockfile {WRONLY CREAT EXCL}} rc]} {
+	    set msg "locked by $::subdir/${::gdb_test_file_name}.exp"
+	    verbose -log "lock file: $msg"
+	    # For debugging, put info in the lockfile about who owns
+	    # it.
+	    puts $rc $msg
+	    flush $rc
+	    return [list $rc $lockfile]
+	}
+	after 10
+    }
+}
+
+# Release a lock file.
+
+proc lock_file_release {info} {
+    verbose -log "releasing lock file: $::subdir/${::gdb_test_file_name}.exp"
+
+    if {![catch {fconfigure [lindex $info 0]}]} {
+	if {![catch {
+	    close [lindex $info 0]
+	    file delete -force [lindex $info 1]
+	} rc]} {
+	    return ""
+	} else {
+	    return -code error "Error releasing lockfile: '$rc'"
+	}
+    } else {
+	error "invalid lock"
+    }
+}
+
+# Run BODY under the GPU lock.  Also calls gdb_exit before releasing
+# the GPU lock.
+
+proc with_rocm_gpu_lock { body } {
+    if {[info exists ::GDB_PARALLEL]} {
+	set lock_rc [lock_file_acquire $::gpu_lock_filename]
+    }
+
+    set code [catch {uplevel 1 $body} result]
+
+    # In case BODY returned early due to some testcase failing, and
+    # left GDB running, debugging the GPU.
+    gdb_exit
+
+    if {[info exists ::GDB_PARALLEL]} {
+	lock_file_release $lock_rc
+    }
+
+    if {$code == 1} {
+	global errorInfo errorCode
+	return -code $code -errorinfo $errorInfo -errorcode $errorCode $result
+    } else {
+	return -code $code $result
+    }
+}