@@ -728,6 +728,7 @@ ALL_64_TARGET_OBS = \
arch/aarch64.o \
arch/aarch64-insn.o \
arch/aarch64-mte-linux.o \
+ arch/aarch64-scalable-linux.o \
arch/amd64.o \
arch/riscv.o \
bpf-tdep.o \
@@ -1524,6 +1525,7 @@ HFILES_NO_SRCDIR = \
arch/aarch64.h \
arch/aarch64-insn.h \
arch/aarch64-mte-linux.h \
+ arch/aarch64-scalable-linux.h \
arch/arc.h \
arch/arm.h \
arch/i386.h \
@@ -1562,6 +1564,7 @@ HFILES_NO_SRCDIR = \
nat/aarch64-linux-hw-point.h \
nat/aarch64-mte-linux-ptrace.h \
nat/aarch64-scalable-linux-ptrace.h \
+ nat/aarch64-scalable-linux-sigcontext.h \
nat/amd64-linux-siginfo.h \
nat/gdb_ptrace.h \
nat/gdb_thread_db.h \
@@ -1627,6 +1630,7 @@ ALLDEPFILES = \
arch/aarch64.c \
arch/aarch64-insn.c \
arch/aarch64-mte-linux.c \
+ arch/aarch64-scalable-linux.c \
arch/amd64.c \
arch/arc.c \
arch/arm.c \
@@ -55,6 +55,7 @@
#include "arch/aarch64-mte-linux.h"
#include "nat/aarch64-mte-linux-ptrace.h"
+#include "arch/aarch64-scalable-linux.h"
#include <string.h>
@@ -313,8 +314,11 @@ store_fpregs_to_thread (const struct regcache *regcache)
}
}
-/* Fill GDB's register array with the sve register values
- from the current thread. */
+/* Fill GDB's REGCACHE with the valid SVE register values from the thread
+ associated with REGCACHE.
+
+ This function handles reading data from SVE or SSVE states, depending
+ on which state is active at the moment. */
static void
fetch_sveregs_from_thread (struct regcache *regcache)
@@ -323,8 +327,11 @@ fetch_sveregs_from_thread (struct regcache *regcache)
aarch64_sve_regs_copy_to_reg_buf (regcache->ptid ().lwp (), regcache);
}
-/* Store to the current thread the valid sve register
- values in the GDB's register array. */
+/* Store the valid SVE register values from GDB's REGCACHE to the thread
+ associated with REGCACHE.
+
+ This function handles writing data to SVE or SSVE states, depending
+ on which state is active at the moment. */
static void
store_sveregs_to_thread (struct regcache *regcache)
@@ -334,6 +341,41 @@ store_sveregs_to_thread (struct regcache *regcache)
aarch64_sve_regs_copy_from_reg_buf (regcache->ptid ().lwp (), regcache);
}
+/* Populate REGCACHE with the ZA register set (ZA, SVG and SVCR) of the
+   thread that REGCACHE belongs to.  The copy itself is delegated to
+   aarch64_za_regs_copy_to_reg_buf, which is expected to supply zeroed ZA
+   contents when the thread has no active ZA state.  */
+
+static void
+fetch_za_from_thread (struct regcache *regcache)
+{
+  /* The SME raw register numbers are recorded in the per-arch tdep.  */
+  aarch64_gdbarch_tdep *sme_tdep
+    = gdbarch_tdep<aarch64_gdbarch_tdep> (regcache->arch ());
+  const auto lwp = regcache->ptid ().lwp ();
+
+  aarch64_za_regs_copy_to_reg_buf (lwp, regcache, sme_tdep->sme_za_regnum,
+                                   sme_tdep->sme_svg_regnum,
+                                   sme_tdep->sme_svcr_regnum);
+}
+
+/* Write the ZA register set (ZA, SVG and SVCR) held in REGCACHE back to
+   the thread that REGCACHE belongs to.  */
+
+static void
+store_za_to_thread (struct regcache *regcache)
+{
+  /* The SME raw register numbers are recorded in the per-arch tdep.  */
+  aarch64_gdbarch_tdep *sme_tdep
+    = gdbarch_tdep<aarch64_gdbarch_tdep> (regcache->arch ());
+  const auto lwp = regcache->ptid ().lwp ();
+
+  /* Push the cached ZA state out to the thread.  */
+  aarch64_za_regs_copy_from_reg_buf (lwp, regcache, sme_tdep->sme_za_regnum,
+                                     sme_tdep->sme_svg_regnum,
+                                     sme_tdep->sme_svcr_regnum);
+}
+
/* Fill GDB's register array with the pointer authentication mask values from
the current thread. */
@@ -488,7 +530,10 @@ aarch64_fetch_registers (struct regcache *regcache, int regno)
if (regno == -1)
{
fetch_gregs_from_thread (regcache);
- if (tdep->has_sve ())
+
+ /* We attempt to fetch SVE registers if there is support for either
+ SVE or SME (due to the SSVE state of SME). */
+ if (tdep->has_sve () || tdep->has_sme ())
fetch_sveregs_from_thread (regcache);
else
fetch_fpregs_from_thread (regcache);
@@ -501,12 +546,16 @@ aarch64_fetch_registers (struct regcache *regcache, int regno)
if (tdep->has_tls ())
fetch_tlsregs_from_thread (regcache);
+
+ if (tdep->has_sme ())
+ fetch_za_from_thread (regcache);
}
/* General purpose register? */
else if (regno < AARCH64_V0_REGNUM)
fetch_gregs_from_thread (regcache);
/* SVE register? */
- else if (tdep->has_sve () && regno <= AARCH64_SVE_VG_REGNUM)
+ else if ((tdep->has_sve () || tdep->has_sme ())
+ && regno <= AARCH64_SVE_VG_REGNUM)
fetch_sveregs_from_thread (regcache);
/* FPSIMD register? */
else if (regno <= AARCH64_FPCR_REGNUM)
@@ -516,6 +565,10 @@ aarch64_fetch_registers (struct regcache *regcache, int regno)
&& (regno == AARCH64_PAUTH_DMASK_REGNUM (tdep->pauth_reg_base)
|| regno == AARCH64_PAUTH_CMASK_REGNUM (tdep->pauth_reg_base)))
fetch_pauth_masks_from_thread (regcache);
+ /* SME register? */
+ else if (tdep->has_sme () && regno >= tdep->sme_reg_base
+ && regno < tdep->sme_reg_base + 3)
+ fetch_za_from_thread (regcache);
/* MTE register? */
else if (tdep->has_mte ()
&& (regno == tdep->mte_reg_base))
@@ -577,7 +630,10 @@ aarch64_store_registers (struct regcache *regcache, int regno)
if (regno == -1)
{
store_gregs_to_thread (regcache);
- if (tdep->has_sve ())
+
+ /* We attempt to store SVE registers if there is support for either
+ SVE or SME (due to the SSVE state of SME). */
+ if (tdep->has_sve () || tdep->has_sme ())
store_sveregs_to_thread (regcache);
else
store_fpregs_to_thread (regcache);
@@ -587,16 +643,24 @@ aarch64_store_registers (struct regcache *regcache, int regno)
if (tdep->has_tls ())
store_tlsregs_to_thread (regcache);
+
+ if (tdep->has_sme ())
+ store_za_to_thread (regcache);
}
/* General purpose register? */
else if (regno < AARCH64_V0_REGNUM)
store_gregs_to_thread (regcache);
/* SVE register? */
- else if (tdep->has_sve () && regno <= AARCH64_SVE_VG_REGNUM)
+ else if ((tdep->has_sve () || tdep->has_sme ())
+ && regno <= AARCH64_SVE_VG_REGNUM)
store_sveregs_to_thread (regcache);
/* FPSIMD register? */
else if (regno <= AARCH64_FPCR_REGNUM)
store_fpregs_to_thread (regcache);
+ /* SME register? */
+ else if (tdep->has_sme () && regno >= tdep->sme_reg_base
+ && regno < tdep->sme_reg_base + 3)
+ store_za_to_thread (regcache);
/* MTE register? */
else if (tdep->has_mte ()
&& (regno == tdep->mte_reg_base))
@@ -787,10 +851,15 @@ aarch64_linux_nat_target::read_description ()
CORE_ADDR hwcap2 = linux_get_hwcap2 ();
aarch64_features features;
+ /* SVE/SSVE check. Reading VQ may return either the regular vector length
+ or the streaming vector length, depending on whether streaming mode is
+ active or not. */
features.vq = aarch64_sve_get_vq (tid);
features.pauth = hwcap & AARCH64_HWCAP_PACA;
features.mte = hwcap2 & HWCAP2_MTE;
features.tls = aarch64_tls_register_count (tid);
+ /* SME feature check. */
+ features.svq = aarch64_za_get_svq (tid);
return aarch64_read_description (features);
}
@@ -893,21 +962,24 @@ aarch64_linux_nat_target::thread_architecture (ptid_t ptid)
if (gdbarch_bfd_arch_info (inf->gdbarch)->bits_per_word == 32)
return inf->gdbarch;
- /* Only return it if the current vector length matches the one in the tdep. */
+ /* Only return the inferior's gdbarch if both vq and svq match the ones in
+ the tdep. */
aarch64_gdbarch_tdep *tdep
= gdbarch_tdep<aarch64_gdbarch_tdep> (inf->gdbarch);
uint64_t vq = aarch64_sve_get_vq (ptid.lwp ());
- if (vq == tdep->vq)
+ uint64_t svq = aarch64_za_get_svq (ptid.lwp ());
+ if (vq == tdep->vq && svq == tdep->sme_svq)
return inf->gdbarch;
- /* We reach here if the vector length for the thread is different from its
+ /* We reach here if any vector length for the thread is different from its
value at process start. Lookup gdbarch via info (potentially creating a
- new one) by using a target description that corresponds to the new vq value
- and the current architecture features. */
+ new one) by using a target description that corresponds to the new vq/svq
+ value and the current architecture features. */
const struct target_desc *tdesc = gdbarch_target_desc (inf->gdbarch);
aarch64_features features = aarch64_features_from_target_desc (tdesc);
features.vq = vq;
+ features.svq = svq;
struct gdbarch_info info;
info.bfd_arch_info = bfd_lookup_arch (bfd_arch_aarch64, bfd_mach_aarch64);
@@ -57,6 +57,8 @@
/* For inferior_ptid and current_inferior (). */
#include "inferior.h"
+/* For std::floor, std::log2, std::pow and std::sqrt.  */
+#include <cmath>
/* A Homogeneous Floating-Point or Short-Vector Aggregate may have at most
four members. */
@@ -190,6 +192,43 @@ struct aarch64_prologue_cache
trad_frame_saved_reg *saved_regs;
};
+/* Describes where the bytes of a ZA pseudo-register live inside the ZA
+   contents buffer.
+
+   A pseudo-register is made of CHUNKS pieces of CHUNK_SIZE bytes each.  The
+   first piece starts STARTING_OFFSET bytes into ZA and each following piece
+   starts STRIDE_SIZE bytes after the previous one.  Describing the layout
+   this way keeps the read/write code free of per-register conditionals.  */
+
+struct za_offsets
+{
+  /* Offset, in bytes, into ZA of the first byte of the pseudo-register.  */
+  size_t starting_offset;
+  /* The size, in bytes, of each contiguous chunk of the pseudo-register.  */
+  size_t chunk_size;
+  /* The number of chunks that make up the pseudo-register.  */
+  size_t chunks;
+  /* The distance, in bytes, between the starts of consecutive chunks.  */
+  size_t stride_size;
+};
+
+/* Holds the individual fields that make up the name of a ZA
+   pseudo-register.  The same fields are used to work out the offsets,
+   stride and sizes needed for reading and writing ZA tiles and tile
+   slices.  */
+
+struct za_pseudo_encoding
+{
+  /* The slice index (0 ~ 255).  Only used for tile slices.  */
+  uint8_t slice_index;
+  /* The tile number (0 ~ 15).  */
+  uint8_t tile_index;
+  /* Direction (true: horizontal, false: vertical).  Tile slices only.  */
+  bool horizontal;
+  /* Qualifier index (0 ~ 4).  These map to B, H, S, D and Q.  */
+  uint8_t qualifier_index;
+};
+
static void
show_aarch64_debug (struct ui_file *file, int from_tty,
struct cmd_list_element *c, const char *value)
@@ -2147,6 +2186,214 @@ aarch64_vnb_type (struct gdbarch *gdbarch)
return tdep->vnb_type;
}
+/* Tell whether REGNUM, which must be a SME pseudo-register number, falls
+   in the range reserved for the ZA tile slice pseudo-registers.  */
+
+static bool
+is_sme_tile_slice_pseudo_register (struct gdbarch *gdbarch, int regnum)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  /* Callers may only pass SME pseudo-register numbers.  */
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+  gdb_assert (tdep->sme_pseudo_base <= regnum);
+  gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+
+  const auto first_slice = tdep->sme_tile_slice_pseudo_base;
+  const auto slice_count = tdep->sme_tile_slice_pseudo_count;
+
+  /* Tile slices occupy [first_slice, first_slice + slice_count).  */
+  return first_slice <= regnum && regnum < first_slice + slice_count;
+}
+
+/* Decode REGNUM, a ZA pseudo-register number, into the fields that make up
+   its name, and store those fields in ENCODING.  */
+
+static void
+aarch64_za_decode_pseudos (struct gdbarch *gdbarch, int regnum,
+                           struct za_pseudo_encoding &encoding)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+  gdb_assert (tdep->sme_pseudo_base <= regnum);
+  gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+
+  if (!is_sme_tile_slice_pseudo_register (gdbarch, regnum))
+    {
+      /* Tile pseudo-register.  Work with its position relative to the
+         first tile pseudo-register.  */
+      const int tile_pos = regnum - tdep->sme_tile_pseudo_base;
+
+      /* Qualifier Q owns the positions (1 << Q) - 1 up to (2 << Q) - 2,
+         so the qualifier is the integer log2 of the position plus one.  */
+      encoding.qualifier_index = std::floor (std::log2 (tile_pos + 1));
+      /* What is left over once the qualifier is known is the tile
+         number.  */
+      encoding.tile_index = (tile_pos + 1) - (1 << encoding.qualifier_index);
+      /* Tiles have neither a slice number nor a direction.  */
+      encoding.slice_index = 0;
+      encoding.horizontal = false;
+      return;
+    }
+
+  /* Tile slice pseudo-register.  Work with its position relative to the
+     first tile slice pseudo-register.  */
+  const int slice_pos = regnum - tdep->sme_tile_slice_pseudo_base;
+
+  /* Each of the 5 qualifiers (B, H, S, D and Q) covers 32 * svq tile slice
+     pseudo-registers, which adds up to 160 ~ 2560 of them overall.  */
+  const auto regs_per_qualifier = tdep->sme_svq * 32;
+
+  /* The quotient selects the qualifier...  */
+  size_t qualifier = slice_pos / regs_per_qualifier;
+  encoding.qualifier_index = qualifier;
+
+  /* ...and the remainder packs the direction (d), the tile number (t) and
+     the slice number (s).  */
+  int dts = slice_pos % regs_per_qualifier;
+
+  /* Bit 0 holds the direction: even-numbered pseudo-registers are
+     horizontal tile slices and odd-numbered ones are vertical.  */
+  encoding.horizontal = (dts & 1) == 0;
+
+  /* The next QUALIFIER bits hold the tile number: B has 1 tile, H has 2,
+     S has 4, D has 8 and Q has 16.  */
+  encoding.tile_index = (dts >> 1) & ((1 << qualifier) - 1);
+
+  /* Whatever remains above the direction and tile bits is the slice
+     number.  */
+  encoding.slice_index = dts >> (qualifier + 1);
+}
+
+/* Return the vector type used for the ZA tile slice pseudo-register
+   described by ENCODING.  Only the qualifier in ENCODING is consulted.  */
+
+static struct type *
+aarch64_za_tile_slice_type (struct gdbarch *gdbarch,
+                            const struct za_pseudo_encoding &encoding)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+
+  /* The five slice types are built together on first use and cached in the
+     tdep; the Q type doubles as the "already built" marker.  */
+  if (tdep->sme_tile_slice_type_q == nullptr)
+    {
+      const struct builtin_type *bt = builtin_type (gdbarch);
+      const auto svq = tdep->sme_svq;
+
+      /* Every slice type spans svq * 16 bytes; only the element width, and
+         hence the element count, differs between qualifiers.  */
+      tdep->sme_tile_slice_type_q
+        = init_vector_type (bt->builtin_uint128, svq);
+      tdep->sme_tile_slice_type_d
+        = init_vector_type (bt->builtin_uint64, svq * 2);
+      tdep->sme_tile_slice_type_s
+        = init_vector_type (bt->builtin_uint32, svq * 4);
+      tdep->sme_tile_slice_type_h
+        = init_vector_type (bt->builtin_uint16, svq * 8);
+      tdep->sme_tile_slice_type_b
+        = init_vector_type (bt->builtin_uint8, svq * 16);
+    }
+
+  /* Qualifier indexes 0 ~ 4 stand for B, H, S, D and Q.  */
+  switch (encoding.qualifier_index)
+    {
+    case 0:
+      return tdep->sme_tile_slice_type_b;
+    case 1:
+      return tdep->sme_tile_slice_type_h;
+    case 2:
+      return tdep->sme_tile_slice_type_s;
+    case 3:
+      return tdep->sme_tile_slice_type_d;
+    case 4:
+      return tdep->sme_tile_slice_type_q;
+    default:
+      error (_("Invalid qualifier index %s for tile slice pseudo register."),
+             pulongest (encoding.qualifier_index));
+    }
+
+  /* Every switch arm above returns or calls error ().  */
+  gdb_assert_not_reached ("Unknown qualifier for ZA tile slice register");
+}
+
+/* Return the type used for the ZA tile pseudo-register described by
+   ENCODING: a vector of row vectors, i.e. a square matrix of elements.  */
+
+static struct type *
+aarch64_za_tile_type (struct gdbarch *gdbarch,
+                      const struct za_pseudo_encoding &encoding)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+
+  /* The five tile types are built together on first use and cached in the
+     tdep; the Q type doubles as the "already built" marker.  */
+  if (tdep->sme_tile_type_q == nullptr)
+    {
+      const struct builtin_type *bt = builtin_type (gdbarch);
+      const auto svq = tdep->sme_svq;
+      struct type *row_type;
+
+      /* Each tile has as many rows as there are elements in a row, so
+         narrower elements mean more elements per row and more rows.  */
+
+      /* Q tile: svq x svq elements of 128 bits.  */
+      row_type = init_vector_type (bt->builtin_uint128, svq);
+      tdep->sme_tile_type_q
+        = init_vector_type (row_type, svq);
+
+      /* D tile: (svq * 2) x (svq * 2) elements of 64 bits.  */
+      row_type = init_vector_type (bt->builtin_uint64, svq * 2);
+      tdep->sme_tile_type_d
+        = init_vector_type (row_type, svq * 2);
+
+      /* S tile: (svq * 4) x (svq * 4) elements of 32 bits.  */
+      row_type = init_vector_type (bt->builtin_uint32, svq * 4);
+      tdep->sme_tile_type_s
+        = init_vector_type (row_type, svq * 4);
+
+      /* H tile: (svq * 8) x (svq * 8) elements of 16 bits.  */
+      row_type = init_vector_type (bt->builtin_uint16, svq * 8);
+      tdep->sme_tile_type_h
+        = init_vector_type (row_type, svq * 8);
+
+      /* B tile: (svq * 16) x (svq * 16) elements of 8 bits.  */
+      row_type = init_vector_type (bt->builtin_uint8, svq * 16);
+      tdep->sme_tile_type_b
+        = init_vector_type (row_type, svq * 16);
+    }
+
+  /* Qualifier indexes 0 ~ 4 stand for B, H, S, D and Q.  */
+  switch (encoding.qualifier_index)
+    {
+    case 0:
+      return tdep->sme_tile_type_b;
+    case 1:
+      return tdep->sme_tile_type_h;
+    case 2:
+      return tdep->sme_tile_type_s;
+    case 3:
+      return tdep->sme_tile_type_d;
+    case 4:
+      return tdep->sme_tile_type_q;
+    default:
+      error (_("Invalid qualifier index %s for ZA tile pseudo register."),
+             pulongest (encoding.qualifier_index));
+    }
+
+  /* Every switch arm above returns or calls error ().  */
+  gdb_assert_not_reached ("unknown qualifier for tile pseudo-register");
+}
+
/* Return the type for an AdvSISD V register. */
static struct type *
@@ -2579,6 +2826,73 @@ is_w_pseudo_register (struct gdbarch *gdbarch, int regnum)
return false;
}
+/* Tell whether REGNUM is one of the SME pseudo-registers, i.e. a ZA tile
+   or ZA tile slice pseudo-register.  Always FALSE without SME.  */
+
+static bool
+is_sme_pseudo_register (struct gdbarch *gdbarch, int regnum)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  if (!tdep->has_sme ())
+    return false;
+
+  return (tdep->sme_pseudo_base <= regnum
+          && regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+}
+
+/* Build the name of the ZA tile slice pseudo-register described by
+   ENCODING, in the form za<tile><direction><qualifier><slice>.  */
+
+static const std::string
+aarch64_za_tile_slice_name (const struct za_pseudo_encoding &encoding)
+{
+  gdb_assert (encoding.qualifier_index >= 0);
+  gdb_assert (encoding.qualifier_index <= 4);
+  gdb_assert (encoding.tile_index >= 0);
+  gdb_assert (encoding.tile_index <= 15);
+  gdb_assert (encoding.slice_index >= 0);
+  gdb_assert (encoding.slice_index <= 255);
+
+  const char qualifier_letters[6] = "bhsdq";
+  return string_printf ("za%d%c%c%d", encoding.tile_index,
+                        encoding.horizontal ? 'h' : 'v',
+                        qualifier_letters[encoding.qualifier_index],
+                        encoding.slice_index);
+}
+
+/* Build the name of the ZA tile pseudo-register described by ENCODING, in
+   the form za<tile><qualifier>.  The slice and direction are not used.  */
+
+static const std::string
+aarch64_za_tile_name (const struct za_pseudo_encoding &encoding)
+{
+  gdb_assert (encoding.qualifier_index >= 0);
+  gdb_assert (encoding.qualifier_index <= 4);
+  gdb_assert (encoding.tile_index >= 0);
+  gdb_assert (encoding.tile_index <= 15);
+
+  const char qualifier_letters[6] = "bhsdq";
+  return string_printf ("za%d%c", encoding.tile_index,
+                        qualifier_letters[encoding.qualifier_index]);
+}
+
+/* Return the type of REGNUM, a SME pseudo-register number.  Tile slices
+   are vectors, while tiles are vectors of vectors.  */
+
+static struct type *
+aarch64_sme_pseudo_register_type (struct gdbarch *gdbarch, int regnum)
+{
+  /* The fields decoded from REGNUM drive the choice of type.  */
+  struct za_pseudo_encoding fields;
+  aarch64_za_decode_pseudos (gdbarch, regnum, fields);
+
+  const bool is_slice = is_sme_tile_slice_pseudo_register (gdbarch, regnum);
+
+  return (is_slice ? aarch64_za_tile_slice_type (gdbarch, fields)
+          : aarch64_za_tile_type (gdbarch, fields));
+}
+
/* Return the pseudo register name corresponding to register regnum. */
static const char *
@@ -2699,6 +3013,9 @@ aarch64_pseudo_register_name (struct gdbarch *gdbarch, int regnum)
return sve_v_name[p_regnum - AARCH64_SVE_V0_REGNUM];
}
+ if (is_sme_pseudo_register (gdbarch, regnum))
+ return tdep->sme_pseudo_names[regnum - tdep->sme_pseudo_base].c_str ();
+
/* RA_STATE is used for unwinding only. Do not assign it a name - this
prevents it from being read by methods such as
mi_cmd_trace_frame_collected. */
@@ -2741,6 +3058,9 @@ aarch64_pseudo_register_type (struct gdbarch *gdbarch, int regnum)
if (is_w_pseudo_register (gdbarch, regnum))
return builtin_type (gdbarch)->builtin_uint32;
+ if (is_sme_pseudo_register (gdbarch, regnum))
+ return aarch64_sme_pseudo_register_type (gdbarch, regnum);
+
if (tdep->has_pauth () && regnum == tdep->ra_sign_state_regnum)
return builtin_type (gdbarch)->builtin_uint64;
@@ -2773,6 +3093,8 @@ aarch64_pseudo_register_reggroup_p (struct gdbarch *gdbarch, int regnum,
else if (tdep->has_sve () && p_regnum >= AARCH64_SVE_V0_REGNUM
&& p_regnum < AARCH64_SVE_V0_REGNUM + AARCH64_V_REGS_NUM)
return group == all_reggroup || group == vector_reggroup;
+ else if (is_sme_pseudo_register (gdbarch, regnum))
+ return group == all_reggroup || group == vector_reggroup;
/* RA_STATE is used for unwinding only. Do not assign it to any groups. */
if (tdep->has_pauth () && regnum == tdep->ra_sign_state_regnum)
return 0;
@@ -2802,6 +3124,122 @@ aarch64_pseudo_read_value_1 (struct gdbarch *gdbarch,
return result_value;
}
+/* Helper function for reading/writing ZA pseudo-registers.  Given REGNUM,
+   a ZA pseudo-register number, return, in OFFSETS, the positioning of the
+   bytes of ZA that must be read from/written to.  */
+
+static void
+aarch64_za_offsets_from_regnum (struct gdbarch *gdbarch, int regnum,
+                                struct za_offsets &offsets)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+  gdb_assert (tdep->sme_pseudo_base <= regnum);
+  gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+
+  struct za_pseudo_encoding encoding;
+
+  /* Decode the ZA pseudo-register number.  */
+  aarch64_za_decode_pseudos (gdbarch, regnum, encoding);
+
+  /* Fetch the streaming vector length.  */
+  size_t svl = sve_vl_from_vq (tdep->sme_svq);
+
+  if (is_sme_tile_slice_pseudo_register (gdbarch, regnum))
+    {
+      if (encoding.horizontal)
+        {
+          /* Horizontal tile slices are contiguous ranges of svl bytes.  */
+
+          /* A tile is made of the ZA rows (of svl bytes each) starting at
+             row tile_index and spaced (1 << qualifier_index) rows apart, so
+             slice N starts N * (svl << qualifier_index) bytes further in.  */
+          offsets.starting_offset
+            = encoding.tile_index * svl + encoding.slice_index
+                                          * (svl << encoding.qualifier_index);
+          /* Horizontal tile slice data is contiguous and thus doesn't have
+             a stride.  */
+          offsets.stride_size = 0;
+          /* Horizontal tile slice data is contiguous and thus only has 1
+             chunk.  */
+          offsets.chunks = 1;
+          /* The chunk size is always svl bytes.  */
+          offsets.chunk_size = svl;
+        }
+      else
+        {
+          /* Vertical tile slices are non-contiguous ranges of
+             (1 << qualifier_index) bytes.  */
+
+          /* The starting offset depends on the tile number (to locate the
+             tile in the ZA buffer), the slice index (to locate the element
+             within the tile slice) and the qualifier.  */
+          offsets.starting_offset
+            = encoding.tile_index * svl + encoding.slice_index
+                                          * (1 << encoding.qualifier_index);
+          /* The offset between vertical tile slices depends on the qualifier
+             and svl.  */
+          offsets.stride_size = svl << encoding.qualifier_index;
+          /* The number of chunks depends on svl and the qualifier size.  */
+          offsets.chunks = svl >> encoding.qualifier_index;
+          /* The chunk size depends on the qualifier.  */
+          offsets.chunk_size = 1 << encoding.qualifier_index;
+        }
+    }
+  else
+    {
+      /* ZA tile pseudo-register.  */
+
+      /* The first row of the tile is row tile_index of ZA.  */
+      offsets.starting_offset = encoding.tile_index * svl;
+      /* The offset between tile slices depends on the qualifier and svl.  */
+      offsets.stride_size = svl << encoding.qualifier_index;
+      /* The number of chunks depends on the qualifier and svl.  */
+      offsets.chunks = svl >> encoding.qualifier_index;
+      /* The chunk size is always svl bytes.  */
+      offsets.chunk_size = svl;
+    }
+}
+
+/* Read REGNUM, a SME pseudo-register number, out of REGCACHE and store its
+   contents in RESULT, which is also the return value.  */
+
+static struct value *
+aarch64_sme_pseudo_register_read (struct gdbarch *gdbarch,
+                                  readable_regcache *regcache, int regnum,
+                                  struct value *result)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+  gdb_assert (tdep->sme_pseudo_base <= regnum);
+  gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+
+  /* Work out which bytes of ZA back this pseudo-register.  */
+  struct za_offsets layout;
+  aarch64_za_offsets_from_regnum (gdbarch, regnum, layout);
+
+  /* Pull the whole svl x svl bytes ZA matrix into a scratch buffer.  */
+  size_t svl = sve_vl_from_vq (tdep->sme_svq);
+  gdb::byte_vector za_bytes (std::pow (svl, 2));
+  regcache->raw_read (tdep->sme_za_regnum, za_bytes.data ());
+
+  /* Gather the strided chunks of ZA into the contiguous value buffer.  */
+  for (size_t i = 0; i < layout.chunks; i++)
+    {
+      size_t za_pos = layout.starting_offset + i * layout.stride_size;
+      size_t value_pos = i * layout.chunk_size;
+
+      memcpy (result->contents_raw ().data () + value_pos,
+              za_bytes.data () + za_pos, layout.chunk_size);
+    }
+
+  return result;
+}
+
/* Implement the "pseudo_register_read_value" gdbarch method. */
static struct value *
@@ -2835,6 +3273,9 @@ aarch64_pseudo_read_value (struct gdbarch *gdbarch, readable_regcache *regcache,
return result_value;
}
+ else if (is_sme_pseudo_register (gdbarch, regnum))
+ return aarch64_sme_pseudo_register_read (gdbarch, regcache, regnum,
+ result_value);
regnum -= gdbarch_num_regs (gdbarch);
@@ -2894,6 +3335,44 @@ aarch64_pseudo_write_1 (struct gdbarch *gdbarch, struct regcache *regcache,
regcache->raw_write (v_regnum, reg_buf);
}
+/* Store the bytes from DATA into REGNUM, a SME pseudo-register number.
+   Only the bytes of ZA backing REGNUM change; the rest of ZA is kept.  */
+
+static void
+aarch64_sme_pseudo_register_write (struct gdbarch *gdbarch,
+                                   struct regcache *regcache,
+                                   int regnum, const gdb_byte *data)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_svq > 0);
+  gdb_assert (tdep->sme_pseudo_base <= regnum);
+  gdb_assert (regnum < tdep->sme_pseudo_base + tdep->sme_pseudo_count);
+
+  /* Locate the bytes of ZA that back the pseudo-register.  */
+  struct za_offsets offsets;
+  aarch64_za_offsets_from_regnum (gdbarch, regnum, offsets);
+
+  /* Fetch the current contents of ZA.  A pseudo-register only covers part
+     of ZA, so this must be a read-modify-write or the rest gets clobbered.  */
+  size_t svl = sve_vl_from_vq (tdep->sme_svq);
+  gdb::byte_vector za (svl * svl);
+  regcache->raw_read (tdep->sme_za_regnum, za.data ());
+
+  /* Scatter DATA over the chunks of ZA that back the pseudo-register.  */
+  for (size_t chunk = 0; chunk < offsets.chunks; chunk++)
+    {
+      const gdb_byte *source = data + chunk * offsets.chunk_size;
+      gdb_byte *destination
+        = za.data () + offsets.starting_offset + chunk * offsets.stride_size;
+      memcpy (destination, source, offsets.chunk_size);
+    }
+
+  /* Write back to ZA.  */
+  regcache->raw_write (tdep->sme_za_regnum, za.data ());
+}
+
/* Implement the "pseudo_register_write" gdbarch method. */
static void
@@ -2921,6 +3400,11 @@ aarch64_pseudo_write (struct gdbarch *gdbarch, struct regcache *regcache,
regcache->raw_write_part (x_regnum, offset, 4, buf);
return;
}
+ else if (is_sme_pseudo_register (gdbarch, regnum))
+ {
+ aarch64_sme_pseudo_register_write (gdbarch, regcache, regnum, buf);
+ return;
+ }
regnum -= gdbarch_num_regs (gdbarch);
@@ -3500,6 +3984,33 @@ aarch64_get_tdesc_vq (const struct target_desc *tdesc)
return sve_vq_from_vl (vl);
}
+
+/* Return the svq (streaming vector quotient) that target description TDESC
+   was created with, or 0 if TDESC carries no SME feature.  */
+
+static uint64_t
+aarch64_get_tdesc_svq (const struct target_desc *tdesc)
+{
+  /* A description without registers cannot describe SME.  */
+  if (!tdesc_has_registers (tdesc))
+    return 0;
+
+  const struct tdesc_feature *sme_feature
+    = tdesc_find_feature (tdesc, "org.gnu.gdb.aarch64.sme");
+
+  if (sme_feature == nullptr)
+    return 0;
+
+  /* The size of ZA is reported in bits and amounts to svl x svl bytes, so
+     svl is the square root of the size in bytes.  */
+  size_t za_bits = tdesc_register_bitsize (sme_feature, "za");
+
+  size_t svl = std::sqrt (za_bits / 8);
+
+  /* The quotient follows directly from the vector length.  */
+  return sve_vq_from_vl (svl);
+}
+
/* Get the AArch64 features present in the given target description. */
aarch64_features
@@ -3535,6 +4046,8 @@ aarch64_features_from_target_desc (const struct target_desc *tdesc)
features.tls = 1;
}
+ features.svq = aarch64_get_tdesc_svq (tdesc);
+
return features;
}
@@ -3654,6 +4167,35 @@ aarch64_remove_non_address_bits (struct gdbarch *gdbarch, CORE_ADDR pointer)
return aarch64_remove_top_bits (pointer, mask);
}
+/* Append to NAMES the names of all the SME pseudo-registers for the
+   streaming vector length of GDBARCH: tile slices first, then tiles.  */
+
+static void
+aarch64_initialize_sme_pseudo_names (struct gdbarch *gdbarch,
+                                     std::vector<std::string> &names)
+{
+  aarch64_gdbarch_tdep *tdep = gdbarch_tdep<aarch64_gdbarch_tdep> (gdbarch);
+
+  gdb_assert (tdep->has_sme ());
+  gdb_assert (tdep->sme_tile_slice_pseudo_base > 0);
+  gdb_assert (tdep->sme_tile_pseudo_base > 0);
+
+  struct za_pseudo_encoding fields;
+  const int first_slice = tdep->sme_tile_slice_pseudo_base;
+  const int first_tile = tdep->sme_tile_pseudo_base;
+  /* Names are appended in pseudo-register number order.  */
+  for (int i = 0; i < tdep->sme_tile_slice_pseudo_count; i++)
+    {
+      aarch64_za_decode_pseudos (gdbarch, first_slice + i, fields);
+      names.push_back (aarch64_za_tile_slice_name (fields));
+    }
+  for (int i = 0; i < AARCH64_ZA_TILES_NUM; i++)
+    {
+      aarch64_za_decode_pseudos (gdbarch, first_tile + i, fields);
+      names.push_back (aarch64_za_tile_name (fields));
+    }
+}
+
/* Initialize the current architecture based on INFO. If possible,
re-use an architecture from ARCHES, which is a list of
architectures already created during this debugging session.
@@ -3671,11 +4213,17 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
int first_pauth_regnum = -1, ra_sign_state_offset = -1;
int first_mte_regnum = -1, first_tls_regnum = -1;
uint64_t vq = aarch64_get_tdesc_vq (info.target_desc);
+ uint64_t svq = aarch64_get_tdesc_svq (info.target_desc);
if (vq > AARCH64_MAX_SVE_VQ)
internal_error (_("VQ out of bounds: %s (max %d)"),
pulongest (vq), AARCH64_MAX_SVE_VQ);
+ if (svq > AARCH64_MAX_SVE_VQ)
+ internal_error (_("Streaming vector quotient (svq) out of bounds: %s"
+ " (max %d)"),
+ pulongest (svq), AARCH64_MAX_SVE_VQ);
+
/* If there is already a candidate, use it. */
for (gdbarch_list *best_arch = gdbarch_list_lookup_by_info (arches, &info);
best_arch != nullptr;
@@ -3683,15 +4231,21 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
{
aarch64_gdbarch_tdep *tdep
= gdbarch_tdep<aarch64_gdbarch_tdep> (best_arch->gdbarch);
- if (tdep && tdep->vq == vq)
+ if (tdep && tdep->vq == vq && tdep->sme_svq == svq)
return best_arch->gdbarch;
}
/* Ensure we always have a target descriptor, and that it is for the given VQ
value. */
const struct target_desc *tdesc = info.target_desc;
- if (!tdesc_has_registers (tdesc))
- tdesc = aarch64_read_description ({});
+ if (!tdesc_has_registers (tdesc) || vq != aarch64_get_tdesc_vq (tdesc)
+ || svq != aarch64_get_tdesc_svq (tdesc))
+ {
+ aarch64_features features;
+ features.vq = vq;
+ features.svq = svq;
+ tdesc = aarch64_read_description (features);
+ }
gdb_assert (tdesc);
feature_core = tdesc_find_feature (tdesc,"org.gnu.gdb.aarch64.core");
@@ -3755,6 +4309,35 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
num_pseudo_regs += 32; /* add the Bn scalar register pseudos */
}
+ int first_sme_regnum = -1;
+ int first_sme_pseudo_regnum = -1;
+ const struct tdesc_feature *feature_sme
+ = tdesc_find_feature (tdesc, "org.gnu.gdb.aarch64.sme");
+ if (feature_sme != nullptr)
+ {
+ /* Record the first SME register. */
+ first_sme_regnum = num_regs;
+
+ valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (),
+ num_regs++, "svg");
+
+ valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (),
+ num_regs++, "svcr");
+
+ valid_p &= tdesc_numbered_register (feature_sme, tdesc_data.get (),
+ num_regs++, "za");
+
+ /* Record the first SME pseudo register. */
+ first_sme_pseudo_regnum = num_pseudo_regs;
+
+      /* Add the ZA tile slice pseudo-registers.  Their number depends on the
+         svl, and is always a multiple of 5.  */
+ num_pseudo_regs += (svq << 5) * 5;
+
+ /* Add the ZA tile pseudo registers. */
+ num_pseudo_regs += AARCH64_ZA_TILES_NUM;
+ }
+
/* Add the TLS register. */
int tls_register_count = 0;
if (feature_tls != nullptr)
@@ -3868,6 +4451,14 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
tdep->tls_regnum_base = first_tls_regnum;
tdep->tls_register_count = tls_register_count;
+ /* Set the SME register set details. The pseudo-registers will be adjusted
+ later. */
+ tdep->sme_reg_base = first_sme_regnum;
+ tdep->sme_svg_regnum = first_sme_regnum;
+ tdep->sme_svcr_regnum = first_sme_regnum + 1;
+ tdep->sme_za_regnum = first_sme_regnum + 2;
+ tdep->sme_svq = svq;
+
set_gdbarch_push_dummy_call (gdbarch, aarch64_push_dummy_call);
set_gdbarch_frame_align (gdbarch, aarch64_frame_align);
@@ -3984,6 +4575,86 @@ aarch64_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
set_gdbarch_remove_non_address_bits (gdbarch,
aarch64_remove_non_address_bits);
+ /* SME pseudo-registers. */
+ if (tdep->has_sme ())
+ {
+ tdep->sme_pseudo_base = num_regs + first_sme_pseudo_regnum;
+ tdep->sme_tile_slice_pseudo_base = tdep->sme_pseudo_base;
+ tdep->sme_tile_slice_pseudo_count = (svq * 32) * 5;
+ tdep->sme_tile_pseudo_base
+ = tdep->sme_pseudo_base + tdep->sme_tile_slice_pseudo_count;
+ tdep->sme_pseudo_count
+ = tdep->sme_tile_slice_pseudo_count + AARCH64_ZA_TILES_NUM;
+
+ /* The SME ZA pseudo-registers are a set of 160 to 2560 pseudo-registers
+ depending on the value of svl.
+
+ The tile pseudo-registers are organized around their qualifiers
+ (b, h, s, d and q). Their numbers are distributed as follows:
+
+ b 0
+ h 1~2
+ s 3~6
+ d 7~14
+ q 15~30
+
+ The naming of the tile pseudo-registers follows the pattern za<t><q>,
+ where:
+
+ <t> is the tile number, with the following possible values based on
+ the qualifiers:
+
+ Qualifier - Allocated indexes
+
+ b - 0
+ h - 0~1
+ s - 0~3
+ d - 0~7
+ q - 0~15
+
+ <q> is the qualifier: b, h, s, d and q.
+
+ The tile slice pseudo-registers are organized around their
+ qualifiers as well (b, h, s, d and q), but also around their
+ direction (h - horizontal and v - vertical).
+
+ Even-numbered tile slice pseudo-registers are horizontally-oriented
+ and odd-numbered tile slice pseudo-registers are vertically-oriented.
+
+ Their numbers are distributed as follows:
+
+ Qualifier - Allocated indexes
+
+ b tile slices - 0~511
+ h tile slices - 512~1023
+ s tile slices - 1024~1535
+ d tile slices - 1536~2047
+ q tile slices - 2048~2559
+
+ The naming of the tile slice pseudo-registers follows the pattern
+ za<t><d><q><s>, where:
+
+ <t> is the tile number as described for the tile pseudo-registers.
+ <d> is the direction of the tile slice (h or v)
+ <q> is the qualifier of the tile slice (b, h, s, d or q)
+ <s> is the slice number, defined as follows:
+
+ Qualifier - Allocated indexes
+
+ b - 0~15
+ h - 0~7
+ s - 0~3
+ d - 0~1
+ q - 0
+
+ We have helper functions to translate to/from register index from/to
+ the set of fields that make the pseudo-register names. */
+
+ /* Build the array of pseudo-register names available for this
+ particular gdbarch configuration. */
+ aarch64_initialize_sme_pseudo_names (gdbarch, tdep->sme_pseudo_names);
+ }
+
/* Add standard register aliases. */
for (i = 0; i < ARRAY_SIZE (aarch64_register_aliases); i++)
user_reg_add (gdbarch, aarch64_register_aliases[i].name,
@@ -4005,6 +4676,48 @@ aarch64_dump_tdep (struct gdbarch *gdbarch, struct ui_file *file)
gdb_printf (file, _("aarch64_dump_tdep: Lowest pc = 0x%s\n"),
paddress (gdbarch, tdep->lowest_pc));
+
+  /* SME fields.  The register numbers, bases and counts are signed ints
+     (sme_reg_base is set to -1 when the SME feature is absent), so they
+     are printed with plongest rather than pulongest.  */
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_q = %s\n"),
+              host_address_to_string (tdep->sme_tile_type_q));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_d = %s\n"),
+              host_address_to_string (tdep->sme_tile_type_d));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_s = %s\n"),
+              host_address_to_string (tdep->sme_tile_type_s));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_h = %s\n"),
+              host_address_to_string (tdep->sme_tile_type_h));
+  /* The label used to read "sme_tile_type_n", which names no field.  */
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_type_b = %s\n"),
+              host_address_to_string (tdep->sme_tile_type_b));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_q = %s\n"),
+              host_address_to_string (tdep->sme_tile_slice_type_q));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_d = %s\n"),
+              host_address_to_string (tdep->sme_tile_slice_type_d));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_s = %s\n"),
+              host_address_to_string (tdep->sme_tile_slice_type_s));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_h = %s\n"),
+              host_address_to_string (tdep->sme_tile_slice_type_h));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_type_b = %s\n"),
+              host_address_to_string (tdep->sme_tile_slice_type_b));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_reg_base = %s\n"),
+              plongest (tdep->sme_reg_base));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_svg_regnum = %s\n"),
+              plongest (tdep->sme_svg_regnum));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_svcr_regnum = %s\n"),
+              plongest (tdep->sme_svcr_regnum));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_za_regnum = %s\n"),
+              plongest (tdep->sme_za_regnum));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_pseudo_base = %s\n"),
+              plongest (tdep->sme_pseudo_base));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_pseudo_count = %s\n"),
+              plongest (tdep->sme_pseudo_count));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_pseudo_base = %s\n"),
+              plongest (tdep->sme_tile_slice_pseudo_base));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_slice_pseudo_count = %s\n"),
+              plongest (tdep->sme_tile_slice_pseudo_count));
+  gdb_printf (file, _("aarch64_dump_tdep: sme_tile_pseudo_base = %s\n"),
+              plongest (tdep->sme_tile_pseudo_base));
+  /* sme_svq is a size_t, so the unsigned printer is the right one.  */
+  gdb_printf (file, _("aarch64_dump_tdep: sme_svq = %s\n"),
+              pulongest (tdep->sme_svq));
}
#if GDB_SELF_TEST
@@ -80,6 +80,22 @@ struct aarch64_gdbarch_tdep : gdbarch_tdep_base
struct type *vnb_type = nullptr;
struct type *vnv_type = nullptr;
+ /* Types for SME ZA tiles and tile slices pseudo-registers. */
+ struct type *sme_tile_type_q = nullptr;
+ struct type *sme_tile_type_d = nullptr;
+ struct type *sme_tile_type_s = nullptr;
+ struct type *sme_tile_type_h = nullptr;
+ struct type *sme_tile_type_b = nullptr;
+ struct type *sme_tile_slice_type_q = nullptr;
+ struct type *sme_tile_slice_type_d = nullptr;
+ struct type *sme_tile_slice_type_s = nullptr;
+ struct type *sme_tile_slice_type_h = nullptr;
+ struct type *sme_tile_slice_type_b = nullptr;
+
+ /* Vector of names for SME pseudo-registers. The number of elements is
+ different for each distinct svl value. */
+ std::vector<std::string> sme_pseudo_names;
+
/* syscall record. */
int (*aarch64_syscall_record) (struct regcache *regcache,
unsigned long svc_number) = nullptr;
@@ -125,6 +141,37 @@ struct aarch64_gdbarch_tdep : gdbarch_tdep_base
/* The W pseudo-registers. */
int w_pseudo_base = 0;
int w_pseudo_count = 0;
+
+ /* SME feature fields. */
+
+ /* Index of the first SME register. This is -1 if SME is not supported. */
+ int sme_reg_base = 0;
+ /* svg register index. */
+ int sme_svg_regnum = 0;
+ /* svcr register index. */
+ int sme_svcr_regnum = 0;
+ /* ZA register index. */
+ int sme_za_regnum = 0;
+ /* Index of the first SME pseudo-register. Only meaningful when has_sme ()
+ returns true; it keeps its default value otherwise. */
+ int sme_pseudo_base = 0;
+ /* Total number of SME pseudo-registers. */
+ int sme_pseudo_count = 0;
+ /* First tile slice pseudo-register index. */
+ int sme_tile_slice_pseudo_base = 0;
+ /* Total number of tile slice pseudo-registers. */
+ int sme_tile_slice_pseudo_count = 0;
+ /* First tile pseudo-register index. */
+ int sme_tile_pseudo_base = 0;
+ /* The streaming vector quotient (svq) for SME, or zero if SME is not
+ supported. */
+ size_t sme_svq = 0;
+
+  /* Whether the target supports SME.  This is the case exactly when the
+     streaming vector quotient recorded in sme_svq is non-zero.  */
+  bool has_sme () const
+  {
+    return sme_svq > 0;
+  }
};
const target_desc *aarch64_read_description (const aarch64_features &features);
new file mode 100644
@@ -0,0 +1,21 @@
+/* Common Linux arch-specific functionality for AArch64 scalable
+ extensions: SVE and SME.
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "arch/aarch64-scalable-linux.h"
new file mode 100644
@@ -0,0 +1,38 @@
+/* Common AArch64 Linux arch-specific definitions for the scalable
+ extensions: SVE and SME.
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef ARCH_AARCH64_SCALABLE_LINUX_H
+#define ARCH_AARCH64_SCALABLE_LINUX_H
+
+#include "gdbsupport/common-defs.h"
+
+/* Feature check for Scalable Matrix Extension. */
+#ifndef HWCAP2_SME
+#define HWCAP2_SME (1 << 23)
+#endif
+
+/* Streaming mode enabled/disabled bit. */
+#define SVCR_SM_BIT (1 << 0)
+/* ZA enabled/disabled bit. */
+#define SVCR_ZA_BIT (1 << 1)
+/* Mask including all valid SVCR bits. */
+#define SVCR_BIT_MASK (SVCR_SM_BIT | SVCR_ZA_BIT)
+
+#endif /* ARCH_AARCH64_SCALABLE_LINUX_H */
@@ -24,6 +24,7 @@
#include "../features/aarch64-sve.c"
#include "../features/aarch64-pauth.c"
#include "../features/aarch64-mte.c"
+#include "../features/aarch64-sme.c"
#include "../features/aarch64-tls.c"
/* See arch/aarch64.h. */
@@ -57,6 +58,10 @@ aarch64_create_target_description (const aarch64_features &features)
if (features.tls > 0)
regnum = create_feature_aarch64_tls (tdesc.get (), regnum, features.tls);
+ if (features.svq)
+ regnum = create_feature_aarch64_sme (tdesc.get (), regnum,
+ sve_vl_from_vq (features.svq));
+
return tdesc.release ();
}
@@ -27,15 +27,27 @@
struct aarch64_features
{
/* A non zero VQ value indicates both the presence of SVE and the
- Vector Quotient - the number of 128bit chunks in an SVE Z
- register. */
- uint64_t vq = 0;
+ Vector Quotient - the number of 128-bit chunks in an SVE Z
+ register.
+ The maximum value for VQ is 16 (5 bits). */
+ uint64_t vq = 0;
bool pauth = false;
bool mte = false;
/* A positive TLS value indicates the number of TLS registers available. */
uint8_t tls = 0;
+ /* The allowed values for SVQ are the following:
+
+ 0 - SME is not supported/available.
+ 1 - SME is available, SVL is 16 bytes / 128-bit.
+ 2 - SME is available, SVL is 32 bytes / 256-bit.
+ 4 - SME is available, SVL is 64 bytes / 512-bit.
+ 8 - SME is available, SVL is 128 bytes / 1024-bit.
+ 16 - SME is available, SVL is 256 bytes / 2048-bit.
+
+ These use at most 5 bits to represent. */
+ uint8_t svq = 0;
};
inline bool operator==(const aarch64_features &lhs, const aarch64_features &rhs)
@@ -43,7 +55,8 @@ inline bool operator==(const aarch64_features &lhs, const aarch64_features &rhs)
return lhs.vq == rhs.vq
&& lhs.pauth == rhs.pauth
&& lhs.mte == rhs.mte
- && lhs.tls == rhs.tls;
+ && lhs.tls == rhs.tls
+ && lhs.svq == rhs.svq;
}
namespace std
@@ -61,6 +74,11 @@ namespace std
/* Shift by two bits for now. We may need to increase this in the future
if more TLS registers get added. */
h = h << 2 | features.tls;
+
+      /* SVQ is unsigned and at most 16, so it needs 5 bits.  Mask with 0x1f
+         (all 5 bits) so that every valid SVQ value contributes a distinct
+         pattern to the hash; a mask of 0x5 would fold 0, 2, 8 and 16 onto
+         the same value.  */
+      gdb_assert (features.svq <= 16);
+      h = h << 5 | (features.svq & 0x1f);
return h;
}
};
@@ -171,4 +189,35 @@ enum aarch64_regnum
/* Maximum supported VQ value. Increase if required. */
#define AARCH64_MAX_SVE_VQ 16
+/* SME definitions
+
+ Some of these definitions are not found in the Architecture Reference
+ Manual, but we use them so we can keep a similar standard compared to the
+ SVE definitions that the Linux Kernel uses. Otherwise it can get
+ confusing.
+
+ SVL : Streaming Vector Length.
+ Although the documentation handles SVL in bits, we do it in
+ bytes to match what we do for SVE.
+
+ The streaming vector length dictates the size of the ZA register and
+ the size of the SVE registers when in streaming mode.
+
+ SVQ : Streaming Vector Quotient.
+ The number of 128-bit chunks in an SVE Z register or the size of
+ each dimension of the SME ZA matrix.
+
+ SVG : Streaming Vector Granule.
+ The number of 64-bit chunks in an SVE Z register or the size of
+ half a SME ZA matrix dimension. The SVG definition was added so
+ we keep a familiar definition when dealing with SVE registers in
+ streaming mode. */
+
+/* The total number of tiles. This is always fixed regardless of the
+ streaming vector length (svl). */
+#define AARCH64_ZA_TILES_NUM 31
+/* svl limits for SME. */
+#define AARCH64_SME_MIN_SVL 128
+#define AARCH64_SME_MAX_SVL 2048
+
#endif /* ARCH_AARCH64_H */
@@ -146,6 +146,7 @@ aarch64*-*-linux*)
# Target: AArch64 linux
gdb_target_obs="aarch64-linux-tdep.o arch/aarch64.o\
arch/aarch64-mte-linux.o \
+ arch/aarch64-scalable-linux.o \
arch/arm.o arch/arm-linux.o arch/arm-get-next-pcs.o \
arm-tdep.o arm-linux-tdep.o \
glibc-tdep.o linux-tdep.o solib-svr4.o \
new file mode 100644
@@ -0,0 +1,63 @@
+/* Copyright (C) 2023 Free Software Foundation, Inc.
+
+ This file is part of GDB.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include "gdbsupport/tdesc.h"
+#include <cmath>
+
+/* This function is NOT auto generated from xml. Create the AArch64 SME
+ feature into RESULT. SVL is the streaming vector length in bytes.
+
+ The ZA register has a total size of SVL x SVL.
+
+ When in Streaming SVE mode, the effective SVE vector length, VL, is equal
+ to SVL. */
+
+static int
+create_feature_aarch64_sme (struct target_desc *result, long regnum,
+                            size_t svl)
+{
+  /* All SME registers and types live in this feature.  */
+  struct tdesc_feature *feature
+    = tdesc_create_feature (result, "org.gnu.gdb.aarch64.sme");
+
+  /* The SVG register.  */
+  tdesc_create_reg (feature, "svg", regnum++, 1, nullptr, 64, "int");
+
+  /* SVCR flags type: bit 0 is SM and bit 1 is ZA.  */
+  tdesc_type_with_fields *type_with_fields
+    = tdesc_create_flags (feature, "svcr_flags", 8);
+  tdesc_add_flag (type_with_fields, 0, "SM");
+  tdesc_add_flag (type_with_fields, 1, "ZA");
+
+  /* The SVCR register.  */
+  tdesc_create_reg (feature, "svcr", regnum++, 1, nullptr, 64, "svcr_flags");
+
+  /* Build the ZA type bottom-up: a byte, a vector of SVL bytes, and then
+     a vector of SVL such vectors (the matrix).  */
+  tdesc_type *element_type = tdesc_named_type (feature, "uint8");
+  element_type = tdesc_create_vector (feature, "sme_bv", element_type, svl);
+  element_type = tdesc_create_vector (feature, "sme_bvv", element_type, svl);
+
+  /* The ZA register holds SVL x SVL bytes.  Its size in bits is computed
+     with integer arithmetic; there is no need to go through floating
+     point for an exact integer product.  */
+  const int za_bitsize = static_cast<int> (svl * svl * 8);
+  tdesc_create_reg (feature, "za", regnum++, 1, nullptr, za_bitsize,
+                    "sme_bvv");
+
+  /* Hand back the next free register number.  */
+  return regnum;
+}
@@ -1,5 +1,4 @@
-/* Common target dependent routines for AArch64 Scalable Extensions
- (SVE/SME).
+/* Common native Linux code for the AArch64 scalable extensions: SVE and SME.
Copyright (C) 2018-2023 Free Software Foundation, Inc.
@@ -28,6 +27,193 @@
#include "gdbsupport/common-regcache.h"
#include "gdbsupport/byte-vector.h"
#include <endian.h>
+#include "arch/aarch64-scalable-linux.h"
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+aarch64_has_sve_state (int tid)
+{
+  struct user_sve_header sve_header;
+
+  /* If the NT_ARM_SVE header cannot be read, there is no SVE state.  */
+  if (!read_sve_header (tid, sve_header))
+    return false;
+
+  /* The state is active when the header flags SVE registers and the
+     register set is larger than the bare header.  */
+  const bool flags_sve_regs = (sve_header.flags & SVE_PT_REGS_SVE) != 0;
+  const bool has_payload = sizeof (sve_header) != sve_header.size;
+
+  return flags_sve_regs && has_payload;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+aarch64_has_ssve_state (int tid)
+{
+  struct user_sve_header ssve_header;
+
+  /* If the NT_ARM_SSVE header cannot be read, there is no SSVE state.  */
+  if (!read_ssve_header (tid, ssve_header))
+    return false;
+
+  /* The state is active when the header flags SVE registers and the
+     register set is larger than the bare header.  */
+  const bool flags_sve_regs = (ssve_header.flags & SVE_PT_REGS_SVE) != 0;
+  const bool has_payload = sizeof (ssve_header) != ssve_header.size;
+
+  return flags_sve_regs && has_payload;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+aarch64_has_za_state (int tid)
+{
+  struct user_za_header za_header;
+
+  /* ZA state exists when the NT_ARM_ZA header is readable and the register
+     set is larger than the bare header.  */
+  return (read_za_header (tid, za_header)
+          && sizeof (za_header) != za_header.size);
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+read_sve_header (int tid, struct user_sve_header &header)
+{
+  /* Point the request at the caller's header; only the header is read.  */
+  struct iovec request;
+  request.iov_base = &header;
+  request.iov_len = sizeof (header);
+
+  /* A failing request means SVE is not supported.  */
+  return ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &request) >= 0;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+write_sve_header (int tid, const struct user_sve_header &header)
+{
+  /* struct iovec wants a non-const pointer, so the constness of HEADER has
+     to be cast away for the request.  */
+  struct iovec request;
+  request.iov_base = const_cast<struct user_sve_header *> (&header);
+  request.iov_len = sizeof (header);
+
+  /* Report whether the NT_ARM_SVE update was accepted.  */
+  return ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &request) >= 0;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+read_ssve_header (int tid, struct user_sve_header &header)
+{
+  /* Point the request at the caller's header; only the header is read.  */
+  struct iovec request;
+  request.iov_base = &header;
+  request.iov_len = sizeof (header);
+
+  /* A failing request means SSVE is not supported.  */
+  return ptrace (PTRACE_GETREGSET, tid, NT_ARM_SSVE, &request) >= 0;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+write_ssve_header (int tid, const struct user_sve_header &header)
+{
+  /* struct iovec wants a non-const pointer, so the constness of HEADER has
+     to be cast away for the request.  */
+  struct iovec request;
+  request.iov_base = const_cast<struct user_sve_header *> (&header);
+  request.iov_len = sizeof (header);
+
+  /* Report whether the NT_ARM_SSVE update was accepted.  */
+  return ptrace (PTRACE_SETREGSET, tid, NT_ARM_SSVE, &request) >= 0;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+read_za_header (int tid, struct user_za_header &header)
+{
+  /* Point the request at the caller's header; only the header is read.  */
+  struct iovec request;
+  request.iov_base = &header;
+  request.iov_len = sizeof (header);
+
+  /* A failing request means ZA is not supported.  */
+  return ptrace (PTRACE_GETREGSET, tid, NT_ARM_ZA, &request) >= 0;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+write_za_header (int tid, const struct user_za_header &header)
+{
+  /* struct iovec wants a non-const pointer, so the constness of HEADER has
+     to be cast away for the request.  */
+  struct iovec request;
+  request.iov_base = const_cast<struct user_za_header *> (&header);
+  request.iov_len = sizeof (header);
+
+  /* Report whether the NT_ARM_ZA update was accepted.  */
+  return ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &request) >= 0;
+}
+
+/* Given VL, the streaming vector length for SME, return true if it is valid
+ and false otherwise. */
+
+static bool
+aarch64_sme_vl_valid (size_t vl)
+{
+  /* Only the power-of-two lengths from 16 to 256 bytes are accepted.  */
+  switch (vl)
+    {
+    case 16:
+    case 32:
+    case 64:
+    case 128:
+    case 256:
+      return true;
+
+    default:
+      return false;
+    }
+}
+
+/* Given VL, the vector length for SVE, return true if it is valid and false
+ otherwise. SVE_state is true when the check is for the SVE register set.
+ Otherwise the check is for the SSVE register set. */
+
+static bool
+aarch64_sve_vl_valid (const bool sve_state, size_t vl)
+{
+  /* The SVE register set uses the regular SVE check.  An active SSVE state
+     uses the more restrictive set of streaming vector lengths.  */
+  return sve_state ? sve_vl_valid (vl) : aarch64_sme_vl_valid (vl);
+}
/* See nat/aarch64-scalable-linux-ptrace.h. */
@@ -36,23 +222,25 @@ aarch64_sve_get_vq (int tid)
{
struct iovec iovec;
struct user_sve_header header;
-
iovec.iov_len = sizeof (header);
iovec.iov_base = &header;
- /* Ptrace gives the vector length in bytes. Convert it to VQ, the number of
- 128bit chunks in a Z register. We use VQ because 128bits is the minimum
- a Z register can increase in size. */
-
- if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
+ /* Figure out which register set to use for the request. The vector length
+ for SVE can be different from the vector length for SSVE. */
+ bool has_sve_state = !aarch64_has_ssve_state (tid);
+ if (ptrace (PTRACE_GETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE,
+ &iovec) < 0)
{
/* SVE is not supported. */
return 0;
}
+ /* Ptrace gives the vector length in bytes. Convert it to VQ, the number of
+ 128bit chunks in a Z register. We use VQ because 128 bits is the minimum
+ a Z register can increase in size. */
uint64_t vq = sve_vq_from_vl (header.vl);
- if (!sve_vl_valid (header.vl))
+ if (!aarch64_sve_vl_valid (has_sve_state, header.vl))
{
warning (_("Invalid SVE state from kernel; SVE disabled."));
return 0;
@@ -72,15 +260,20 @@ aarch64_sve_set_vq (int tid, uint64_t vq)
iovec.iov_len = sizeof (header);
iovec.iov_base = &header;
- if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
+ /* Figure out which register set to use for the request. The vector length
+ for SVE can be different from the vector length for SSVE. */
+ bool has_sve_state = !aarch64_has_ssve_state (tid);
+ if (ptrace (PTRACE_GETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE,
+ &iovec) < 0)
{
- /* SVE is not supported. */
+ /* SVE/SSVE is not supported. */
return false;
}
header.vl = sve_vl_from_vq (vq);
- if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
+ if (ptrace (PTRACE_SETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE,
+ &iovec) < 0)
{
/* Vector length change failed. */
return false;
@@ -120,13 +313,108 @@ aarch64_sve_set_vq (int tid, struct reg_buffer_common *reg_buf)
/* See nat/aarch64-scalable-linux-ptrace.h. */
+uint64_t
+aarch64_za_get_svq (int tid)
+{
+  struct user_za_header za_header;
+
+  /* Without a readable NT_ARM_ZA header, report 0.  */
+  if (!read_za_header (tid, za_header))
+    return 0;
+
+  /* The streaming vector length must pass the SSVE-style check.  */
+  if (!aarch64_sve_vl_valid (false, za_header.vl))
+    {
+      warning (_("Invalid ZA state from kernel; ZA disabled."));
+      return 0;
+    }
+
+  /* Convert the length in bytes to a quotient.  */
+  return sve_vq_from_vl (za_header.vl);
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+aarch64_za_set_svq (int tid, uint64_t vq)
+{
+  /* Start from the current NT_ARM_ZA header.  If it cannot be read, ZA is
+     not supported.  */
+  struct user_za_header za_header;
+  if (!read_za_header (tid, za_header))
+    return false;
+
+  /* Writing a new streaming vector length invalidates the ZA register
+     state, so leave everything alone when the length is already the
+     requested one.  */
+  const uint64_t requested_vl = sve_vl_from_vq (vq);
+  if (za_header.vl == requested_vl)
+    return true;
+
+  /* Store the new length and shrink the size to a bare header, then write
+     the header back to the NT_ARM_ZA register set.  On success the set
+     should carry no payload (no ZA state); on failure the streaming vector
+     length change did not go through.  */
+  za_header.vl = requested_vl;
+  za_header.size = sizeof (struct user_za_header);
+
+  return write_za_header (tid, za_header);
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+bool
+aarch64_za_set_svq (int tid, const struct reg_buffer_common *reg_buf,
+                    int svg_regnum)
+{
+  uint64_t reg_svg = 0;
+
+  /* The svg register may not be valid if we've not collected any value yet.
+     This can happen, for example, if we're restoring the regcache after an
+     inferior function call, and the svg register comes after the Z
+     registers.  */
+  if (reg_buf->get_register_status (svg_regnum) != REG_VALID)
+    {
+      /* If svg is not available yet, fetch it from ptrace.  The svg value
+         from ptrace is likely the correct one.  */
+      uint64_t svq = aarch64_za_get_svq (tid);
+
+      /* If something went wrong, just bail out.  */
+      if (svq == 0)
+        return false;
+
+      reg_svg = sve_vg_from_vq (svq);
+    }
+  else
+    {
+      /* Read svg out of the register buffer into REG_SVG.  */
+      reg_buf->raw_collect (svg_regnum, &reg_svg);
+    }
+
+  /* Convert the granule count to a quotient and apply it to the thread.  */
+  return aarch64_za_set_svq (tid, sve_vq_from_vg (reg_svg));
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
gdb::byte_vector
aarch64_fetch_sve_regset (int tid)
{
uint64_t vq = aarch64_sve_get_vq (tid);
if (vq == 0)
- perror_with_name (_("Unable to fetch SVE vector length"));
+ perror_with_name (_("Unable to fetch SVE/SSVE vector length"));
/* A ptrace call with NT_ARM_SVE will return a header followed by either a
dump of all the SVE and FP registers, or an fpsimd structure (identical to
@@ -139,8 +427,10 @@ aarch64_fetch_sve_regset (int tid)
iovec.iov_base = sve_state.data ();
iovec.iov_len = sve_state.size ();
- if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
- perror_with_name (_("Unable to fetch SVE registers"));
+ bool has_sve_state = !aarch64_has_ssve_state (tid);
+ if (ptrace (PTRACE_GETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE,
+ &iovec) < 0)
+ perror_with_name (_("Unable to fetch SVE/SSVE registers"));
return sve_state;
}
@@ -155,8 +445,82 @@ aarch64_store_sve_regset (int tid, const gdb::byte_vector &sve_state)
iovec.iov_base = (void *) sve_state.data ();
iovec.iov_len = sve_state.size ();
- if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_SVE, &iovec) < 0)
- perror_with_name (_("Unable to store SVE registers"));
+ bool has_sve_state = !aarch64_has_ssve_state (tid);
+ if (ptrace (PTRACE_SETREGSET, tid, has_sve_state? NT_ARM_SVE : NT_ARM_SSVE,
+ &iovec) < 0)
+ perror_with_name (_("Unable to store SVE/SSVE registers"));
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+gdb::byte_vector
+aarch64_fetch_za_regset (int tid)
+{
+  /* The header gives the size of the whole register set and the streaming
+     vector length, which is validated before going any further.  */
+  struct user_za_header za_header;
+  if (!read_za_header (tid, za_header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  if (!aarch64_sme_vl_valid (za_header.vl))
+    error (_("Found invalid vector length for NT_ARM_ZA."));
+
+  /* Size the buffer from the header, then read the full register set
+     (header plus any payload) into it.  */
+  gdb::byte_vector za_state (za_header.size);
+
+  struct iovec request;
+  request.iov_base = za_state.data ();
+  request.iov_len = za_header.size;
+
+  if (ptrace (PTRACE_GETREGSET, tid, NT_ARM_ZA, &request) < 0)
+    perror_with_name (_("Failed to fetch NT_ARM_ZA register set."));
+
+  return za_state;
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+void
+aarch64_store_za_regset (int tid, const gdb::byte_vector &za_state)
+{
+  /* struct iovec wants a non-const pointer, so the constness of the buffer
+     has to be cast away for the request.  */
+  struct iovec request;
+  request.iov_len = za_state.size ();
+  request.iov_base = const_cast<gdb_byte *> (za_state.data ());
+
+  if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &request) < 0)
+    perror_with_name (_("Failed to write to the NT_ARM_ZA register set."));
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+void
+aarch64_initialize_za_regset (int tid)
+{
+  /* The streaming vector length comes from the current NT_ARM_ZA header.  */
+  struct user_za_header za_header;
+  if (!read_za_header (tid, za_header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  /* Size of the register set including the ZA payload.  */
+  const size_t full_size = ZA_PT_SIZE (sve_vq_from_vl (za_header.vl));
+
+  /* std::vector value-initializes its elements, so the payload starts out
+     as all zeroes.  */
+  std::vector<gdb_byte> za_new_state (full_size);
+
+  /* The header now has to advertise the payload, and it goes at the start
+     of the new state.  */
+  za_header.size = full_size;
+  memcpy (za_new_state.data (), (const gdb_byte *) &za_header,
+          sizeof (user_za_header));
+
+  /* Write header and zeroed payload to the NT_ARM_ZA register set.  After
+     this the set should contain a zero-initialized ZA payload.  */
+  struct iovec request;
+  request.iov_base = za_new_state.data ();
+  request.iov_len = za_new_state.size ();
+
+  if (ptrace (PTRACE_SETREGSET, tid, NT_ARM_ZA, &request) < 0)
+    perror_with_name (_("Failed to initialize the NT_ARM_ZA register set."));
}
/* If we are running in BE mode, byteswap the contents
@@ -451,3 +815,177 @@ aarch64_sve_regs_copy_from_reg_buf (int tid,
passed on to ptrace. */
aarch64_store_sve_regset (tid, new_state);
}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+void
+aarch64_za_regs_copy_to_reg_buf (int tid, struct reg_buffer_common *reg_buf,
+                                 int za_regnum, int svg_regnum,
+                                 int svcr_regnum)
+{
+  /* Fetch the current ZA state from the thread.  */
+  gdb::byte_vector za_state = aarch64_fetch_za_regset (tid);
+
+  /* Sanity check.  */
+  gdb_assert (!za_state.empty ());
+
+  char *base = (char *) za_state.data ();
+  struct user_za_header *header = (struct user_za_header *) base;
+
+  /* If we have ZA state, read it.  Otherwise, make the contents of ZA
+     in the register cache all zeroes.  This is how we present the ZA
+     state when it is not initialized.  */
+  uint64_t svcr_value = 0;
+  if (aarch64_has_za_state (tid))
+    {
+      /* Sanity check the data in the header.  */
+      if (!sve_vl_valid (header->vl)
+          || ZA_PT_SIZE (sve_vq_from_vl (header->vl)) != header->size)
+        {
+          error (_("Found invalid streaming vector length in NT_ARM_ZA"
+                   " register set"));
+        }
+
+      reg_buf->raw_supply (za_regnum, base + ZA_PT_ZA_OFFSET);
+      svcr_value |= SVCR_ZA_BIT;
+    }
+  else
+    {
+      /* ZA is a VL x VL byte matrix.  The zero-filled buffer lives on the
+         heap: a variable-length stack array is not standard C++ and could
+         take up to 256 * 256 bytes of stack.  */
+      size_t za_bytes = header->vl * header->vl;
+      gdb::byte_vector za_zeroed (za_bytes, 0);
+      reg_buf->raw_supply (za_regnum, za_zeroed.data ());
+    }
+
+  /* Handle the svg and svcr registers separately.  We need to calculate
+     their values manually, as the Linux Kernel doesn't expose those
+     explicitly.  */
+  svcr_value |= aarch64_has_ssve_state (tid)? SVCR_SM_BIT : 0;
+  uint64_t svg_value = sve_vg_from_vl (header->vl);
+
+  /* Update the contents of svg and svcr registers.  */
+  reg_buf->raw_supply (svg_regnum, &svg_value);
+  reg_buf->raw_supply (svcr_regnum, &svcr_value);
+
+  /* The register buffer should now contain the updated copy of the NT_ARM_ZA
+     state.  */
+}
+
+/* See nat/aarch64-scalable-linux-ptrace.h. */
+
+void
+aarch64_za_regs_copy_from_reg_buf (int tid,
+                                   struct reg_buffer_common *reg_buf,
+                                   int za_regnum, int svg_regnum,
+                                   int svcr_regnum)
+{
+  /* REG_BUF contains the updated ZA state.  We need to extract that state
+     and write it to the thread TID.
+
+     First check if there is a change to the streaming vector length.  Two
+     outcomes are possible here:
+
+     1 - The streaming vector length in the register cache differs from the
+         one currently on the thread state.  This means that we will need to
+         update the NT_ARM_ZA register set to reflect the new streaming
+         vector length.
+
+     2 - The streaming vector length in the register cache is the same as in
+         the thread state.  This means we do not need to update the
+         NT_ARM_ZA register set for a new streaming vector length, and we
+         only need to deal with changes to za, svg and svcr.
+
+     Neither of the two possibilities above implies that the ZA state
+     actually exists.  They only determine what needs to be done with any ZA
+     content based on the state of the streaming vector length.  */
+
+  /* First fetch the NT_ARM_ZA header so we can fetch the streaming vector
+     length.  */
+  struct user_za_header header;
+  if (!read_za_header (tid, header))
+    error (_("Failed to read NT_ARM_ZA header."));
+
+  /* Fetch the current streaming vector length.  */
+  uint64_t old_svg = sve_vg_from_vl (header.vl);
+
+  /* Fetch the (potentially) new streaming vector length.  */
+  uint64_t new_svg = 0;
+  reg_buf->raw_collect (svg_regnum, &new_svg);
+
+  /* Did the streaming vector length change?  */
+  bool svg_changed = new_svg != old_svg;
+
+  /* First store the streaming vector length to the thread.  This is done
+     first to ensure the ptrace buffers read from the kernel are the correct
+     size.  If the streaming vector length is the same as the current one, it
+     won't be updated.  */
+  if (!aarch64_za_set_svq (tid, reg_buf, svg_regnum))
+    error (_("Unable to set svg register"));
+
+  bool has_za_state = aarch64_has_za_state (tid);
+
+  /* A zero-filled buffer the size of ZA for the old streaming vector
+     length.  It lives on the heap: a variable-length stack array is not
+     standard C++ and could take up to 256 * 256 bytes of stack.  */
+  size_t za_bytes = sve_vl_from_vg (old_svg) * sve_vl_from_vg (old_svg);
+  gdb::byte_vector za_zeroed (za_bytes, 0);
+
+  /* If the streaming vector length changed, zero out the contents of ZA in
+     the register cache.  Otherwise, we will need to update the ZA contents
+     in the thread with the ZA contents from the register cache, and they
+     will differ in size.  */
+  if (svg_changed)
+    reg_buf->raw_supply (za_regnum, za_zeroed.data ());
+
+  /* When we update svg, we don't automatically initialize the ZA buffer.  If
+     the thread has no ZA state and the ZA contents in the register cache are
+     all zero, this was likely just an adjustment of the streaming vector
+     length: nothing is written to ZA and only svcr is refreshed below.  In
+     every other case the thread's ZA state must be updated.  */
+  if (has_za_state
+      || !reg_buf->raw_compare (za_regnum, za_zeroed.data (), 0))
+    {
+      /* If there is no ZA state but the register cache contains ZA data, we
+         need to initialize the ZA data through ptrace.  First we initialize
+         all the bytes of ZA to zero.  */
+      if (!has_za_state)
+        aarch64_initialize_za_regset (tid);
+
+      /* From this point onwards, it is assumed we have a ZA payload in
+         the NT_ARM_ZA register set for this thread, and we need to update
+         such state based on the contents of the register cache.  */
+
+      /* Fetch the current ZA state from the thread.  */
+      gdb::byte_vector za_state = aarch64_fetch_za_regset (tid);
+
+      char *base = (char *) za_state.data ();
+      struct user_za_header *za_header = (struct user_za_header *) base;
+      uint64_t svq = sve_vq_from_vl (za_header->vl);
+
+      /* Sanity check the data in the header.  */
+      if (!sve_vl_valid (za_header->vl)
+          || ZA_PT_SIZE (svq) != za_header->size)
+        error (_("Invalid vector length or payload size when reading ZA."));
+
+      /* Overwrite the ZA state contained in the thread with the ZA state
+         from the register cache.  */
+      if (REG_VALID == reg_buf->get_register_status (za_regnum))
+        reg_buf->raw_collect (za_regnum, base + ZA_PT_ZA_OFFSET);
+
+      /* Write back the ZA state to the thread's NT_ARM_ZA register set.  */
+      aarch64_store_za_regset (tid, za_state);
+    }
+
+  /* Recompute svcr from the thread's current state and refresh it in the
+     register cache.  */
+  uint64_t svcr_value = 0;
+  svcr_value |= aarch64_has_ssve_state (tid)? SVCR_SM_BIT : 0;
+  svcr_value |= aarch64_has_za_state (tid)? SVCR_ZA_BIT : 0;
+  reg_buf->raw_supply (svcr_regnum, &svcr_value);
+
+  /* At this point we have written the data contained in the register cache
+     to the thread's NT_ARM_ZA register set.  */
+}
@@ -1,5 +1,5 @@
-/* Common target dependent definitions for AArch64 Scalable Extensions
- (SVE/SME).
+/* Common native Linux definitions for the AArch64 scalable
+ extensions: SVE and SME.
Copyright (C) 2018-2023 Free Software Foundation, Inc.
@@ -31,19 +31,58 @@
result when <asm/ptrace.h> is included before <sys/ptrace.h>. */
#include <sys/ptrace.h>
#include <asm/ptrace.h>
-
-#ifndef SVE_SIG_ZREGS_SIZE
+#include <stdarg.h>
+#include "aarch64-scalable-linux-ptrace.h"
#include "aarch64-scalable-linux-sigcontext.h"
-#endif
/* Indicates whether a SVE ptrace header is followed by SVE registers or a
fpsimd structure. */
-
#define HAS_SVE_STATE(header) ((header).flags & SVE_PT_REGS_SVE)
+/* Return true if there is an active SVE state in TID.
+ Return false otherwise. */
+bool aarch64_has_sve_state (int tid);
+
+/* Return true if there is an active SSVE state in TID.
+ Return false otherwise. */
+bool aarch64_has_ssve_state (int tid);
+
+/* Return true if there is an active ZA state in TID.
+ Return false otherwise. */
+bool aarch64_has_za_state (int tid);
+
+/* Given TID, read the SVE header into HEADER.
+
+ Return true if successful, false otherwise. */
+bool read_sve_header (int tid, struct user_sve_header &header);
+
+/* Given TID, store the SVE HEADER.
+
+ Return true if successful, false otherwise. */
+bool write_sve_header (int tid, const struct user_sve_header &header);
+
+/* Given TID, read the SSVE header into HEADER.
+
+ Return true if successful, false otherwise. */
+bool read_ssve_header (int tid, struct user_sve_header &header);
+
+/* Given TID, store the SSVE HEADER.
+
+ Return true if successful, false otherwise. */
+bool write_ssve_header (int tid, const struct user_sve_header &header);
+
+/* Given TID, read the ZA header into HEADER.
+
+ Return true if successful, false otherwise. */
+bool read_za_header (int tid, struct user_za_header &header);
+
+/* Given TID, store the ZA HEADER.
+
+ Return true if successful, false otherwise. */
+bool write_za_header (int tid, const struct user_za_header &header);
+
/* Read VQ for the given tid using ptrace. If SVE is not supported then zero
is returned (on a system that supports SVE, then VQ cannot be zero). */
-
uint64_t aarch64_sve_get_vq (int tid);
/* Set VQ in the kernel for the given tid, using either the value VQ or
@@ -52,27 +91,64 @@ uint64_t aarch64_sve_get_vq (int tid);
bool aarch64_sve_set_vq (int tid, uint64_t vq);
bool aarch64_sve_set_vq (int tid, struct reg_buffer_common *reg_buf);
-/* Read the current SVE register set from thread TID and return its data
- through a byte vector. */
+/* Read the streaming mode vq (svq) for the given TID. If the ZA state is not
+ supported or active, return 0. */
+uint64_t aarch64_za_get_svq (int tid);
+
+/* Set the streaming vector quotient (svq) in the kernel for the given TID,
+ using either the value VQ or REG_BUF together with SVG_REGNUM.
+ Return true if successful, false otherwise. */
+bool aarch64_za_set_svq (int tid, uint64_t vq);
+bool aarch64_za_set_svq (int tid, const struct reg_buffer_common *reg_buf,
+ int svg_regnum);
+
+/* Given TID, return the SVE/SSVE data as a vector of bytes. */
extern gdb::byte_vector aarch64_fetch_sve_regset (int tid);
-/* Write the SVE contents from SVE_STATE to thread TID. */
+/* Write the SVE/SSVE contents from SVE_STATE to TID. */
+extern void aarch64_store_sve_regset (int tid,
+ const gdb::byte_vector &sve_state);
-extern void
-aarch64_store_sve_regset (int tid, const gdb::byte_vector &sve_state);
+/* Given TID, return the ZA data as a vector of bytes. */
+extern gdb::byte_vector aarch64_fetch_za_regset (int tid);
-/* Given a thread id TID and a register buffer REG_BUF, update the register
- buffer with the SVE state from thread TID. */
+/* Write ZA_STATE for TID. */
+extern void aarch64_store_za_regset (int tid, const gdb::byte_vector &za_state);
+/* Given TID, initialize the ZA register set so the header contains the right
+ size. The bytes of the ZA register are initialized to zero. */
+extern void aarch64_initialize_za_regset (int tid);
+
+/* Given a thread id TID and a register buffer REG_BUF, update the register
+ buffer with the SVE/SSVE state from thread TID. */
extern void
aarch64_sve_regs_copy_to_reg_buf (int tid, struct reg_buffer_common *reg_buf);
-/* Given a thread id TID and a register buffer REG_BUF containing SVE
+/* Given a thread id TID and a register buffer REG_BUF containing SVE/SSVE
register data, write the SVE data to thread TID. */
-
extern void
aarch64_sve_regs_copy_from_reg_buf (int tid,
struct reg_buffer_common *reg_buf);
+/* Given a thread id TID and a register buffer REG_BUF, update the register
+ buffer with the ZA state from thread TID.
+
+ ZA_REGNUM, SVG_REGNUM and SVCR_REGNUM are the register numbers for ZA,
+ SVG and SVCR registers. */
+extern void aarch64_za_regs_copy_to_reg_buf (int tid,
+ struct reg_buffer_common *reg_buf,
+ int za_regnum, int svg_regnum,
+ int svcr_regnum);
+
+/* Given a thread id TID and a register buffer REG_BUF containing ZA register
+ data, write the ZA data to thread TID.
+
+ ZA_REGNUM, SVG_REGNUM and SVCR_REGNUM are the register numbers for ZA,
+ SVG and SVCR registers. */
+extern void
+aarch64_za_regs_copy_from_reg_buf (int tid,
+ struct reg_buffer_common *reg_buf,
+ int za_regnum, int svg_regnum,
+ int svcr_regnum);
#endif /* NAT_AARCH64_SCALABLE_LINUX_PTRACE_H */
@@ -22,8 +22,11 @@
#ifndef NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H
#define NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H
+#ifndef SVE_SIG_ZREGS_SIZE
+
#define SVE_MAGIC 0x53564501
+
struct sve_context {
struct _aarch64_ctx head;
__u16 vl;
@@ -132,7 +135,7 @@ struct sve_context {
#define SVE_SIG_CONTEXT_SIZE(vq) (SVE_SIG_REGS_OFFSET + SVE_SIG_REGS_SIZE(vq))
-/* SVE/FP/SIMD state (NT_ARM_SVE) */
+/* SVE/FP/SIMD state (NT_ARM_SVE and NT_ARM_SSVE) */
struct user_sve_header {
__u32 size; /* total meaningful regset content in bytes */
@@ -242,6 +245,7 @@ struct user_sve_header {
(SVE_PT_SVE_PREG_OFFSET(vq, SVE_NUM_PREGS) - \
SVE_PT_SVE_PREGS_OFFSET(vq))
+/* For streaming mode SVE (SSVE) FFR must be read and written as zero. */
#define SVE_PT_SVE_FFR_OFFSET(vq) \
__SVE_SIG_TO_PT(SVE_SIG_FFR_OFFSET(vq))
@@ -267,4 +271,55 @@ struct user_sve_header {
SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, flags) \
: SVE_PT_FPSIMD_OFFSET + SVE_PT_FPSIMD_SIZE(vq, flags))
+#endif /* SVE_SIG_ZREGS_SIZE */
+
+/* Scalable Matrix Extensions (SME) definitions. */
+
+/* Make sure we only define these if the kernel header doesn't. */
+#ifndef ZA_PT_SIZE
+
+/* Header found at the start of the ZA state regset (NT_ARM_ZA) */
+struct user_za_header {
+ __u32 size; /* total meaningful regset content in bytes */
+ __u32 max_size; /* maximum possible size for this thread */
+ __u16 vl; /* current vector length */
+ __u16 max_vl; /* maximum possible vector length */
+ __u16 flags; /* presumably as for user_sve_header -- TODO confirm */
+ __u16 __reserved;
+};
+
+/* The remainder of the ZA state follows struct user_za_header. The
+ total size of the ZA state (including header) depends on the
+ metadata in the header: ZA_PT_SIZE(vq) gives the total size
+ of the state in bytes, including the header.
+
+ Refer to arch/arm64/include/uapi/asm/sigcontext.h from the Linux kernel
+ for details of how to pass the correct "vq" argument to these macros. */
+
+/* Offset from header start to register data (rounded up to __SVE_VQ_BYTES) */
+#define ZA_PT_ZA_OFFSET \
+ ((sizeof (struct user_za_header) + (__SVE_VQ_BYTES - 1)) \
+ / __SVE_VQ_BYTES * __SVE_VQ_BYTES)
+
+/* The payload starts at offset ZA_PT_ZA_OFFSET, and is of size
+ ZA_PT_ZA_SIZE(vq).
+
+ The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8
+ bytes each, starting from vector 0; vector n is at ZA_PT_ZAV_OFFSET(vq, n).
+
+ Additional data might be appended in the future.
+
+ The ZA matrix is represented in memory in an endianness-invariant layout
+ which differs from the layout used for the FPSIMD V-registers on big-endian
+ systems: see sigcontext.h for more explanation. */
+
+#define ZA_PT_ZAV_OFFSET(vq, n) \
+ (ZA_PT_ZA_OFFSET + (((vq) * __SVE_VQ_BYTES) * (n)))
+
+#define ZA_PT_ZA_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
+
+#define ZA_PT_SIZE(vq) \
+ (ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
+#endif /* ZA_PT_SIZE */
+
#endif /* NAT_AARCH64_SCALABLE_LINUX_SIGCONTEXT_H */
@@ -49,6 +49,7 @@ case "${gdbserver_host}" in
srv_tgtobj="$srv_tgtobj arch/aarch64-insn.o"
srv_tgtobj="$srv_tgtobj arch/aarch64.o"
srv_tgtobj="$srv_tgtobj arch/aarch64-mte-linux.o"
+ srv_tgtobj="$srv_tgtobj arch/aarch64-scalable-linux.o"
srv_tgtobj="$srv_tgtobj linux-aarch64-tdesc.o"
srv_tgtobj="$srv_tgtobj nat/aarch64-mte-linux-ptrace.o"
srv_tgtobj="$srv_tgtobj nat/aarch64-scalable-linux-ptrace.o"