[RFC,02/13] i387-tdep: Add function to read XSAVE layout from NT_X86_CPUID
Checks
| Context | Check | Description |
| --- | --- | --- |
| linaro-tcwg-bot/tcwg_gdb_build--master-arm | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 | success | Testing passed |
| linaro-tcwg-bot/tcwg_gdb_check--master-arm | success | Testing passed |
Commit Message
This can be used by x86 arches to determine the XSAVE layout instead
of guessing based on the XCR0 mask and XSAVE register note size.
---
gdb/i387-tdep.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++
gdb/i387-tdep.h | 8 +++
2 files changed, 140 insertions(+)
Comments
On 2023-10-09 14:36, John Baldwin wrote:
> This can be used by x86 arches to determine the XSAVE layout instead
> of guessing based on the XCR0 mask and XSAVE register note size.
Just some nits below:
> +typedef std::unordered_map<cpuid_key, cpuid_values> cpuid_map;
For new stuff I would suggest:
using cpuid_map = std::unordered_map<cpuid_key, cpuid_values>;
> +
> +static cpuid_map
> +i387_parse_cpuid_from_core (bfd *bfd)
> +{
> + asection *section = bfd_get_section_by_name (bfd, ".reg-x86-cpuid");
> + if (section == nullptr)
> + return {};
> +
> + size_t size = bfd_section_size (section);
> + if (size == 0 || (size % (6 * 4)) != 0)
That 4 could be `sizeof (uint32_t)`. And `6 * 4` appears below again,
it could be a constexpr value with a name like entry_size.
> + return {};
> +
> + char contents[size];
> + if (!bfd_get_section_contents (bfd, section, contents, 0, size))
> + {
> + warning (_("Couldn't read `.reg-x86-cpuid' section in core file."));
> + return {};
> + }
> +
> + cpuid_map map;
> + size_t index = 0;
> + while (index < size)
> + {
> + uint32_t leaf = bfd_get_32 (bfd, contents + index);
> + uint32_t count = bfd_get_32 (bfd, contents + index + 4);
> + uint32_t eax = bfd_get_32 (bfd, contents + index + 8);
> + uint32_t ebx = bfd_get_32 (bfd, contents + index + 12);
> + uint32_t ecx = bfd_get_32 (bfd, contents + index + 16);
> + uint32_t edx = bfd_get_32 (bfd, contents + index + 20);
> +
> + if (map.count (cpuid_key (leaf, count)) != 0)
> + {
> + warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
> + return {};
> + }
> + map.emplace (cpuid_key (leaf, count),
> + cpuid_values (eax, ebx, ecx, edx));
This could be slightly more optimal using map::try_emplace (to avoid
having two map lookups).
Simon
Hi,
I am not familiar with XSAVE details, but I have pure c++ style comments
below.
On Mon, Oct 09, 2023 at 11:36:04AM -0700, John Baldwin wrote:
> This can be used by x86 arches to determine the XSAVE layout instead
> of guessing based on the XCR0 mask and XSAVE register note size.
> ---
> gdb/i387-tdep.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++
> gdb/i387-tdep.h | 8 +++
> 2 files changed, 140 insertions(+)
>
> diff --git a/gdb/i387-tdep.c b/gdb/i387-tdep.c
> index 47667da21c7..1eac2b6bd2a 100644
> --- a/gdb/i387-tdep.c
> +++ b/gdb/i387-tdep.c
> @@ -26,6 +26,8 @@
> #include "target-float.h"
> #include "value.h"
>
> +#include <stdexcept>
> +
> #include "i386-tdep.h"
> #include "i387-tdep.h"
> #include "gdbsupport/x86-xstate.h"
> @@ -987,6 +989,136 @@ i387_guess_xsave_layout (uint64_t xcr0, size_t xsave_size,
> return true;
> }
>
> +/* Parse a reg-x86-cpuid pseudo section building a hash table mapping
> + cpuid leaves to their results. */
> +
> +struct cpuid_key
> +{
> + cpuid_key (uint32_t _leaf, uint32_t _subleaf)
> + : leaf(_leaf), subleaf(_subleaf)
> + {}
> +
> + uint32_t leaf;
> + uint32_t subleaf;
> +
> + constexpr bool operator== (const cpuid_key &other) const
> + { return (leaf == other.leaf && subleaf == other.subleaf); }
> +};
> +
> +namespace std
> +{
> +template<>
> +struct hash<cpuid_key>
> +{
> + size_t operator() (const cpuid_key &key) const
> + {
> + return key.leaf ^ (key.subleaf << 1);
> + }
> +};
> +}
I think there was a discussion not long ago regarding opening std, and
it seems that the preferred approach is to use:
template<>
struct std::hash<cpuid_key>
{
...
};
See
https://sourceware.org/pipermail/gdb-patches/2023-September/202336.html
for the discussion.
> +
> +struct cpuid_values
> +{
> + cpuid_values (uint32_t _eax, uint32_t _ebx, uint32_t _ecx, uint32_t _edx)
> + : eax(_eax), ebx(_ebx), ecx(_ecx), edx(_edx)
> + {}
> +
> + uint32_t eax;
> + uint32_t ebx;
> + uint32_t ecx;
> + uint32_t edx;
> +};
> +
> +typedef std::unordered_map<cpuid_key, cpuid_values> cpuid_map;
> +
> +static cpuid_map
> +i387_parse_cpuid_from_core (bfd *bfd)
> +{
> + asection *section = bfd_get_section_by_name (bfd, ".reg-x86-cpuid");
> + if (section == nullptr)
> + return {};
> +
> + size_t size = bfd_section_size (section);
> + if (size == 0 || (size % (6 * 4)) != 0)
> + return {};
> +
> + char contents[size];
If I remember correctly, VLAs are not a C++ feature (but are supported
as a GCC extension
https://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html). I am unsure
if GDB has a policy regarding the use of extensions, so maybe this is
fine. Otherwise, you could use a std::vector instead (it comes with a
dynamic allocation, but I am not too concerned as this is hardly on a
performance-critical path)
std::vector<char> contents (size);
> + if (!bfd_get_section_contents (bfd, section, contents, 0, size))
> + {
> + warning (_("Couldn't read `.reg-x86-cpuid' section in core file."));
> + return {};
> + }
> +
> + cpuid_map map;
> + size_t index = 0;
> + while (index < size)
> + {
> + uint32_t leaf = bfd_get_32 (bfd, contents + index);
> + uint32_t count = bfd_get_32 (bfd, contents + index + 4);
> + uint32_t eax = bfd_get_32 (bfd, contents + index + 8);
> + uint32_t ebx = bfd_get_32 (bfd, contents + index + 12);
> + uint32_t ecx = bfd_get_32 (bfd, contents + index + 16);
> + uint32_t edx = bfd_get_32 (bfd, contents + index + 20);
> +
> + if (map.count (cpuid_key (leaf, count)) != 0)
> + {
> + warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
> + return {};
> + }
> + map.emplace (cpuid_key (leaf, count),
> + cpuid_values (eax, ebx, ecx, edx));
As Simon pointed out, there are two lookups here, where you can get away
with just one. However, try_emplace is C++17 only, which is not [yet]
available in GDB. Instead, you can use the value returned by emplace to
know whether an insertion has been done or not:
auto emplace_result = map.emplace (cpuid_key (leaf, count),
cpuid_values (eax, ebx, ecx, edx));
if (!emplace_result.second)
{
warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
return {};
}
> +
> + index += 6 * 4;
> + }
> +
> + return map;
> +}
> +
> +/* Fetch the offset of a specific XSAVE extended region. */
> +
> +static int
I think it is worth returning uint32_t here as int is (in theory) target
dependent.
> +xsave_feature_offset (cpuid_map &map, uint64_t xcr0, int feature)
I think that the MAP parameter could be `const` here.
> +{
> + if ((xcr0 & (1ULL << feature)) == 0)
> + return 0;
> +
> + return map.at (cpuid_key (0xd, feature)).ebx;
> +}
> +
> +/* See i387-tdep.h. */
> +
> +bool
> +i387_read_xsave_layout_from_core (bfd *bfd, uint64_t xcr0, size_t xsave_size,
> + x86_xsave_layout &layout)
> +{
> + cpuid_map map = i387_parse_cpuid_from_core (bfd);
> + if (map.empty ())
> + return false;
> +
> + try
> + {
> + layout.sizeof_xsave = xsave_size;
> + layout.avx_offset = xsave_feature_offset (map, xcr0,
> + X86_XSTATE_AVX_ID);
> + layout.bndregs_offset = xsave_feature_offset (map, xcr0,
> + X86_XSTATE_BNDREGS_ID);
> + layout.bndcfg_offset = xsave_feature_offset (map, xcr0,
> + X86_XSTATE_BNDCFG_ID);
> + layout.k_offset = xsave_feature_offset (map, xcr0,
> + X86_XSTATE_K_ID);
> + layout.zmm_h_offset = xsave_feature_offset (map, xcr0,
> + X86_XSTATE_ZMM_H_ID);
> + layout.zmm_offset = xsave_feature_offset (map, xcr0, X86_XSTATE_ZMM_ID);
> + layout.pkru_offset = xsave_feature_offset (map, xcr0, X86_XSTATE_PKRU_ID);
> + }
> + catch (const std::out_of_range &)
> + {
> + return false;
> + }
> +
> + return true;
> +}
> +
> /* Extract from XSAVE a bitset of the features that are available on the
> target, but which have not yet been enabled. */
>
> diff --git a/gdb/i387-tdep.h b/gdb/i387-tdep.h
> index e149e30e52e..b16b9a60b67 100644
> --- a/gdb/i387-tdep.h
> +++ b/gdb/i387-tdep.h
> @@ -147,6 +147,14 @@ extern void i387_supply_fxsave (struct regcache *regcache, int regnum,
> extern bool i387_guess_xsave_layout (uint64_t xcr0, size_t xsave_size,
> x86_xsave_layout &layout);
>
> +/* Determine the XSAVE layout from the `reg-x86-cpuid` section in a
> + core dump. Returns true on sucess, or false if a layout can not be
s/sucess/success/
> + read. */
> +
> +extern bool i387_read_xsave_layout_from_core (bfd *bfd, uint64_t xcr0,
> + size_t xsave_size,
> + x86_xsave_layout &layout);
> +
> /* Similar to i387_supply_fxsave, but use XSAVE extended state. */
>
> extern void i387_supply_xsave (struct regcache *regcache, int regnum,
> --
> 2.41.0
>
Best,
Lancelot.
On 10/11/23 9:27 PM, Simon Marchi wrote:
>
>
> On 2023-10-09 14:36, John Baldwin wrote:
>> This can be used by x86 arches to determine the XSAVE layout instead
>> of guessing based on the XCR0 mask and XSAVE register note size.
>
> Just some nits below:
>
>> +typedef std::unordered_map<cpuid_key, cpuid_values> cpuid_map;
>
> For new stuff I would suggest:
>
> using cpuid_map = std::unordered_map<cpuid_key, cpuid_values>;
Ok.
>> +
>> +static cpuid_map
>> +i387_parse_cpuid_from_core (bfd *bfd)
>> +{
>> + asection *section = bfd_get_section_by_name (bfd, ".reg-x86-cpuid");
>> + if (section == nullptr)
>> + return {};
>> +
>> + size_t size = bfd_section_size (section);
>> + if (size == 0 || (size % (6 * 4)) != 0)
>
> That 4 could be `sizeof (uint32_t)`. And `6 * 4` appears below again,
> it could be a constexpr value with a name like entry_size.
Ok.
>> + return {};
>> +
>> + char contents[size];
>> + if (!bfd_get_section_contents (bfd, section, contents, 0, size))
>> + {
>> + warning (_("Couldn't read `.reg-x86-cpuid' section in core file."));
>> + return {};
>> + }
>> +
>> + cpuid_map map;
>> + size_t index = 0;
>> + while (index < size)
>> + {
>> + uint32_t leaf = bfd_get_32 (bfd, contents + index);
>> + uint32_t count = bfd_get_32 (bfd, contents + index + 4);
>> + uint32_t eax = bfd_get_32 (bfd, contents + index + 8);
>> + uint32_t ebx = bfd_get_32 (bfd, contents + index + 12);
>> + uint32_t ecx = bfd_get_32 (bfd, contents + index + 16);
>> + uint32_t edx = bfd_get_32 (bfd, contents + index + 20);
>> +
>> + if (map.count (cpuid_key (leaf, count)) != 0)
>> + {
>> + warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
>> + return {};
>> + }
>> + map.emplace (cpuid_key (leaf, count),
>> + cpuid_values (eax, ebx, ecx, edx));
>
> This could be slightly more optimal using map::try_emplace (to avoid
> having two map lookups).
Ok.
Presumably C++17 will be required before this series lands, and I
don't plan to backport it to GDB 14. (If I did it could also turn
back into a loop + emplace though as part of the backport.)
On 10/16/23 2:17 AM, Lancelot SIX wrote:
> Hi,
>
> I am not familiar with XSAVE details, but I have pure c++ style comments
> below.
Thanks, I've generally accepted the changes aside from a few modifications below.
> On Mon, Oct 09, 2023 at 11:36:04AM -0700, John Baldwin wrote:
>> This can be used by x86 arches to determine the XSAVE layout instead
>> of guessing based on the XCR0 mask and XSAVE register note size.
>> ---
>> gdb/i387-tdep.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++
>> gdb/i387-tdep.h | 8 +++
>> 2 files changed, 140 insertions(+)
>>
>> diff --git a/gdb/i387-tdep.c b/gdb/i387-tdep.c
>> index 47667da21c7..1eac2b6bd2a 100644
>> --- a/gdb/i387-tdep.c
>> +++ b/gdb/i387-tdep.c
>> + size_t size = bfd_section_size (section);
>> + if (size == 0 || (size % (6 * 4)) != 0)
>> + return {};
>> +
>> + char contents[size];
>
> If I remember correctly, VLAs are not a C++ feature (but are supported
> as a GCC extension
> https://gcc.gnu.org/onlinedocs/gcc/Variable-Length.html). I am unsure
> if GDB has a policy regarding the use of extensions, so maybe this is
> fine. Otherwise, you could use a std::vector instead (it comes with a
> dynamic allocation, but I am not too concerned as this is hardly on a
> performance-critical path)
>
> std::vector<char> contents (size);
I've used a gdb::byte_vector instead of a plain std::vector<>.
>> + if (map.count (cpuid_key (leaf, count)) != 0)
>> + {
>> + warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
>> + return {};
>> + }
>> + map.emplace (cpuid_key (leaf, count),
>> + cpuid_values (eax, ebx, ecx, edx));
>
> As Simon pointed out, there are two lookups here, where you can get away
> with just one. However, this is C++17 only which is not [yet] available
> in GDB. Instead, you can use the value returned by emplace to know if
> an insertion has been done or not:
>
> auto emplace_result = map.emplace (cpuid_key (leaf, count),
> cpuid_values (eax, ebx, ecx, edx));
> if (!emplace_result.second)
> {
> warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
> return {};
> }
I'm going to assume that C++17 will land first in GDB before this and just
go with try_emplace for now. If I end up backporting this to GDB 14 (which
I don't currently anticipate), then this will be nicer for that.
@@ -26,6 +26,8 @@
#include "target-float.h"
#include "value.h"
+#include <stdexcept>
+
#include "i386-tdep.h"
#include "i387-tdep.h"
#include "gdbsupport/x86-xstate.h"
@@ -987,6 +989,136 @@ i387_guess_xsave_layout (uint64_t xcr0, size_t xsave_size,
return true;
}
+/* Parse a reg-x86-cpuid pseudo section building a hash table mapping
+ cpuid leaves to their results. */
+
+struct cpuid_key
+{
+ cpuid_key (uint32_t _leaf, uint32_t _subleaf)
+ : leaf(_leaf), subleaf(_subleaf)
+ {}
+
+ uint32_t leaf;
+ uint32_t subleaf;
+
+ constexpr bool operator== (const cpuid_key &other) const
+ { return (leaf == other.leaf && subleaf == other.subleaf); }
+};
+
+namespace std
+{
+template<>
+struct hash<cpuid_key>
+{
+ size_t operator() (const cpuid_key &key) const
+ {
+ return key.leaf ^ (key.subleaf << 1);
+ }
+};
+}
+
+struct cpuid_values
+{
+ cpuid_values (uint32_t _eax, uint32_t _ebx, uint32_t _ecx, uint32_t _edx)
+ : eax(_eax), ebx(_ebx), ecx(_ecx), edx(_edx)
+ {}
+
+ uint32_t eax;
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
+};
+
+typedef std::unordered_map<cpuid_key, cpuid_values> cpuid_map;
+
+static cpuid_map
+i387_parse_cpuid_from_core (bfd *bfd)
+{
+ asection *section = bfd_get_section_by_name (bfd, ".reg-x86-cpuid");
+ if (section == nullptr)
+ return {};
+
+ size_t size = bfd_section_size (section);
+ if (size == 0 || (size % (6 * 4)) != 0)
+ return {};
+
+ char contents[size];
+ if (!bfd_get_section_contents (bfd, section, contents, 0, size))
+ {
+ warning (_("Couldn't read `.reg-x86-cpuid' section in core file."));
+ return {};
+ }
+
+ cpuid_map map;
+ size_t index = 0;
+ while (index < size)
+ {
+ uint32_t leaf = bfd_get_32 (bfd, contents + index);
+ uint32_t count = bfd_get_32 (bfd, contents + index + 4);
+ uint32_t eax = bfd_get_32 (bfd, contents + index + 8);
+ uint32_t ebx = bfd_get_32 (bfd, contents + index + 12);
+ uint32_t ecx = bfd_get_32 (bfd, contents + index + 16);
+ uint32_t edx = bfd_get_32 (bfd, contents + index + 20);
+
+ if (map.count (cpuid_key (leaf, count)) != 0)
+ {
+ warning (_("Duplicate cpuid leaf %#x,%#x"), leaf, count);
+ return {};
+ }
+ map.emplace (cpuid_key (leaf, count),
+ cpuid_values (eax, ebx, ecx, edx));
+
+ index += 6 * 4;
+ }
+
+ return map;
+}
+
+/* Fetch the offset of a specific XSAVE extended region. */
+
+static int
+xsave_feature_offset (cpuid_map &map, uint64_t xcr0, int feature)
+{
+ if ((xcr0 & (1ULL << feature)) == 0)
+ return 0;
+
+ return map.at (cpuid_key (0xd, feature)).ebx;
+}
+
+/* See i387-tdep.h. */
+
+bool
+i387_read_xsave_layout_from_core (bfd *bfd, uint64_t xcr0, size_t xsave_size,
+ x86_xsave_layout &layout)
+{
+ cpuid_map map = i387_parse_cpuid_from_core (bfd);
+ if (map.empty ())
+ return false;
+
+ try
+ {
+ layout.sizeof_xsave = xsave_size;
+ layout.avx_offset = xsave_feature_offset (map, xcr0,
+ X86_XSTATE_AVX_ID);
+ layout.bndregs_offset = xsave_feature_offset (map, xcr0,
+ X86_XSTATE_BNDREGS_ID);
+ layout.bndcfg_offset = xsave_feature_offset (map, xcr0,
+ X86_XSTATE_BNDCFG_ID);
+ layout.k_offset = xsave_feature_offset (map, xcr0,
+ X86_XSTATE_K_ID);
+ layout.zmm_h_offset = xsave_feature_offset (map, xcr0,
+ X86_XSTATE_ZMM_H_ID);
+ layout.zmm_offset = xsave_feature_offset (map, xcr0, X86_XSTATE_ZMM_ID);
+ layout.pkru_offset = xsave_feature_offset (map, xcr0, X86_XSTATE_PKRU_ID);
+ }
+ catch (const std::out_of_range &)
+ {
+ return false;
+ }
+
+ return true;
+}
+
/* Extract from XSAVE a bitset of the features that are available on the
target, but which have not yet been enabled. */
@@ -147,6 +147,14 @@ extern void i387_supply_fxsave (struct regcache *regcache, int regnum,
extern bool i387_guess_xsave_layout (uint64_t xcr0, size_t xsave_size,
x86_xsave_layout &layout);
+/* Determine the XSAVE layout from the `reg-x86-cpuid` section in a
+ core dump. Returns true on sucess, or false if a layout can not be
+ read. */
+
+extern bool i387_read_xsave_layout_from_core (bfd *bfd, uint64_t xcr0,
+ size_t xsave_size,
+ x86_xsave_layout &layout);
+
/* Similar to i387_supply_fxsave, but use XSAVE extended state. */
extern void i387_supply_xsave (struct regcache *regcache, int regnum,