[v6,01/15] x86: Add an x86_xsave_layout structure to handle variable XSAVE layouts.

Message ID 20230714155151.21723-2-jhb@FreeBSD.org
State New
Headers
Series Handle variable XSAVE layouts |

Commit Message

John Baldwin July 14, 2023, 3:51 p.m. UTC
  The standard layout of the XSAVE extended state area consists of three
regions.  The first 512 bytes (legacy region) match the layout of the
FXSAVE instruction including floating point registers, MMX registers,
and SSE registers.  The next 64 bytes (XSAVE header) contain a header
with a fixed layout.  The final region (extended region) contains zero
or more optional state components.  Examples of these include the
upper 128 bits of YMM registers for AVX.

These optional state components generally have an
architecturally-fixed size, but they are not assigned architectural
offsets in the extended region.  Instead, processors provide
additional CPUID leaves describing the size and offset of each
component in the "standard" layout for a given CPU.  (There is also a
"compact" format which uses an alternate layout, but existing OSes
currently export the "standard" layout when exporting XSAVE data via
ptrace() and core dumps.)

To date, GDB has assumed the layout used on current Intel processors
for state components in the extended region and hardcoded those
offsets in the tables in i387-tdep.c and i387-fp.cc.  However, this
fails on recent AMD processors which use a different layout.
Specifically, AMD Zen3 and later processors do not leave space for the
MPX register set in between the AVX and AVX512 register sets.

To rectify this, add an x86_xsave_layout structure which contains the
total size of the XSAVE extended state area as well as the offset of
each known optional state component.

Subsequent commits will modify XSAVE parsing in both gdb and gdbserver
to use x86_xsave_layout.

Co-authored-by: Aleksandar Paunovic <aleksandar.paunovic@intel.com>
---
 gdbsupport/x86-xstate.h | 65 +++++++++++++++++++++++++++++++++++------
 1 file changed, 56 insertions(+), 9 deletions(-)
  

Comments

Simon Marchi July 26, 2023, 7:22 p.m. UTC | #1
On 7/14/23 11:51, John Baldwin wrote:
> The standard layout of the XSAVE extended state area consists of three
> regions.  The first 512 bytes (legacy region) match the layout of the
> FXSAVE instruction including floating point registers, MMX registers,
> and SSE registers.  The next 64 bytes (XSAVE header) contains a header
> with a fixed layout.  The final region (extended region) contains zero
> or more optional state components.  Examples of these include the
> upper 128 bits of YMM registers for AVX.
> 
> These optional state components generally have an
> architecturally-fixed size, but they are not assigned architectural
> offsets in the extended region.  Instead, processors provide
> additional CPUID leafs describing the size and offset of each
> component in the "standard" layout for a given CPU.  (There is also a
> "compact" format which uses an alternate layout, but existing OS's
> currently export the "standard" layout when exporting XSAVE data via
> ptrace() and core dumps.)
> 
> To date, GDB has assumed the layout used on current Intel processors
> for state components in the extended region and hardcoded those
> offsets in the tables in i387-tdep.c and i387-fp.cc.  However, this
> fails on recent AMD processors which use a different layout.
> Specifically, AMD Zen3 and later processors do not leave space for the
> MPX register set in between the AVX and AVX512 register sets.
> 
> To rectify this, add an x86_xsave_layout structure which contains the
> total size of the XSAVE extended state area as well as the offset of
> each known optional state component.
> 
> Subsequent commits will modify XSAVE parsing in both gdb and gdbserver
> to use x86_xsave_layout.
> 
> Co-authored-by: Aleksandar Paunovic <aleksandar.paunovic@intel.com>
> ---
>  gdbsupport/x86-xstate.h | 65 +++++++++++++++++++++++++++++++++++------
>  1 file changed, 56 insertions(+), 9 deletions(-)
> 
> diff --git a/gdbsupport/x86-xstate.h b/gdbsupport/x86-xstate.h
> index b8740fd8701..27fc0bd12f2 100644
> --- a/gdbsupport/x86-xstate.h
> +++ b/gdbsupport/x86-xstate.h
> @@ -20,22 +20,69 @@
>  #ifndef COMMON_X86_XSTATE_H
>  #define COMMON_X86_XSTATE_H
>  
> +/* The extended state feature IDs in the state component bitmap.  */
> +#define X86_XSTATE_X87_ID	0
> +#define X86_XSTATE_SSE_ID	1
> +#define X86_XSTATE_AVX_ID	2
> +#define X86_XSTATE_BNDREGS_ID	3
> +#define X86_XSTATE_BNDCFG_ID	4
> +#define X86_XSTATE_K_ID		5
> +#define X86_XSTATE_ZMM_H_ID	6
> +#define X86_XSTATE_ZMM_ID	7
> +#define X86_XSTATE_PKRU_ID	9
> +
>  /* The extended state feature bits.  */
> -#define X86_XSTATE_X87		(1ULL << 0)
> -#define X86_XSTATE_SSE		(1ULL << 1)
> -#define X86_XSTATE_AVX		(1ULL << 2)
> -#define X86_XSTATE_BNDREGS	(1ULL << 3)
> -#define X86_XSTATE_BNDCFG	(1ULL << 4)
> +#define X86_XSTATE_X87		(1ULL << X86_XSTATE_X87_ID)
> +#define X86_XSTATE_SSE		(1ULL << X86_XSTATE_SSE_ID)
> +#define X86_XSTATE_AVX		(1ULL << X86_XSTATE_AVX_ID)
> +#define X86_XSTATE_BNDREGS	(1ULL << X86_XSTATE_BNDREGS_ID)
> +#define X86_XSTATE_BNDCFG	(1ULL << X86_XSTATE_BNDCFG_ID)
>  #define X86_XSTATE_MPX		(X86_XSTATE_BNDREGS | X86_XSTATE_BNDCFG)
>  
>  /* AVX 512 adds three feature bits.  All three must be enabled.  */
> -#define X86_XSTATE_K		(1ULL << 5)
> -#define X86_XSTATE_ZMM_H	(1ULL << 6)
> -#define X86_XSTATE_ZMM		(1ULL << 7)
> +#define X86_XSTATE_K		(1ULL << X86_XSTATE_K_ID)
> +#define X86_XSTATE_ZMM_H	(1ULL << X86_XSTATE_ZMM_H_ID)
> +#define X86_XSTATE_ZMM		(1ULL << X86_XSTATE_ZMM_ID)
>  #define X86_XSTATE_AVX512	(X86_XSTATE_K | X86_XSTATE_ZMM_H \
>  				 | X86_XSTATE_ZMM)
>  
> -#define X86_XSTATE_PKRU		(1ULL << 9)
> +#define X86_XSTATE_PKRU		(1ULL << X86_XSTATE_PKRU_ID)
> +
> +/* Size and offsets of register states in the XSAVE area extended
> +   region.  Offsets are set to 0 to indicate the absence of the
> +   associated registers.  */

Extreme comment nitpick.  In "Size and offsets", one is singular and the
other is plural.  Should it be "Sizes and offsets", or "Size and
offset"?

In any case:

Approved-By: Simon Marchi <simon.marchi@efficios.com>

Simon
  
John Baldwin July 26, 2023, 9:27 p.m. UTC | #2
On 7/26/23 12:22 PM, Simon Marchi wrote:
> On 7/14/23 11:51, John Baldwin wrote:
>> The standard layout of the XSAVE extended state area consists of three
>> regions.  The first 512 bytes (legacy region) match the layout of the
>> FXSAVE instruction including floating point registers, MMX registers,
>> and SSE registers.  The next 64 bytes (XSAVE header) contains a header
>> with a fixed layout.  The final region (extended region) contains zero
>> or more optional state components.  Examples of these include the
>> upper 128 bits of YMM registers for AVX.
>>
>> These optional state components generally have an
>> architecturally-fixed size, but they are not assigned architectural
>> offsets in the extended region.  Instead, processors provide
>> additional CPUID leafs describing the size and offset of each
>> component in the "standard" layout for a given CPU.  (There is also a
>> "compact" format which uses an alternate layout, but existing OS's
>> currently export the "standard" layout when exporting XSAVE data via
>> ptrace() and core dumps.)
>>
>> To date, GDB has assumed the layout used on current Intel processors
>> for state components in the extended region and hardcoded those
>> offsets in the tables in i387-tdep.c and i387-fp.cc.  However, this
>> fails on recent AMD processors which use a different layout.
>> Specifically, AMD Zen3 and later processors do not leave space for the
>> MPX register set in between the AVX and AVX512 register sets.
>>
>> To rectify this, add an x86_xsave_layout structure which contains the
>> total size of the XSAVE extended state area as well as the offset of
>> each known optional state component.
>>
>> Subsequent commits will modify XSAVE parsing in both gdb and gdbserver
>> to use x86_xsave_layout.
>>
>> Co-authored-by: Aleksandar Paunovic <aleksandar.paunovic@intel.com>
>> ---
>>   gdbsupport/x86-xstate.h | 65 +++++++++++++++++++++++++++++++++++------
>>   1 file changed, 56 insertions(+), 9 deletions(-)
>>
>> diff --git a/gdbsupport/x86-xstate.h b/gdbsupport/x86-xstate.h
>> index b8740fd8701..27fc0bd12f2 100644
>> --- a/gdbsupport/x86-xstate.h
>> +++ b/gdbsupport/x86-xstate.h
>> @@ -20,22 +20,69 @@
>>   #ifndef COMMON_X86_XSTATE_H
>>   #define COMMON_X86_XSTATE_H
>>   
>> +/* The extended state feature IDs in the state component bitmap.  */
>> +#define X86_XSTATE_X87_ID	0
>> +#define X86_XSTATE_SSE_ID	1
>> +#define X86_XSTATE_AVX_ID	2
>> +#define X86_XSTATE_BNDREGS_ID	3
>> +#define X86_XSTATE_BNDCFG_ID	4
>> +#define X86_XSTATE_K_ID		5
>> +#define X86_XSTATE_ZMM_H_ID	6
>> +#define X86_XSTATE_ZMM_ID	7
>> +#define X86_XSTATE_PKRU_ID	9
>> +
>>   /* The extended state feature bits.  */
>> -#define X86_XSTATE_X87		(1ULL << 0)
>> -#define X86_XSTATE_SSE		(1ULL << 1)
>> -#define X86_XSTATE_AVX		(1ULL << 2)
>> -#define X86_XSTATE_BNDREGS	(1ULL << 3)
>> -#define X86_XSTATE_BNDCFG	(1ULL << 4)
>> +#define X86_XSTATE_X87		(1ULL << X86_XSTATE_X87_ID)
>> +#define X86_XSTATE_SSE		(1ULL << X86_XSTATE_SSE_ID)
>> +#define X86_XSTATE_AVX		(1ULL << X86_XSTATE_AVX_ID)
>> +#define X86_XSTATE_BNDREGS	(1ULL << X86_XSTATE_BNDREGS_ID)
>> +#define X86_XSTATE_BNDCFG	(1ULL << X86_XSTATE_BNDCFG_ID)
>>   #define X86_XSTATE_MPX		(X86_XSTATE_BNDREGS | X86_XSTATE_BNDCFG)
>>   
>>   /* AVX 512 adds three feature bits.  All three must be enabled.  */
>> -#define X86_XSTATE_K		(1ULL << 5)
>> -#define X86_XSTATE_ZMM_H	(1ULL << 6)
>> -#define X86_XSTATE_ZMM		(1ULL << 7)
>> +#define X86_XSTATE_K		(1ULL << X86_XSTATE_K_ID)
>> +#define X86_XSTATE_ZMM_H	(1ULL << X86_XSTATE_ZMM_H_ID)
>> +#define X86_XSTATE_ZMM		(1ULL << X86_XSTATE_ZMM_ID)
>>   #define X86_XSTATE_AVX512	(X86_XSTATE_K | X86_XSTATE_ZMM_H \
>>   				 | X86_XSTATE_ZMM)
>>   
>> -#define X86_XSTATE_PKRU		(1ULL << 9)
>> +#define X86_XSTATE_PKRU		(1ULL << X86_XSTATE_PKRU_ID)
>> +
>> +/* Size and offsets of register states in the XSAVE area extended
>> +   region.  Offsets are set to 0 to indicate the absence of the
>> +   associated registers.  */
> 
> Extreme comment nitpick.  In "Size and offsets", one is singular and the
> other is plural.  Should it be "Sizes and offsets", or "Size and
> offset"?

Ah, there's a single size and multiple offsets.  Is this version clearer:

/* Total size of the XSAVE area extended region and offsets of
    register states within the region.  Offsets are set to 0 to
    indicate the absence of the associated registers.  */
  
Simon Marchi July 26, 2023, 10:51 p.m. UTC | #3
On July 26, 2023 5:27:41 p.m. EDT, John Baldwin <jhb@FreeBSD.org> wrote:
>On 7/26/23 12:22 PM, Simon Marchi wrote:
>> On 7/14/23 11:51, John Baldwin wrote:
>>> The standard layout of the XSAVE extended state area consists of three
>>> regions.  The first 512 bytes (legacy region) match the layout of the
>>> FXSAVE instruction including floating point registers, MMX registers,
>>> and SSE registers.  The next 64 bytes (XSAVE header) contains a header
>>> with a fixed layout.  The final region (extended region) contains zero
>>> or more optional state components.  Examples of these include the
>>> upper 128 bits of YMM registers for AVX.
>>> 
>>> These optional state components generally have an
>>> architecturally-fixed size, but they are not assigned architectural
>>> offsets in the extended region.  Instead, processors provide
>>> additional CPUID leafs describing the size and offset of each
>>> component in the "standard" layout for a given CPU.  (There is also a
>>> "compact" format which uses an alternate layout, but existing OS's
>>> currently export the "standard" layout when exporting XSAVE data via
>>> ptrace() and core dumps.)
>>> 
>>> To date, GDB has assumed the layout used on current Intel processors
>>> for state components in the extended region and hardcoded those
>>> offsets in the tables in i387-tdep.c and i387-fp.cc.  However, this
>>> fails on recent AMD processors which use a different layout.
>>> Specifically, AMD Zen3 and later processors do not leave space for the
>>> MPX register set in between the AVX and AVX512 register sets.
>>> 
>>> To rectify this, add an x86_xsave_layout structure which contains the
>>> total size of the XSAVE extended state area as well as the offset of
>>> each known optional state component.
>>> 
>>> Subsequent commits will modify XSAVE parsing in both gdb and gdbserver
>>> to use x86_xsave_layout.
>>> 
>>> Co-authored-by: Aleksandar Paunovic <aleksandar.paunovic@intel.com>
>>> ---
>>>   gdbsupport/x86-xstate.h | 65 +++++++++++++++++++++++++++++++++++------
>>>   1 file changed, 56 insertions(+), 9 deletions(-)
>>> 
>>> diff --git a/gdbsupport/x86-xstate.h b/gdbsupport/x86-xstate.h
>>> index b8740fd8701..27fc0bd12f2 100644
>>> --- a/gdbsupport/x86-xstate.h
>>> +++ b/gdbsupport/x86-xstate.h
>>> @@ -20,22 +20,69 @@
>>>   #ifndef COMMON_X86_XSTATE_H
>>>   #define COMMON_X86_XSTATE_H
>>>   +/* The extended state feature IDs in the state component bitmap.  */
>>> +#define X86_XSTATE_X87_ID	0
>>> +#define X86_XSTATE_SSE_ID	1
>>> +#define X86_XSTATE_AVX_ID	2
>>> +#define X86_XSTATE_BNDREGS_ID	3
>>> +#define X86_XSTATE_BNDCFG_ID	4
>>> +#define X86_XSTATE_K_ID		5
>>> +#define X86_XSTATE_ZMM_H_ID	6
>>> +#define X86_XSTATE_ZMM_ID	7
>>> +#define X86_XSTATE_PKRU_ID	9
>>> +
>>>   /* The extended state feature bits.  */
>>> -#define X86_XSTATE_X87		(1ULL << 0)
>>> -#define X86_XSTATE_SSE		(1ULL << 1)
>>> -#define X86_XSTATE_AVX		(1ULL << 2)
>>> -#define X86_XSTATE_BNDREGS	(1ULL << 3)
>>> -#define X86_XSTATE_BNDCFG	(1ULL << 4)
>>> +#define X86_XSTATE_X87		(1ULL << X86_XSTATE_X87_ID)
>>> +#define X86_XSTATE_SSE		(1ULL << X86_XSTATE_SSE_ID)
>>> +#define X86_XSTATE_AVX		(1ULL << X86_XSTATE_AVX_ID)
>>> +#define X86_XSTATE_BNDREGS	(1ULL << X86_XSTATE_BNDREGS_ID)
>>> +#define X86_XSTATE_BNDCFG	(1ULL << X86_XSTATE_BNDCFG_ID)
>>>   #define X86_XSTATE_MPX		(X86_XSTATE_BNDREGS | X86_XSTATE_BNDCFG)
>>>     /* AVX 512 adds three feature bits.  All three must be enabled.  */
>>> -#define X86_XSTATE_K		(1ULL << 5)
>>> -#define X86_XSTATE_ZMM_H	(1ULL << 6)
>>> -#define X86_XSTATE_ZMM		(1ULL << 7)
>>> +#define X86_XSTATE_K		(1ULL << X86_XSTATE_K_ID)
>>> +#define X86_XSTATE_ZMM_H	(1ULL << X86_XSTATE_ZMM_H_ID)
>>> +#define X86_XSTATE_ZMM		(1ULL << X86_XSTATE_ZMM_ID)
>>>   #define X86_XSTATE_AVX512	(X86_XSTATE_K | X86_XSTATE_ZMM_H \
>>>   				 | X86_XSTATE_ZMM)
>>>   -#define X86_XSTATE_PKRU		(1ULL << 9)
>>> +#define X86_XSTATE_PKRU		(1ULL << X86_XSTATE_PKRU_ID)
>>> +
>>> +/* Size and offsets of register states in the XSAVE area extended
>>> +   region.  Offsets are set to 0 to indicate the absence of the
>>> +   associated registers.  */
>> 
>> Extreme comment nitpick.  In "Size and offsets", one is singular and the
>> other is plural.  Should it be "Sizes and offsets", or "Size and
>> offset"?
>
>Ah, there's a single size and multiple offsets.  Is this version clearer:
>
>/* Total size of the XSAVE area extended region and offsets of
>   register states within the region.  Offsets are set to 0 to
>   indicate the absence of the associated registers.  */
>

Ah, sorry. The new comment is clearer, thanks. 

Simon
  

Patch

diff --git a/gdbsupport/x86-xstate.h b/gdbsupport/x86-xstate.h
index b8740fd8701..27fc0bd12f2 100644
--- a/gdbsupport/x86-xstate.h
+++ b/gdbsupport/x86-xstate.h
@@ -20,22 +20,69 @@ 
 #ifndef COMMON_X86_XSTATE_H
 #define COMMON_X86_XSTATE_H
 
+/* The extended state feature IDs in the state component bitmap.  */
+#define X86_XSTATE_X87_ID	0
+#define X86_XSTATE_SSE_ID	1
+#define X86_XSTATE_AVX_ID	2
+#define X86_XSTATE_BNDREGS_ID	3
+#define X86_XSTATE_BNDCFG_ID	4
+#define X86_XSTATE_K_ID		5
+#define X86_XSTATE_ZMM_H_ID	6
+#define X86_XSTATE_ZMM_ID	7
+#define X86_XSTATE_PKRU_ID	9
+
 /* The extended state feature bits.  */
-#define X86_XSTATE_X87		(1ULL << 0)
-#define X86_XSTATE_SSE		(1ULL << 1)
-#define X86_XSTATE_AVX		(1ULL << 2)
-#define X86_XSTATE_BNDREGS	(1ULL << 3)
-#define X86_XSTATE_BNDCFG	(1ULL << 4)
+#define X86_XSTATE_X87		(1ULL << X86_XSTATE_X87_ID)
+#define X86_XSTATE_SSE		(1ULL << X86_XSTATE_SSE_ID)
+#define X86_XSTATE_AVX		(1ULL << X86_XSTATE_AVX_ID)
+#define X86_XSTATE_BNDREGS	(1ULL << X86_XSTATE_BNDREGS_ID)
+#define X86_XSTATE_BNDCFG	(1ULL << X86_XSTATE_BNDCFG_ID)
 #define X86_XSTATE_MPX		(X86_XSTATE_BNDREGS | X86_XSTATE_BNDCFG)
 
 /* AVX 512 adds three feature bits.  All three must be enabled.  */
-#define X86_XSTATE_K		(1ULL << 5)
-#define X86_XSTATE_ZMM_H	(1ULL << 6)
-#define X86_XSTATE_ZMM		(1ULL << 7)
+#define X86_XSTATE_K		(1ULL << X86_XSTATE_K_ID)
+#define X86_XSTATE_ZMM_H	(1ULL << X86_XSTATE_ZMM_H_ID)
+#define X86_XSTATE_ZMM		(1ULL << X86_XSTATE_ZMM_ID)
 #define X86_XSTATE_AVX512	(X86_XSTATE_K | X86_XSTATE_ZMM_H \
 				 | X86_XSTATE_ZMM)
 
-#define X86_XSTATE_PKRU		(1ULL << 9)
+#define X86_XSTATE_PKRU		(1ULL << X86_XSTATE_PKRU_ID)
+
+/* Total size of the XSAVE area extended region and offsets of
+   register states within the region.  Offsets are set to 0 to
+   indicate the absence of the associated registers.  */
+
+struct x86_xsave_layout
+{
+  int sizeof_xsave = 0;
+  int avx_offset = 0;
+  int bndregs_offset = 0;
+  int bndcfg_offset = 0;
+  int k_offset = 0;
+  int zmm_h_offset = 0;
+  int zmm_offset = 0;
+  int pkru_offset = 0;
+};
+
+constexpr bool operator== (const x86_xsave_layout &lhs,
+			   const x86_xsave_layout &rhs)
+{
+  return lhs.sizeof_xsave == rhs.sizeof_xsave
+    && lhs.avx_offset == rhs.avx_offset
+    && lhs.bndregs_offset == rhs.bndregs_offset
+    && lhs.bndcfg_offset == rhs.bndcfg_offset
+    && lhs.k_offset == rhs.k_offset
+    && lhs.zmm_h_offset == rhs.zmm_h_offset
+    && lhs.zmm_offset == rhs.zmm_offset
+    && lhs.pkru_offset == rhs.pkru_offset;
+}
+
+constexpr bool operator!= (const x86_xsave_layout &lhs,
+			   const x86_xsave_layout &rhs)
+{
+  return !(lhs == rhs);
+}
+
 
 /* Supported mask and size of the extended state.  */
 #define X86_XSTATE_X87_MASK	X86_XSTATE_X87