x86: Simplify AMX XSAVEC calculation logic.

Message ID 20250331050248.1275139-1-skpgkp2@gmail.com (mailing list archive)
State Under Review
Delegated to: Florian Weimer
Series x86: Simplify AMX XSAVEC calculation logic.

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Sunil Pandey March 31, 2025, 5:02 a.m. UTC
  Tested on SKL/SKX/EMR without regressions.
---
 sysdeps/x86/cpu-features.c | 62 +++++++++-----------------------------
 1 file changed, 15 insertions(+), 47 deletions(-)
  

Comments

Florian Weimer March 31, 2025, 1:13 p.m. UTC | #1
* Sunil K. Pandey:

> +#ifdef __x86_64__
> +		      if (amx_flag)
> +			{
> +			  unsigned int amx_size
> +			    = xstate_comp_offsets[18] + xstate_comp_sizes[18];
> +			  _dl_x86_features_tlsdesc_state_size
> +			    = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> +					64);
> +			}
> +#endif

I dislike the magic 18 (as the AMX bit number) appearing here.

The logic for including and excluding states seems reversed to me.
Shouldn't we save and restore everything that we don't know anything
about, especially for the TLSDESC case?

Thanks,
Florian
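
For illustration, a minimal sketch of the first half of this review comment: the same hunk with the literal 18 replaced by a named component constant. X86_XSTATE_TILEDATA_ID is assumed here to be the existing glibc name for component 18 (it is used later in this thread); this is a sketch, not the v2 patch.

  #ifdef __x86_64__
		      if (amx_flag)
			{
			  /* Hypothetical rewrite: name the AMX tile-data
			     component instead of hard-coding 18.  */
			  unsigned int amx_size
			    = (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID]
			       + xstate_comp_sizes[X86_XSTATE_TILEDATA_ID]);
			  _dl_x86_features_tlsdesc_state_size
			    = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
					64);
			}
  #endif

The second half of the comment, about which states should be included or excluded, is taken up in the v2 discussion below.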
  
Sunil Pandey April 1, 2025, 4:25 a.m. UTC | #2
On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:

> * Sunil K. Pandey:
>
> > +#ifdef __x86_64__
> > +                   if (amx_flag)
> > +                     {
> > +                       unsigned int amx_size
> > +                         = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> > +                       _dl_x86_features_tlsdesc_state_size
> > +                         = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> > +                                     64);
> > +                     }
> > +#endif
>
> I dislike the magic 18 (as the AMX bit number) appearing here.
>
> The logic for including and excluding states seems reversed to me.
> Shouldn't we save and restore everything that we don't know anything
> about, especially for the TLSDESC case?
>
> Thanks,
> Florian
>

Thank you so much. Will submit v2.
  
Sunil Pandey April 1, 2025, 2:03 p.m. UTC | #3
On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:

>
>
> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>
>> * Sunil K. Pandey:
>>
>> > +#ifdef __x86_64__
>> > +                   if (amx_flag)
>> > +                     {
>> > +                       unsigned int amx_size
>> > +                         = xstate_comp_offsets[18] +
>> xstate_comp_sizes[18];
>> > +                       _dl_x86_features_tlsdesc_state_size
>> > +                         = ALIGN_UP (amx_size +
>> TLSDESC_CALL_REGISTER_SAVE_AREA,
>> > +                                     64);
>> > +                     }
>> > +#endif
>>
>> I dislike the magic 18 (as the AMX bit number) appearing here.
>>
>> The logic for including and excluding states seems reversed to me.
>> Shouldn't we save and restore everything that we don't know anything
>> about, especially for the TLSDESC case?
>>
>> Thanks,
>> Florian
>>
>
> Thank you so much. Will submit v2.
>

v2 patch attached.
  
H.J. Lu April 2, 2025, 10:23 p.m. UTC | #4
On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>
>>
>>
>> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>>>
>>> * Sunil K. Pandey:
>>>
>>> > +#ifdef __x86_64__
>>> > +                   if (amx_flag)
>>> > +                     {
>>> > +                       unsigned int amx_size
>>> > +                         = xstate_comp_offsets[18] + xstate_comp_sizes[18];
>>> > +                       _dl_x86_features_tlsdesc_state_size
>>> > +                         = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
>>> > +                                     64);
>>> > +                     }
>>> > +#endif
>>>
>>> I dislike the magic 18 (as the AMX bit number) appearing here.
>>>
>>> The logic for including and excluding states seems reversed to me.
>>> Shouldn't we save and restore everything that we don't know anything
>>> about, especially for the TLSDESC case?
>>>
>>> Thanks,
>>> Florian
>>
>>
>> Thank you so much. Will submit v2.
>
>
> v2 patch attached.

Please add more descriptions in the commit log.

 #ifdef __x86_64__
-		      unsigned int amx_size
-			= (xstate_amx_comp_offsets[31]
-			   + xstate_amx_comp_sizes[31]);
-		      amx_size
-			= ALIGN_UP ((amx_size
-				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
-				    64);
-		      /* Set TLSDESC state size to the compact AMX
-			 state size for XSAVEC.  */
-		      _dl_x86_features_tlsdesc_state_size = amx_size;
+		      _dl_x86_features_tlsdesc_state_size
+			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
+		      /* Subtract AMX size from total size.  */

Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
X86_XSTATE_TILEDATA_ID space.

+		      size -= ((xstate_comp_offsets[18]
+				+ xstate_comp_sizes[18])

xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?

+			       - (xstate_comp_offsets[16]
+				  + xstate_comp_sizes[16]));

xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
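
Putting the two suggested substitutions together, the hunk H.J. is asking for would read roughly as follows. This is a sketch, not the actual v2/v3 patch; X86_XSTATE_TILECFG_ID and X86_XSTATE_TILEDATA_ID are assumed to be the existing glibc names for components 17 and 18.

		      _dl_x86_features_tlsdesc_state_size
			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
		      /* Subtract the size from the start of the
			 X86_XSTATE_TILECFG_ID space to the end of the
			 X86_XSTATE_TILEDATA_ID space.  */
		      size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
			       - xstate_comp_offsets[X86_XSTATE_TILECFG_ID]);

Sunil's reply below questions whether those two array entries line up the way this subtraction assumes; H.J.'s follow-up (#6) argues that they do, because both offsets are 64-byte aligned whenever AMX is present and equal when it is not.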
  
Sunil Pandey April 2, 2025, 11:45 p.m. UTC | #5
On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >
> >
> >
> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >>
> >>
> >>
> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com>
> wrote:
> >>>
> >>> * Sunil K. Pandey:
> >>>
> >>> > +#ifdef __x86_64__
> >>> > +                   if (amx_flag)
> >>> > +                     {
> >>> > +                       unsigned int amx_size
> >>> > +                         = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> >>> > +                       _dl_x86_features_tlsdesc_state_size
> >>> > +                         = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> >>> > +                                     64);
> >>> > +                     }
> >>> > +#endif
> >>>
> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> >>>
> >>> The logic for including and excluding states seems reversed to me.
> >>> Shouldn't we save and restore everything that we don't know anything
> >>> about, especially for the TLSDESC case?
> >>>
> >>> Thanks,
> >>> Florian
> >>
> >>
> >> Thank you so much. Will submit v2.
> >
> >
> > v2 patch attached.
>
> Please add more descriptions in the commit log.
>
>  #ifdef __x86_64__
> -       unsigned int amx_size
> - = (xstate_amx_comp_offsets[31]
> -    + xstate_amx_comp_sizes[31]);
> -       amx_size
> - = ALIGN_UP ((amx_size
> -      + TLSDESC_CALL_REGISTER_SAVE_AREA),
> -     64);
> -       /* Set TLSDESC state size to the compact AMX
> - state size for XSAVEC.  */
> -       _dl_x86_features_tlsdesc_state_size = amx_size;
> +       _dl_x86_features_tlsdesc_state_size
> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> +       /* Subtract AMX size from total size.  */
>
> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
> X86_XSTATE_TILEDATA_ID space.
>
> +       size -= ((xstate_comp_offsets[18]
> + + xstate_comp_sizes[18])
>
> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>
> +        - (xstate_comp_offsets[16]
> +   + xstate_comp_sizes[16]));
>
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>


 X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start at a
cache-aligned location.
Depending on which previous states get enabled, the size calculation may not
be correct.

How about

 size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
              - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
                 + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));



> --
> H.J.
>
  
H.J. Lu April 3, 2025, 2:10 a.m. UTC | #6
On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> >
>> >
>> >
>> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> >>
>> >>
>> >>
>> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>> >>>
>> >>> * Sunil K. Pandey:
>> >>>
>> >>> > +#ifdef __x86_64__
>> >>> > +                   if (amx_flag)
>> >>> > +                     {
>> >>> > +                       unsigned int amx_size
>> >>> > +                         = xstate_comp_offsets[18] + xstate_comp_sizes[18];
>> >>> > +                       _dl_x86_features_tlsdesc_state_size
>> >>> > +                         = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
>> >>> > +                                     64);
>> >>> > +                     }
>> >>> > +#endif
>> >>>
>> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
>> >>>
>> >>> The logic for including and excluding states seems reversed to me.
>> >>> Shouldn't we save and restore everything that we don't know anything
>> >>> about, especially for the TLSDESC case?
>> >>>
>> >>> Thanks,
>> >>> Florian
>> >>
>> >>
>> >> Thank you so much. Will submit v2.
>> >
>> >
>> > v2 patch attached.
>>
>> Please add more descriptions in the commit log.
>>
>>  #ifdef __x86_64__
>> -       unsigned int amx_size
>> - = (xstate_amx_comp_offsets[31]
>> -    + xstate_amx_comp_sizes[31]);
>> -       amx_size
>> - = ALIGN_UP ((amx_size
>> -      + TLSDESC_CALL_REGISTER_SAVE_AREA),
>> -     64);
>> -       /* Set TLSDESC state size to the compact AMX
>> - state size for XSAVEC.  */
>> -       _dl_x86_features_tlsdesc_state_size = amx_size;
>> +       _dl_x86_features_tlsdesc_state_size
>> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>> +       /* Subtract AMX size from total size.  */
>>
>> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
>> X86_XSTATE_TILEDATA_ID space.
>>
>> +       size -= ((xstate_comp_offsets[18]
>> + + xstate_comp_sizes[18])
>>
>> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>>
>> +        - (xstate_comp_offsets[16]
>> +   + xstate_comp_sizes[16]));
>>
>> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>
>
>
>  X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from cache aligned location.
> Depending on which previous state gets enabled, size calculation may not be correct.
>
> How about
>
>  size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
>               - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
>                  + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
>

How about

size  = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,  64);
size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
xstate_comp_offsets[X86_XSTATE_TILECFG_ID];

xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
if there is no AMX.  Otherwise, they are multiples of 64.
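
To make the alignment argument concrete, here is a standalone check with a hypothetical compact layout (x87/SSE/AVX plus the two AMX components). The component numbers 17/18, the 64-byte TILECFG and 8192-byte TILEDATA sizes, and the ALIGN_UP macro are assumptions of this sketch, not values read from CPUID.

  #include <assert.h>
  #include <stdio.h>

  #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

  int
  main (void)
  {
    unsigned int offs[32] = { 0 }, sizes[32] = { 0 };

    /* Legacy area + XSAVE header end at 576; AVX occupies 576..831.  */
    offs[2] = 576;
    sizes[2] = 256;

    /* Components 3..16 disabled here: offsets simply propagate.  */
    for (int i = 3; i <= 16; i++)
      offs[i] = offs[i - 1] + sizes[i - 1];

    /* TILECFG (17) and TILEDATA (18) require 64-byte alignment.  */
    offs[17] = ALIGN_UP (offs[16] + sizes[16], 64);
    sizes[17] = 64;
    offs[18] = ALIGN_UP (offs[17] + sizes[17], 64);
    sizes[18] = 8192;
    offs[19] = offs[18] + sizes[18];

    /* offs[TILEDATA_ID + 1] - offs[TILECFG_ID] is exactly the AMX region
       (64 + 8192 = 8256 in this layout).  With AMX absent, sizes[17] and
       sizes[18] are 0 and no alignment is applied, so offs[19] == offs[17]
       and the subtraction is zero, matching the statement above.  */
    unsigned int amx_region = offs[19] - offs[17];
    printf ("AMX region: %u bytes\n", amx_region);
    assert (amx_region == sizes[17] + sizes[18]);
    return 0;
  }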
  
H.J. Lu April 3, 2025, 3:55 p.m. UTC | #7
On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >
> >
> >
> > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>
> >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >> >
> >> >
> >> >
> >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >> >>
> >> >>
> >> >>
> >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
> >> >>>
> >> >>> * Sunil K. Pandey:
> >> >>>
> >> >>> > +#ifdef __x86_64__
> >> >>> > +                   if (amx_flag)
> >> >>> > +                     {
> >> >>> > +                       unsigned int amx_size
> >> >>> > +                         = xstate_comp_offsets[18] + xstate_comp_sizes[18];
> >> >>> > +                       _dl_x86_features_tlsdesc_state_size
> >> >>> > +                         = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> >> >>> > +                                     64);
> >> >>> > +                     }
> >> >>> > +#endif
> >> >>>
> >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> >> >>>
> >> >>> The logic for including and excluding states seems reversed to me.
> >> >>> Shouldn't we save and restore everything that we don't know anything
> >> >>> about, especially for the TLSDESC case?
> >> >>>
> >> >>> Thanks,
> >> >>> Florian
> >> >>
> >> >>
> >> >> Thank you so much. Will submit v2.
> >> >
> >> >
> >> > v2 patch attached.
> >>
> >> Please add more descriptions in the commit log.
> >>
> >>  #ifdef __x86_64__
> >> -       unsigned int amx_size
> >> - = (xstate_amx_comp_offsets[31]
> >> -    + xstate_amx_comp_sizes[31]);
> >> -       amx_size
> >> - = ALIGN_UP ((amx_size
> >> -      + TLSDESC_CALL_REGISTER_SAVE_AREA),
> >> -     64);
> >> -       /* Set TLSDESC state size to the compact AMX
> >> - state size for XSAVEC.  */
> >> -       _dl_x86_features_tlsdesc_state_size = amx_size;
> >> +       _dl_x86_features_tlsdesc_state_size
> >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> >> +       /* Subtract AMX size from total size.  */
> >>
> >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
> >> X86_XSTATE_TILEDATA_ID space.
> >>
> >> +       size -= ((xstate_comp_offsets[18]
> >> + + xstate_comp_sizes[18])
> >>
> >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
> >>
> >> +        - (xstate_comp_offsets[16]
> >> +   + xstate_comp_sizes[16]));
> >>
> >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
> >
> >
> >
> >  X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from cache aligned location.
> > Depending on which previous state gets enabled, size calculation may not be correct.
> >
> > How about
> >
> >  size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> >               - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
> >                  + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
> >
>
> How about
>
> size  = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,  64);
> size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
>
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> if there is no AMX.  Otherwise, they are multiples of 64.
>
>
> --
> H.J.

Please try this.
  
Sunil Pandey April 3, 2025, 4:24 p.m. UTC | #8
On Thu, Apr 3, 2025 at 8:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:

> On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > >
> > >
> > >
> > > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >>
> > >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com>
> wrote:
> > >> >
> > >> >
> > >> >
> > >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com>
> wrote:
> > >> >>
> > >> >>
> > >> >>
> > >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com>
> wrote:
> > >> >>>
> > >> >>> * Sunil K. Pandey:
> > >> >>>
> > >> >>> > +#ifdef __x86_64__
> > >> >>> > +                   if (amx_flag)
> > >> >>> > +                     {
> > >> >>> > +                       unsigned int amx_size
> > >> >>> > +                         = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> > >> >>> > +                       _dl_x86_features_tlsdesc_state_size
> > >> >>> > +                         = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> > >> >>> > +                                     64);
> > >> >>> > +                     }
> > >> >>> > +#endif
> > >> >>>
> > >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> > >> >>>
> > >> >>> The logic for including and excluding states seems reversed to me.
> > >> >>> Shouldn't we save and restore everything that we don't know
> anything
> > >> >>> about, especially for the TLSDESC case?
> > >> >>>
> > >> >>> Thanks,
> > >> >>> Florian
> > >> >>
> > >> >>
> > >> >> Thank you so much. Will submit v2.
> > >> >
> > >> >
> > >> > v2 patch attached.
> > >>
> > >> Please add more descriptions in the commit log.
> > >>
> > >>  #ifdef __x86_64__
> > >> -       unsigned int amx_size
> > >> - = (xstate_amx_comp_offsets[31]
> > >> -    + xstate_amx_comp_sizes[31]);
> > >> -       amx_size
> > >> - = ALIGN_UP ((amx_size
> > >> -      + TLSDESC_CALL_REGISTER_SAVE_AREA),
> > >> -     64);
> > >> -       /* Set TLSDESC state size to the compact AMX
> > >> - state size for XSAVEC.  */
> > >> -       _dl_x86_features_tlsdesc_state_size = amx_size;
> > >> +       _dl_x86_features_tlsdesc_state_size
> > >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> > >> +       /* Subtract AMX size from total size.  */
> > >>
> > >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the
> end of
> > >> X86_XSTATE_TILEDATA_ID space.
> > >>
> > >> +       size -= ((xstate_comp_offsets[18]
> > >> + + xstate_comp_sizes[18])
> > >>
> > >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
> > >>
> > >> +        - (xstate_comp_offsets[16]
> > >> +   + xstate_comp_sizes[16]));
> > >>
> > >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
> > >
> > >
> > >
> > >  X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from
> cache aligned location.
> > > Depending on which previous state gets enabled, size calculation may
> not be correct.
> > >
> > > How about
> > >
> > >  size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> > >               - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
> > >                  + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
> > >
> >
> > How about
> >
> > size  = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,  64);
> > size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
> >
> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
> > xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> > if there is no AMX.  Otherwise, they are multiples of 64.
> >
> >
> > --
> > H.J.
>
> Please try this.
>
>
> --
> H.J.
>

This looks great, as the MAX hard-coding also gets simplified.
Testing it out across different platforms.

--Sunil
  
Sunil Pandey April 3, 2025, 6:52 p.m. UTC | #9
On Thu, Apr 3, 2025 at 9:24 AM Sunil Pandey <skpgkp2@gmail.com> wrote:

>
>
> On Thu, Apr 3, 2025 at 8:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
>> On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> >
>> > On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> > >
>> > >
>> > >
>> > > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> > >>
>> > >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com>
>> wrote:
>> > >> >
>> > >> >
>> > >> >
>> > >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com>
>> wrote:
>> > >> >>
>> > >> >>
>> > >> >>
>> > >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <
>> fweimer@redhat.com> wrote:
>> > >> >>>
>> > >> >>> * Sunil K. Pandey:
>> > >> >>>
>> > >> >>> > +#ifdef __x86_64__
>> > >> >>> > +                   if (amx_flag)
>> > >> >>> > +                     {
>> > >> >>> > +                       unsigned int amx_size
>> > >> >>> > +                         = xstate_comp_offsets[18] +
>> xstate_comp_sizes[18];
>> > >> >>> > +                       _dl_x86_features_tlsdesc_state_size
>> > >> >>> > +                         = ALIGN_UP (amx_size +
>> TLSDESC_CALL_REGISTER_SAVE_AREA,
>> > >> >>> > +                                     64);
>> > >> >>> > +                     }
>> > >> >>> > +#endif
>> > >> >>>
>> > >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
>> > >> >>>
>> > >> >>> The logic for including and excluding states seems reversed to
>> me.
>> > >> >>> Shouldn't we save and restore everything that we don't know
>> anything
>> > >> >>> about, especially for the TLSDESC case?
>> > >> >>>
>> > >> >>> Thanks,
>> > >> >>> Florian
>> > >> >>
>> > >> >>
>> > >> >> Thank you so much. Will submit v2.
>> > >> >
>> > >> >
>> > >> > v2 patch attached.
>> > >>
>> > >> Please add more descriptions in the commit log.
>> > >>
>> > >>  #ifdef __x86_64__
>> > >> -       unsigned int amx_size
>> > >> - = (xstate_amx_comp_offsets[31]
>> > >> -    + xstate_amx_comp_sizes[31]);
>> > >> -       amx_size
>> > >> - = ALIGN_UP ((amx_size
>> > >> -      + TLSDESC_CALL_REGISTER_SAVE_AREA),
>> > >> -     64);
>> > >> -       /* Set TLSDESC state size to the compact AMX
>> > >> - state size for XSAVEC.  */
>> > >> -       _dl_x86_features_tlsdesc_state_size = amx_size;
>> > >> +       _dl_x86_features_tlsdesc_state_size
>> > >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>> > >> +       /* Subtract AMX size from total size.  */
>> > >>
>> > >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the
>> end of
>> > >> X86_XSTATE_TILEDATA_ID space.
>> > >>
>> > >> +       size -= ((xstate_comp_offsets[18]
>> > >> + + xstate_comp_sizes[18])
>> > >>
>> > >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>> > >>
>> > >> +        - (xstate_comp_offsets[16]
>> > >> +   + xstate_comp_sizes[16]));
>> > >>
>> > >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>> > >
>> > >
>> > >
>> > >  X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from
>> cache aligned location.
>> > > Depending on which previous state gets enabled, size calculation may
>> not be correct.
>> > >
>> > > How about
>> > >
>> > >  size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
>> > >               - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
>> > >                  + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
>> > >
>> >
>> > How about
>> >
>> > size  = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,  64);
>> > size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
>> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
>> >
>> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
>> > xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
>> > if there is no AMX.  Otherwise, they are multiples of 64.
>> >
>> >
>> > --
>> > H.J.
>>
>> Please try this.
>>
>>
>> --
>> H.J.
>>
>
> This looks great, as MAX hard coding also gets simplified.
> Testing it out across different platforms.
>
> --Sunil
>

Completed validation on SKL/SKX/SPR/SDE and compared the xsave numbers with
the 'ld.so --list-diagnostics' option; no regressions.
So this patch looks good to me.

--Sunil
  

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 6cf7e4caf1..467cf07dbb 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -327,51 +327,30 @@  update_active (struct cpu_features *cpu_features)
 		{
 		  unsigned int xstate_comp_offsets[32];
 		  unsigned int xstate_comp_sizes[32];
+		  unsigned int i;
 #ifdef __x86_64__
-		  unsigned int xstate_amx_comp_offsets[32];
-		  unsigned int xstate_amx_comp_sizes[32];
-		  unsigned int amx_ecx;
+		  bool amx_flag = 0;
 #endif
-		  unsigned int i;
 
 		  xstate_comp_offsets[0] = 0;
 		  xstate_comp_offsets[1] = 160;
 		  xstate_comp_offsets[2] = 576;
 		  xstate_comp_sizes[0] = 160;
 		  xstate_comp_sizes[1] = 256;
-#ifdef __x86_64__
-		  xstate_amx_comp_offsets[0] = 0;
-		  xstate_amx_comp_offsets[1] = 160;
-		  xstate_amx_comp_offsets[2] = 576;
-		  xstate_amx_comp_sizes[0] = 160;
-		  xstate_amx_comp_sizes[1] = 256;
-#endif
 
 		  for (i = 2; i < 32; i++)
 		    {
 		      if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
 			{
 			  __cpuid_count (0xd, i, eax, ebx, ecx, edx);
+			  xstate_comp_sizes[i] = eax;
 #ifdef __x86_64__
-			  /* Include this in xsave_state_full_size.  */
-			  amx_ecx = ecx;
-			  xstate_amx_comp_sizes[i] = eax;
 			  if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
-			    {
-			      /* Exclude this from xsave_state_size.  */
-			      ecx = 0;
-			      xstate_comp_sizes[i] = 0;
-			    }
-			  else
+			    amx_flag = 1;
 #endif
-			    xstate_comp_sizes[i] = eax;
 			}
 		      else
 			{
-#ifdef __x86_64__
-			  amx_ecx = 0;
-			  xstate_amx_comp_sizes[i] = 0;
-#endif
 			  ecx = 0;
 			  xstate_comp_sizes[i] = 0;
 			}
@@ -384,38 +363,27 @@  update_active (struct cpu_features *cpu_features)
 			  if ((ecx & (1 << 1)) != 0)
 			    xstate_comp_offsets[i]
 			      = ALIGN_UP (xstate_comp_offsets[i], 64);
-#ifdef __x86_64__
-			  xstate_amx_comp_offsets[i]
-			    = (xstate_amx_comp_offsets[i - 1]
-			       + xstate_amx_comp_sizes[i - 1]);
-			  if ((amx_ecx & (1 << 1)) != 0)
-			    xstate_amx_comp_offsets[i]
-			      = ALIGN_UP (xstate_amx_comp_offsets[i],
-					  64);
-#endif
 			}
 		    }
 
 		  /* Use XSAVEC.  */
 		  unsigned int size
-		    = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+		    = xstate_comp_offsets[9] + xstate_comp_sizes[9];
 		  if (size)
 		    {
-#ifdef __x86_64__
-		      unsigned int amx_size
-			= (xstate_amx_comp_offsets[31]
-			   + xstate_amx_comp_sizes[31]);
-		      amx_size
-			= ALIGN_UP ((amx_size
-				     + TLSDESC_CALL_REGISTER_SAVE_AREA),
-				    64);
-		      /* Set TLSDESC state size to the compact AMX
-			 state size for XSAVEC.  */
-		      _dl_x86_features_tlsdesc_state_size = amx_size;
-#endif
 		      cpu_features->xsave_state_size
 			= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
 				    64);
+#ifdef __x86_64__
+		      if (amx_flag)
+			{
+			  unsigned int amx_size
+			    = xstate_comp_offsets[18] + xstate_comp_sizes[18];
+			  _dl_x86_features_tlsdesc_state_size
+			    = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+					64);
+			}
+#endif
 		      CPU_FEATURE_SET (cpu_features, XSAVEC);
 		    }
 		}
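
For readers who want to see the simplified XSAVEC arithmetic outside of cpu-features.c, here is a hedged standalone sketch that enumerates CPUID leaf 0xD the same way the patched loop does and prints the compact size with and without the AMX components. The hard-coded legacy offsets/sizes and the use of indices 9 and 18 mirror the patch above; the macro names, the file name, and the exact bit mask are assumptions of this sketch (the real code uses glibc's FULL_STATE_SAVE_MASK).

  /* Sketch: compact XSAVE (XSAVEC) size computation via CPUID leaf 0xD.
     Compile on x86-64 with, e.g.: gcc -O2 xsavec-size.c  (hypothetical name).  */
  #include <cpuid.h>
  #include <stdio.h>

  #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

  /* Example mask: x87, SSE, AVX, AVX-512 (opmask/ZMM_Hi256/Hi16_ZMM), PKRU,
     plus the two AMX tile components.  Assumed for this sketch only.  */
  #define NON_AMX_MASK 0x2e7u		/* bits 0,1,2,5,6,7,9 */
  #define AMX_MASK     (3u << 17)	/* bits 17,18: TILECFG, TILEDATA */
  #define FULL_MASK    (NON_AMX_MASK | AMX_MASK)

  int
  main (void)
  {
    unsigned int eax, ebx, ecx, edx;
    unsigned int offs[32] = { 0, 160, 576 };	/* legacy x87/SSE + header */
    unsigned int sizes[32] = { 160, 256 };

    for (unsigned int i = 2; i < 32; i++)
      {
	if ((FULL_MASK & (1u << i)) != 0)
	  {
	    __cpuid_count (0xd, i, eax, ebx, ecx, edx);
	    sizes[i] = eax;
	  }
	else
	  {
	    ecx = 0;
	    sizes[i] = 0;
	  }
	if (i > 2)
	  {
	    offs[i] = offs[i - 1] + sizes[i - 1];
	    if ((ecx & (1 << 1)) != 0)	/* component needs 64-byte alignment */
	      offs[i] = ALIGN_UP (offs[i], 64);
	  }
      }
    (void) ebx;
    (void) edx;

    /* Component 9 (PKRU) is the highest non-AMX component in the mask;
       component 18 (TILEDATA) is the highest overall, as in the patch.
       The real code additionally adds TLSDESC_CALL_REGISTER_SAVE_AREA and
       rounds both values up to a multiple of 64.  */
    printf ("XSAVEC size without AMX: %u\n", offs[9] + sizes[9]);
    printf ("XSAVEC size with AMX:    %u\n", offs[18] + sizes[18]);
    return 0;
  }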