x86: Simplify AMX XSAVEC calculation logic.
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
Tested on SKL/SKX/EMR without regressions.
---
sysdeps/x86/cpu-features.c | 62 +++++++++-----------------------------
1 file changed, 15 insertions(+), 47 deletions(-)
Comments
* Sunil K. Pandey:
> +#ifdef __x86_64__
> + if (amx_flag)
> + {
> + unsigned int amx_size
> + = xstate_comp_offsets[18] + xstate_comp_sizes[18];
> + _dl_x86_features_tlsdesc_state_size
> + = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> + 64);
> + }
> +#endif
I dislike the magic 18 (as the AMX bit number) appearing here.
The logic for including and excluding states seems reversed to me.
Shouldn't we save and restore everything that we don't know anything
about, especially for the TLSDESC case?
Thanks,
Florian
On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
> * Sunil K. Pandey:
>
> > +#ifdef __x86_64__
> > + if (amx_flag)
> > + {
> > + unsigned int amx_size
> > + = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> > + _dl_x86_features_tlsdesc_state_size
> > + = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> > + 64);
> > + }
> > +#endif
>
> I dislike the magic 18 (as the AMX bit number) appearing here.
>
> The logic for including and excluding states seems reversed to me.
> Shouldn't we save and restore everything that we don't know anything
> about, especially for the TLSDESC case?
>
> Thanks,
> Florian
>
Thank you so much. Will submit v2.
On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>
>> * Sunil K. Pandey:
>>
>> > +#ifdef __x86_64__
>> > + if (amx_flag)
>> > + {
>> > + unsigned int amx_size
>> > + = xstate_comp_offsets[18] +
>> xstate_comp_sizes[18];
>> > + _dl_x86_features_tlsdesc_state_size
>> > + = ALIGN_UP (amx_size +
>> TLSDESC_CALL_REGISTER_SAVE_AREA,
>> > + 64);
>> > + }
>> > +#endif
>>
>> I dislike the magic 18 (as the AMX bit number) appearing here.
>>
>> The logic for including and excluding states seems reversed to me.
>> Shouldn't we save and restore everything that we don't know anything
>> about, especially for the TLSDESC case?
>>
>> Thanks,
>> Florian
>>
>
> Thank you so much. Will submit v2.
>
v2 patch attached.
On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>>
>>
>>
>> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>>>
>>> * Sunil K. Pandey:
>>>
>>> > +#ifdef __x86_64__
>>> > + if (amx_flag)
>>> > + {
>>> > + unsigned int amx_size
>>> > + = xstate_comp_offsets[18] + xstate_comp_sizes[18];
>>> > + _dl_x86_features_tlsdesc_state_size
>>> > + = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
>>> > + 64);
>>> > + }
>>> > +#endif
>>>
>>> I dislike the magic 18 (as the AMX bit number) appearing here.
>>>
>>> The logic for including and excluding states seems reversed to me.
>>> Shouldn't we save and restore everything that we don't know anything
>>> about, especially for the TLSDESC case?
>>>
>>> Thanks,
>>> Florian
>>
>>
>> Thank you so much. Will submit v2.
>
>
> v2 patch attached.
Please add more descriptions in the commit log.
#ifdef __x86_64__
- unsigned int amx_size
- = (xstate_amx_comp_offsets[31]
- + xstate_amx_comp_sizes[31]);
- amx_size
- = ALIGN_UP ((amx_size
- + TLSDESC_CALL_REGISTER_SAVE_AREA),
- 64);
- /* Set TLSDESC state size to the compact AMX
- state size for XSAVEC. */
- _dl_x86_features_tlsdesc_state_size = amx_size;
+ _dl_x86_features_tlsdesc_state_size
+ = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
+ /* Subtract AMX size from total size. */
Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
X86_XSTATE_TILEDATA_ID space.
+ size -= ((xstate_comp_offsets[18]
+ + xstate_comp_sizes[18])
xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
+ - (xstate_comp_offsets[16]
+ + xstate_comp_sizes[16]));
xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >
> >
> >
> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >>
> >>
> >>
> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com>
> wrote:
> >>>
> >>> * Sunil K. Pandey:
> >>>
> >>> > +#ifdef __x86_64__
> >>> > + if (amx_flag)
> >>> > + {
> >>> > + unsigned int amx_size
> >>> > + = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> >>> > + _dl_x86_features_tlsdesc_state_size
> >>> > + = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> >>> > + 64);
> >>> > + }
> >>> > +#endif
> >>>
> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> >>>
> >>> The logic for including and excluding states seems reversed to me.
> >>> Shouldn't we save and restore everything that we don't know anything
> >>> about, especially for the TLSDESC case?
> >>>
> >>> Thanks,
> >>> Florian
> >>
> >>
> >> Thank you so much. Will submit v2.
> >
> >
> > v2 patch attached.
>
> Please add more descriptions in the commit log.
>
> #ifdef __x86_64__
> - unsigned int amx_size
> - = (xstate_amx_comp_offsets[31]
> - + xstate_amx_comp_sizes[31]);
> - amx_size
> - = ALIGN_UP ((amx_size
> - + TLSDESC_CALL_REGISTER_SAVE_AREA),
> - 64);
> - /* Set TLSDESC state size to the compact AMX
> - state size for XSAVEC. */
> - _dl_x86_features_tlsdesc_state_size = amx_size;
> + _dl_x86_features_tlsdesc_state_size
> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> + /* Subtract AMX size from total size. */
>
> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
> X86_XSTATE_TILEDATA_ID space.
>
> + size -= ((xstate_comp_offsets[18]
> + + xstate_comp_sizes[18])
>
> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>
> + - (xstate_comp_offsets[16]
> + + xstate_comp_sizes[16]));
>
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>
X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from a
cache-aligned location.
Depending on which previous state gets enabled, size calculation may not be
correct.
How about
size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
- (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
+ xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
> --
> H.J.
>
On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> >
>> >
>> >
>> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> >>
>> >>
>> >>
>> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
>> >>>
>> >>> * Sunil K. Pandey:
>> >>>
>> >>> > +#ifdef __x86_64__
>> >>> > + if (amx_flag)
>> >>> > + {
>> >>> > + unsigned int amx_size
>> >>> > + = xstate_comp_offsets[18] + xstate_comp_sizes[18];
>> >>> > + _dl_x86_features_tlsdesc_state_size
>> >>> > + = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
>> >>> > + 64);
>> >>> > + }
>> >>> > +#endif
>> >>>
>> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
>> >>>
>> >>> The logic for including and excluding states seems reversed to me.
>> >>> Shouldn't we save and restore everything that we don't know anything
>> >>> about, especially for the TLSDESC case?
>> >>>
>> >>> Thanks,
>> >>> Florian
>> >>
>> >>
>> >> Thank you so much. Will submit v2.
>> >
>> >
>> > v2 patch attached.
>>
>> Please add more descriptions in the commit log.
>>
>> #ifdef __x86_64__
>> - unsigned int amx_size
>> - = (xstate_amx_comp_offsets[31]
>> - + xstate_amx_comp_sizes[31]);
>> - amx_size
>> - = ALIGN_UP ((amx_size
>> - + TLSDESC_CALL_REGISTER_SAVE_AREA),
>> - 64);
>> - /* Set TLSDESC state size to the compact AMX
>> - state size for XSAVEC. */
>> - _dl_x86_features_tlsdesc_state_size = amx_size;
>> + _dl_x86_features_tlsdesc_state_size
>> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>> + /* Subtract AMX size from total size. */
>>
>> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
>> X86_XSTATE_TILEDATA_ID space.
>>
>> + size -= ((xstate_comp_offsets[18]
>> + + xstate_comp_sizes[18])
>>
>> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>>
>> + - (xstate_comp_offsets[16]
>> + + xstate_comp_sizes[16]));
>>
>> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>
>
>
> X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from a cache-aligned location.
> Depending on which previous state gets enabled, size calculation may not be correct.
>
> How about
>
> size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
> + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
>
How about
size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
if there is no AMX. Otherwise, they are multiples of 64.
On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >
> >
> >
> > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >>
> >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >> >
> >> >
> >> >
> >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> >> >>
> >> >>
> >> >>
> >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com> wrote:
> >> >>>
> >> >>> * Sunil K. Pandey:
> >> >>>
> >> >>> > +#ifdef __x86_64__
> >> >>> > + if (amx_flag)
> >> >>> > + {
> >> >>> > + unsigned int amx_size
> >> >>> > + = xstate_comp_offsets[18] + xstate_comp_sizes[18];
> >> >>> > + _dl_x86_features_tlsdesc_state_size
> >> >>> > + = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
> >> >>> > + 64);
> >> >>> > + }
> >> >>> > +#endif
> >> >>>
> >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> >> >>>
> >> >>> The logic for including and excluding states seems reversed to me.
> >> >>> Shouldn't we save and restore everything that we don't know anything
> >> >>> about, especially for the TLSDESC case?
> >> >>>
> >> >>> Thanks,
> >> >>> Florian
> >> >>
> >> >>
> >> >> Thank you so much. Will submit v2.
> >> >
> >> >
> >> > v2 patch attached.
> >>
> >> Please add more descriptions in the commit log.
> >>
> >> #ifdef __x86_64__
> >> - unsigned int amx_size
> >> - = (xstate_amx_comp_offsets[31]
> >> - + xstate_amx_comp_sizes[31]);
> >> - amx_size
> >> - = ALIGN_UP ((amx_size
> >> - + TLSDESC_CALL_REGISTER_SAVE_AREA),
> >> - 64);
> >> - /* Set TLSDESC state size to the compact AMX
> >> - state size for XSAVEC. */
> >> - _dl_x86_features_tlsdesc_state_size = amx_size;
> >> + _dl_x86_features_tlsdesc_state_size
> >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> >> + /* Subtract AMX size from total size. */
> >>
> >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the end of
> >> X86_XSTATE_TILEDATA_ID space.
> >>
> >> + size -= ((xstate_comp_offsets[18]
> >> + + xstate_comp_sizes[18])
> >>
> >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
> >>
> >> + - (xstate_comp_offsets[16]
> >> + + xstate_comp_sizes[16]));
> >>
> >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
> >
> >
> >
> > X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from cache aligned location.
> > Depending on which previous state gets enabled, size calculation may not be correct.
> >
> > How about
> >
> > size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> > - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
> > + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
> >
>
> How about
>
> size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
>
> xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> if there is no AMX. Otherwise, they are multiples of 64.
>
>
> --
> H.J.
Please try this.
On Thu, Apr 3, 2025 at 8:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
> > >
> > >
> > >
> > > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >>
> > >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com>
> wrote:
> > >> >
> > >> >
> > >> >
> > >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com>
> wrote:
> > >> >>
> > >> >>
> > >> >>
> > >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <fweimer@redhat.com>
> wrote:
> > >> >>>
> > >> >>> * Sunil K. Pandey:
> > >> >>>
> > >> >>> > +#ifdef __x86_64__
> > >> >>> > + if (amx_flag)
> > >> >>> > + {
> > >> >>> > + unsigned int amx_size
> > >> >>> > + = xstate_comp_offsets[18] +
> xstate_comp_sizes[18];
> > >> >>> > + _dl_x86_features_tlsdesc_state_size
> > >> >>> > + = ALIGN_UP (amx_size +
> TLSDESC_CALL_REGISTER_SAVE_AREA,
> > >> >>> > + 64);
> > >> >>> > + }
> > >> >>> > +#endif
> > >> >>>
> > >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
> > >> >>>
> > >> >>> The logic for including and excluding states seems reversed to me.
> > >> >>> Shouldn't we save and restore everything that we don't know
> anything
> > >> >>> about, especially for the TLSDESC case?
> > >> >>>
> > >> >>> Thanks,
> > >> >>> Florian
> > >> >>
> > >> >>
> > >> >> Thank you so much. Will submit v2.
> > >> >
> > >> >
> > >> > v2 patch attached.
> > >>
> > >> Please add more descriptions in the commit log.
> > >>
> > >> #ifdef __x86_64__
> > >> - unsigned int amx_size
> > >> - = (xstate_amx_comp_offsets[31]
> > >> - + xstate_amx_comp_sizes[31]);
> > >> - amx_size
> > >> - = ALIGN_UP ((amx_size
> > >> - + TLSDESC_CALL_REGISTER_SAVE_AREA),
> > >> - 64);
> > >> - /* Set TLSDESC state size to the compact AMX
> > >> - state size for XSAVEC. */
> > >> - _dl_x86_features_tlsdesc_state_size = amx_size;
> > >> + _dl_x86_features_tlsdesc_state_size
> > >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> > >> + /* Subtract AMX size from total size. */
> > >>
> > >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the
> end of
> > >> X86_XSTATE_TILEDATA_ID space.
> > >>
> > >> + size -= ((xstate_comp_offsets[18]
> > >> + + xstate_comp_sizes[18])
> > >>
> > >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
> > >>
> > >> + - (xstate_comp_offsets[16]
> > >> + + xstate_comp_sizes[16]));
> > >>
> > >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
> > >
> > >
> > >
> > > X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from
> cache aligned location.
> > > Depending on which previous state gets enabled, size calculation may
> not be correct.
> > >
> > > How about
> > >
> > > size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> > > - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
> > > + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
> > >
> >
> > How about
> >
> > size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
> > size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
> >
> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
> > xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
> > if there is no AMX. Otherwise, they are multiples of 64.
> >
> >
> > --
> > H.J.
>
> Please try this.
>
>
> --
> H.J.
>
This looks great, as MAX hard coding also gets simplified.
Testing it out across different platforms.
--Sunil
On Thu, Apr 3, 2025 at 9:24 AM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
> On Thu, Apr 3, 2025 at 8:55 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
>> On Wed, Apr 2, 2025 at 7:10 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> >
>> > On Wed, Apr 2, 2025 at 4:46 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>> > >
>> > >
>> > >
>> > > On Wed, Apr 2, 2025 at 3:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> > >>
>> > >> On Tue, Apr 1, 2025 at 7:07 AM Sunil Pandey <skpgkp2@gmail.com>
>> wrote:
>> > >> >
>> > >> >
>> > >> >
>> > >> > On Mon, Mar 31, 2025 at 9:25 PM Sunil Pandey <skpgkp2@gmail.com>
>> wrote:
>> > >> >>
>> > >> >>
>> > >> >>
>> > >> >> On Mon, Mar 31, 2025 at 6:13 AM Florian Weimer <
>> fweimer@redhat.com> wrote:
>> > >> >>>
>> > >> >>> * Sunil K. Pandey:
>> > >> >>>
>> > >> >>> > +#ifdef __x86_64__
>> > >> >>> > + if (amx_flag)
>> > >> >>> > + {
>> > >> >>> > + unsigned int amx_size
>> > >> >>> > + = xstate_comp_offsets[18] +
>> xstate_comp_sizes[18];
>> > >> >>> > + _dl_x86_features_tlsdesc_state_size
>> > >> >>> > + = ALIGN_UP (amx_size +
>> TLSDESC_CALL_REGISTER_SAVE_AREA,
>> > >> >>> > + 64);
>> > >> >>> > + }
>> > >> >>> > +#endif
>> > >> >>>
>> > >> >>> I dislike the magic 18 (as the AMX bit number) appearing here.
>> > >> >>>
>> > >> >>> The logic for including and excluding states seems reversed to
>> me.
>> > >> >>> Shouldn't we save and restore everything that we don't know
>> anything
>> > >> >>> about, especially for the TLSDESC case?
>> > >> >>>
>> > >> >>> Thanks,
>> > >> >>> Florian
>> > >> >>
>> > >> >>
>> > >> >> Thank you so much. Will submit v2.
>> > >> >
>> > >> >
>> > >> > v2 patch attached.
>> > >>
>> > >> Please add more descriptions in the commit log.
>> > >>
>> > >> #ifdef __x86_64__
>> > >> - unsigned int amx_size
>> > >> - = (xstate_amx_comp_offsets[31]
>> > >> - + xstate_amx_comp_sizes[31]);
>> > >> - amx_size
>> > >> - = ALIGN_UP ((amx_size
>> > >> - + TLSDESC_CALL_REGISTER_SAVE_AREA),
>> > >> - 64);
>> > >> - /* Set TLSDESC state size to the compact AMX
>> > >> - state size for XSAVEC. */
>> > >> - _dl_x86_features_tlsdesc_state_size = amx_size;
>> > >> + _dl_x86_features_tlsdesc_state_size
>> > >> + = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>> > >> + /* Subtract AMX size from total size. */
>> > >>
>> > >> Subtract size from the start of X86_XSTATE_TILECFG_ID space to the
>> end of
>> > >> X86_XSTATE_TILEDATA_ID space.
>> > >>
>> > >> + size -= ((xstate_comp_offsets[18]
>> > >> + + xstate_comp_sizes[18])
>> > >>
>> > >> xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]?
>> > >>
>> > >> + - (xstate_comp_offsets[16]
>> > >> + + xstate_comp_sizes[16]));
>> > >>
>> > >> xstate_comp_offsets[X86_XSTATE_TILECFG_ID]?
>> > >
>> > >
>> > >
>> > > X86_XSTATE_TILEDATA_ID and X86_XSTATE_TILECFG_ID must start from
>> cache aligned location.
>> > > Depending on which previous state gets enabled, size calculation may
>> not be correct.
>> > >
>> > > How about
>> > >
>> > > size -= (xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
>> > > - (xstate_comp_offsets[X86_XSTATE_TILECFG_ID - 1]
>> > > + xstate_comp_sizes[X86_XSTATE_TILECFG_ID - 1]));
>> > >
>> >
>> > How about
>> >
>> > size = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
>> > size -= xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1] -
>> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID];
>> >
>> > xstate_comp_offsets[X86_XSTATE_TILECFG_ID] ==
>> > xstate_comp_offsets[X86_XSTATE_TILEDATA_ID + 1]
>> > if there is no AMX. Otherwise, they are multiples of 64.
>> >
>> >
>> > --
>> > H.J.
>>
>> Please try this.
>>
>>
>> --
>> H.J.
>>
>
> This looks great, as MAX hard coding also gets simplified.
> Testing it out across different platforms.
>
> --Sunil
>
Completed validation on SKL/SKX/SPR/SDE and compared xsave numbers with
the 'ld.so --list-diagnostics' option, no regression.
So this patch looks good to me.
--Sunil
@@ -327,51 +327,30 @@ update_active (struct cpu_features *cpu_features)
{
unsigned int xstate_comp_offsets[32];
unsigned int xstate_comp_sizes[32];
+ unsigned int i;
#ifdef __x86_64__
- unsigned int xstate_amx_comp_offsets[32];
- unsigned int xstate_amx_comp_sizes[32];
- unsigned int amx_ecx;
+ bool amx_flag = 0;
#endif
- unsigned int i;
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = 160;
xstate_comp_offsets[2] = 576;
xstate_comp_sizes[0] = 160;
xstate_comp_sizes[1] = 256;
-#ifdef __x86_64__
- xstate_amx_comp_offsets[0] = 0;
- xstate_amx_comp_offsets[1] = 160;
- xstate_amx_comp_offsets[2] = 576;
- xstate_amx_comp_sizes[0] = 160;
- xstate_amx_comp_sizes[1] = 256;
-#endif
for (i = 2; i < 32; i++)
{
if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0)
{
__cpuid_count (0xd, i, eax, ebx, ecx, edx);
+ xstate_comp_sizes[i] = eax;
#ifdef __x86_64__
- /* Include this in xsave_state_full_size. */
- amx_ecx = ecx;
- xstate_amx_comp_sizes[i] = eax;
if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0)
- {
- /* Exclude this from xsave_state_size. */
- ecx = 0;
- xstate_comp_sizes[i] = 0;
- }
- else
+ amx_flag = 1;
#endif
- xstate_comp_sizes[i] = eax;
}
else
{
-#ifdef __x86_64__
- amx_ecx = 0;
- xstate_amx_comp_sizes[i] = 0;
-#endif
ecx = 0;
xstate_comp_sizes[i] = 0;
}
@@ -384,38 +363,27 @@ update_active (struct cpu_features *cpu_features)
if ((ecx & (1 << 1)) != 0)
xstate_comp_offsets[i]
= ALIGN_UP (xstate_comp_offsets[i], 64);
-#ifdef __x86_64__
- xstate_amx_comp_offsets[i]
- = (xstate_amx_comp_offsets[i - 1]
- + xstate_amx_comp_sizes[i - 1]);
- if ((amx_ecx & (1 << 1)) != 0)
- xstate_amx_comp_offsets[i]
- = ALIGN_UP (xstate_amx_comp_offsets[i],
- 64);
-#endif
}
}
/* Use XSAVEC. */
unsigned int size
- = xstate_comp_offsets[31] + xstate_comp_sizes[31];
+ = xstate_comp_offsets[9] + xstate_comp_sizes[9];
if (size)
{
-#ifdef __x86_64__
- unsigned int amx_size
- = (xstate_amx_comp_offsets[31]
- + xstate_amx_comp_sizes[31]);
- amx_size
- = ALIGN_UP ((amx_size
- + TLSDESC_CALL_REGISTER_SAVE_AREA),
- 64);
- /* Set TLSDESC state size to the compact AMX
- state size for XSAVEC. */
- _dl_x86_features_tlsdesc_state_size = amx_size;
-#endif
cpu_features->xsave_state_size
= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
64);
+#ifdef __x86_64__
+ if (amx_flag)
+ {
+ unsigned int amx_size
+ = xstate_comp_offsets[18] + xstate_comp_sizes[18];
+ _dl_x86_features_tlsdesc_state_size
+ = ALIGN_UP (amx_size + TLSDESC_CALL_REGISTER_SAVE_AREA,
+ 64);
+ }
+#endif
CPU_FEATURE_SET (cpu_features, XSAVEC);
}
}