x86: Add DMR model detection support
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
- Add DIAMONDRAPIDS model detection.
- Enable Bigcore tuning for DMR.
Intel® Architecture Instruction Set Extensions Programming Reference
https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
---
sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
1 file changed, 36 insertions(+), 3 deletions(-)
Comments
On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
<sunil.k.pandey@intel.com> wrote:
>
> - Add DIAMONDRAPIDS model detection.
> - Enable Bigcore tuning to DMR.
>
> Intel® Architecture Instruction Set Extensions Programming Reference
> https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> ---
> sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
> 1 file changed, 36 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index f1d2a179e4..3f4699a9f7 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> "Incorrect index_arch_Fast_Unaligned_Load");
>
>
> -/* Intel Family-6 microarch list. */
> +/* Intel Family microarch list. */
> enum
> {
> /* Atom processors. */
> @@ -542,6 +542,7 @@ enum
> INTEL_BIGCORE_ARROWLAKE,
> INTEL_BIGCORE_PANTHERLAKE,
> INTEL_BIGCORE_GRANITERAPIDS,
> + INTEL_BIGCORE_DIAMONDRAPIDS,
>
> /* Mixed (bigcore + atom SOC). */
> INTEL_MIXED_LAKEFIELD,
> @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
> {
> unsigned int extended_model;
> + unsigned int microarch;
>
> kind = arch_kind_intel;
>
> @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> if (family == 0x06)
> {
> model += extended_model;
> - unsigned int microarch
> - = intel_get_fam6_microarch (model, stepping);
> + microarch = intel_get_fam6_microarch (model, stepping);
>
> switch (microarch)
> {
> @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
> break;
> }
> }
> + else if (family == 0x13)
> + {
> + switch (model)
> + {
> + case 0x01:
> + microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> + break;
> +
> + default:
> + microarch = INTEL_UNKNOWN;
> + break;
> + }
> +
> + switch (microarch)
> + {
> + /* Intel Diamond Rapids tuning. */
> + case INTEL_BIGCORE_DIAMONDRAPIDS:
> + /* Rep string instructions, unaligned load, unaligned copy,
> + and pminub are fast on Intel Core i3, i5 and i7. */
> + cpu_features->preferred[index_arch_Fast_Rep_String]
> + |= (bit_arch_Fast_Rep_String
> + | bit_arch_Fast_Unaligned_Load
> + | bit_arch_Fast_Unaligned_Copy
> + | bit_arch_Prefer_PMINUB_for_stringop);
> + cpu_features->cachesize_non_temporal_divisor = 2;
> + break;
> +
> + default:
> + /* Unknown family 0x13 processors. */
> + break;
> + }
> + }
Please change how family 6 and family 19 CPU models are handled, split into 2 patches:
1. Handle unknown family CPUs with default_tuning.
2. Add family 19 CPU support.
>
> /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
> --
> 2.49.0
>
On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
> <sunil.k.pandey@intel.com> wrote:
> >
> > - Add DIAMONDRAPIDS model detection.
> > - Enable Bigcore tuning to DMR.
> >
> > Intel® Architecture Instruction Set Extensions Programming Reference
> > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> > ---
> > sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
> > 1 file changed, 36 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > index f1d2a179e4..3f4699a9f7 100644
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> > "Incorrect index_arch_Fast_Unaligned_Load");
> >
> >
> > -/* Intel Family-6 microarch list. */
> > +/* Intel Family microarch list. */
> > enum
> > {
> > /* Atom processors. */
> > @@ -542,6 +542,7 @@ enum
> > INTEL_BIGCORE_ARROWLAKE,
> > INTEL_BIGCORE_PANTHERLAKE,
> > INTEL_BIGCORE_GRANITERAPIDS,
> > + INTEL_BIGCORE_DIAMONDRAPIDS,
> >
> > /* Mixed (bigcore + atom SOC). */
> > INTEL_MIXED_LAKEFIELD,
> > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> > if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
> > {
> > unsigned int extended_model;
> > + unsigned int microarch;
> >
> > kind = arch_kind_intel;
> >
> > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> > if (family == 0x06)
> > {
> > model += extended_model;
> > - unsigned int microarch
> > - = intel_get_fam6_microarch (model, stepping);
> > + microarch = intel_get_fam6_microarch (model, stepping);
> >
> > switch (microarch)
> > {
> > @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
> > break;
> > }
> > }
> > + else if (family == 0x13)
> > + {
Linux kernel has
unsigned int x86_model(unsigned int sig)
{
unsigned int fam, model;
fam = x86_family(sig);
model = (sig >> 4) & 0xf;
if (fam >= 0x6)
^^^^^^^^^^^^^^^
model += ((sig >> 16) & 0xf) << 4;
return model;
}
Please check which one is correct, Linux kernel or SDM.
> > + switch (model)
> > + {
> > + case 0x01:
> > + microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> > + break;
> > +
> > + default:
> > + microarch = INTEL_UNKNOWN;
> > + break;
> > + }
> > +
> > + switch (microarch)
> > + {
> > + /* Intel Diamond Rapids tuning. */
> > + case INTEL_BIGCORE_DIAMONDRAPIDS:
> > + /* Rep string instructions, unaligned load, unaligned copy,
> > + and pminub are fast on Intel Core i3, i5 and i7. */
> > + cpu_features->preferred[index_arch_Fast_Rep_String]
> > + |= (bit_arch_Fast_Rep_String
> > + | bit_arch_Fast_Unaligned_Load
> > + | bit_arch_Fast_Unaligned_Copy
> > + | bit_arch_Prefer_PMINUB_for_stringop);
> > + cpu_features->cachesize_non_temporal_divisor = 2;
> > + break;
> > +
> > + default:
> > + /* Unknown family 0x13 processors. */
> > + break;
> > + }
> > + }
>
> Please change how family 6 and 19 CPU models are handled in 2 patches:
>
> 1. Handled unknown family CPUs with default_tuning.
> 2. Add family 19 CPU support.
>
> >
> > /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
> > --
> > 2.49.0
> >
>
>
> --
> H.J.
On Thu, Apr 10, 2025 at 3:00 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
> > <sunil.k.pandey@intel.com> wrote:
> > >
> > > - Add DIAMONDRAPIDS model detection.
> > > - Enable Bigcore tuning to DMR.
> > >
> > > Intel® Architecture Instruction Set Extensions Programming Reference
> > > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> > > ---
> > > sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
> > > 1 file changed, 36 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > index f1d2a179e4..3f4699a9f7 100644
> > > --- a/sysdeps/x86/cpu-features.c
> > > +++ b/sysdeps/x86/cpu-features.c
> > > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> > > "Incorrect index_arch_Fast_Unaligned_Load");
> > >
> > >
> > > -/* Intel Family-6 microarch list. */
> > > +/* Intel Family microarch list. */
> > > enum
> > > {
> > > /* Atom processors. */
> > > @@ -542,6 +542,7 @@ enum
> > > INTEL_BIGCORE_ARROWLAKE,
> > > INTEL_BIGCORE_PANTHERLAKE,
> > > INTEL_BIGCORE_GRANITERAPIDS,
> > > + INTEL_BIGCORE_DIAMONDRAPIDS,
> > >
> > > /* Mixed (bigcore + atom SOC). */
> > > INTEL_MIXED_LAKEFIELD,
> > > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features
> *cpu_features)
> > > if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
> > > {
> > > unsigned int extended_model;
> > > + unsigned int microarch;
> > >
> > > kind = arch_kind_intel;
> > >
> > > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features
> *cpu_features)
> > > if (family == 0x06)
> > > {
> > > model += extended_model;
> > > - unsigned int microarch
> > > - = intel_get_fam6_microarch (model, stepping);
> > > + microarch = intel_get_fam6_microarch (model, stepping);
> > >
> > > switch (microarch)
> > > {
> > > @@ -932,6 +933,38 @@
> https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
> > > break;
> > > }
> > > }
> > > + else if (family == 0x13)
> > > + {
>
> Linux kernel has
>
> unsigned int x86_model(unsigned int sig)
> {
> unsigned int fam, model;
>
> fam = x86_family(sig);
>
> model = (sig >> 4) & 0xf;
>
> if (fam >= 0x6)
> ^^^^^^^^^^^^^^^
> model += ((sig >> 16) & 0xf) << 4;
>
> return model;
> }
>
> Please check which one is correct, Linux kernel or SDM.
>
The SDM and the kernel are in sync if we assume there are only two family
IDs, 0x06 and 0x0F.
The kernel code may not be correct for family IDs other than 0x06/0x0F.
The family ID for DMR is 0x0F; the extended family ID is added to it to get
the display family ID 0x13.
Since the family ID for DMR is 0x0F, the extended model needs to be taken
into account.
Glibc already does this calculation for DMR in the get_common_indices
function:
447 if (*family == 0x0f)
448 {
449 *family += (eax >> 20) & 0xff;
450 *model += *extended_model;
451 }
>
> > > + switch (model)
> > > + {
> > > + case 0x01:
> > > + microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> > > + break;
> > > +
> > > + default:
> > > + microarch = INTEL_UNKNOWN;
> > > + break;
> > > + }
> > > +
> > > + switch (microarch)
> > > + {
> > > + /* Intel Diamond Rapids tuning. */
> > > + case INTEL_BIGCORE_DIAMONDRAPIDS:
> > > + /* Rep string instructions, unaligned load, unaligned
> copy,
> > > + and pminub are fast on Intel Core i3, i5 and i7. */
> > > + cpu_features->preferred[index_arch_Fast_Rep_String]
> > > + |= (bit_arch_Fast_Rep_String
> > > + | bit_arch_Fast_Unaligned_Load
> > > + | bit_arch_Fast_Unaligned_Copy
> > > + | bit_arch_Prefer_PMINUB_for_stringop);
> > > + cpu_features->cachesize_non_temporal_divisor = 2;
> > > + break;
> > > +
> > > + default:
> > > + /* Unknown family 0x13 processors. */
> > > + break;
> > > + }
> > > + }
> >
> > Please change how family 6 and 19 CPU models are handled in 2 patches:
> >
> > 1. Handled unknown family CPUs with default_tuning.
> > 2. Add family 19 CPU support.
> >
> > >
> > > /* Since AVX512ER is unique to Xeon Phi, set
> Prefer_No_VZEROUPPER
> > > --
> > > 2.49.0
> > >
> >
> >
> > --
> > H.J.
>
>
>
> --
> H.J.
>
On Thu, Apr 10, 2025 at 10:35 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Thu, Apr 10, 2025 at 3:00 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> >
>> > On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
>> > <sunil.k.pandey@intel.com> wrote:
>> > >
>> > > - Add DIAMONDRAPIDS model detection.
>> > > - Enable Bigcore tuning to DMR.
>> > >
>> > > Intel® Architecture Instruction Set Extensions Programming Reference
>> > > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
>> > > ---
>> > > sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
>> > > 1 file changed, 36 insertions(+), 3 deletions(-)
>> > >
>> > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
>> > > index f1d2a179e4..3f4699a9f7 100644
>> > > --- a/sysdeps/x86/cpu-features.c
>> > > +++ b/sysdeps/x86/cpu-features.c
>> > > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
>> > > "Incorrect index_arch_Fast_Unaligned_Load");
>> > >
>> > >
>> > > -/* Intel Family-6 microarch list. */
>> > > +/* Intel Family microarch list. */
>> > > enum
>> > > {
>> > > /* Atom processors. */
>> > > @@ -542,6 +542,7 @@ enum
>> > > INTEL_BIGCORE_ARROWLAKE,
>> > > INTEL_BIGCORE_PANTHERLAKE,
>> > > INTEL_BIGCORE_GRANITERAPIDS,
>> > > + INTEL_BIGCORE_DIAMONDRAPIDS,
>> > >
>> > > /* Mixed (bigcore + atom SOC). */
>> > > INTEL_MIXED_LAKEFIELD,
>> > > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>> > > if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
>> > > {
>> > > unsigned int extended_model;
>> > > + unsigned int microarch;
>> > >
>> > > kind = arch_kind_intel;
>> > >
>> > > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>> > > if (family == 0x06)
>> > > {
>> > > model += extended_model;
>> > > - unsigned int microarch
>> > > - = intel_get_fam6_microarch (model, stepping);
>> > > + microarch = intel_get_fam6_microarch (model, stepping);
>> > >
>> > > switch (microarch)
>> > > {
>> > > @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
>> > > break;
>> > > }
>> > > }
>> > > + else if (family == 0x13)
>> > > + {
>>
>> Linux kernel has
>>
>> unsigned int x86_model(unsigned int sig)
>> {
>> unsigned int fam, model;
>>
>> fam = x86_family(sig);
>>
>> model = (sig >> 4) & 0xf;
>>
>> if (fam >= 0x6)
>> ^^^^^^^^^^^^^^^
>> model += ((sig >> 16) & 0xf) << 4;
>>
>> return model;
>> }
>>
>> Please check which one is correct, Linux kernel or SDM.
>
>
> SDM and kernel are in sync if we assume only 2 family id 0x06 and 0x0F.
>
> Kernel code may not be correct for family id other than 0x06/0x0F.
>
> Family id for DMR is 0x0F, it gets added with extended family id to get display family id 0x13.
> Since family id for DMR is 0x0F, the extended model needs to be taken into account.
> Glibc doing this calculation for DMR in get_common_indices function.
>
> 447 if (*family == 0x0f)
> 448 {
> 449 *family += (eax >> 20) & 0xff;
> 450 *model += *extended_model;
> 451 }
You are right. There is no issue.
>
>>
>>
>> > > + switch (model)
>> > > + {
>> > > + case 0x01:
>> > > + microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
>> > > + break;
>> > > +
>> > > + default:
>> > > + microarch = INTEL_UNKNOWN;
>> > > + break;
>> > > + }
>> > > +
>> > > + switch (microarch)
>> > > + {
>> > > + /* Intel Diamond Rapids tuning. */
>> > > + case INTEL_BIGCORE_DIAMONDRAPIDS:
>> > > + /* Rep string instructions, unaligned load, unaligned copy,
>> > > + and pminub are fast on Intel Core i3, i5 and i7. */
>> > > + cpu_features->preferred[index_arch_Fast_Rep_String]
>> > > + |= (bit_arch_Fast_Rep_String
>> > > + | bit_arch_Fast_Unaligned_Load
>> > > + | bit_arch_Fast_Unaligned_Copy
>> > > + | bit_arch_Prefer_PMINUB_for_stringop);
>> > > + cpu_features->cachesize_non_temporal_divisor = 2;
>> > > + break;
>> > > +
>> > > + default:
>> > > + /* Unknown family 0x13 processors. */
>> > > + break;
>> > > + }
>> > > + }
>> >
>> > Please change how family 6 and 19 CPU models are handled in 2 patches:
>> >
>> > 1. Handled unknown family CPUs with default_tuning.
>> > 2. Add family 19 CPU support.
>> >
>> > >
>> > > /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
>> > > --
>> > > 2.49.0
>> > >
>> >
>> >
>> > --
>> > H.J.
>>
>>
>>
>> --
>> H.J.
@@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
"Incorrect index_arch_Fast_Unaligned_Load");
-/* Intel Family-6 microarch list. */
+/* Intel Family microarch list. */
enum
{
/* Atom processors. */
@@ -542,6 +542,7 @@ enum
INTEL_BIGCORE_ARROWLAKE,
INTEL_BIGCORE_PANTHERLAKE,
INTEL_BIGCORE_GRANITERAPIDS,
+ INTEL_BIGCORE_DIAMONDRAPIDS,
/* Mixed (bigcore + atom SOC). */
INTEL_MIXED_LAKEFIELD,
@@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
{
unsigned int extended_model;
+ unsigned int microarch;
kind = arch_kind_intel;
@@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x06)
{
model += extended_model;
- unsigned int microarch
- = intel_get_fam6_microarch (model, stepping);
+ microarch = intel_get_fam6_microarch (model, stepping);
switch (microarch)
{
@@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
break;
}
}
+ else if (family == 0x13)
+ {
+ switch (model)
+ {
+ case 0x01:
+ microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
+ break;
+
+ default:
+ microarch = INTEL_UNKNOWN;
+ break;
+ }
+
+ switch (microarch)
+ {
+ /* Intel Diamond Rapids tuning. */
+ case INTEL_BIGCORE_DIAMONDRAPIDS:
+ /* Rep string instructions, unaligned load, unaligned copy,
+ and pminub are fast on Intel Core i3, i5 and i7. */
+ cpu_features->preferred[index_arch_Fast_Rep_String]
+ |= (bit_arch_Fast_Rep_String
+ | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop);
+ cpu_features->cachesize_non_temporal_divisor = 2;
+ break;
+
+ default:
+ /* Unknown family 0x13 processors. */
+ break;
+ }
+ }
/* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER