x86: Add DMR model detection support

Message ID 20250410184258.1823672-1-sunil.k.pandey@intel.com (mailing list archive)
State Changes Requested
Headers
Series x86: Add DMR model detection support |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Sunil K Pandey April 10, 2025, 6:42 p.m. UTC
- Add DIAMONDRAPIDS model detection.
- Enable Bigcore tuning for DMR.

Intel® Architecture Instruction Set Extensions Programming Reference
https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
---
 sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)
  

Comments

H.J. Lu April 10, 2025, 9:31 p.m. UTC | #1
On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
<sunil.k.pandey@intel.com> wrote:
>
> - Add DIAMONDRAPIDS model detection.
> - Enable Bigcore tuning to DMR.
>
> Intel® Architecture Instruction Set Extensions Programming Reference
> https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> ---
>  sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
>  1 file changed, 36 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index f1d2a179e4..3f4699a9f7 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
>                 "Incorrect index_arch_Fast_Unaligned_Load");
>
>
> -/* Intel Family-6 microarch list.  */
> +/* Intel Family microarch list.  */
>  enum
>  {
>    /* Atom processors.  */
> @@ -542,6 +542,7 @@ enum
>    INTEL_BIGCORE_ARROWLAKE,
>    INTEL_BIGCORE_PANTHERLAKE,
>    INTEL_BIGCORE_GRANITERAPIDS,
> +  INTEL_BIGCORE_DIAMONDRAPIDS,
>
>    /* Mixed (bigcore + atom SOC).  */
>    INTEL_MIXED_LAKEFIELD,
> @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>    if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
>      {
>        unsigned int extended_model;
> +      unsigned int microarch;
>
>        kind = arch_kind_intel;
>
> @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>        if (family == 0x06)
>         {
>           model += extended_model;
> -         unsigned int microarch
> -             = intel_get_fam6_microarch (model, stepping);
> +         microarch = intel_get_fam6_microarch (model, stepping);
>
>           switch (microarch)
>             {
> @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
>                 break;
>             }
>         }
> +      else if (family == 0x13)
> +       {
> +         switch (model)
> +           {
> +           case 0x01:
> +             microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> +             break;
> +
> +           default:
> +             microarch = INTEL_UNKNOWN;
> +             break;
> +           }
> +
> +         switch (microarch)
> +           {
> +             /* Intel Diamond Rapids tuning.  */
> +           case INTEL_BIGCORE_DIAMONDRAPIDS:
> +             /* Rep string instructions, unaligned load, unaligned copy,
> +                and pminub are fast on Intel Core i3, i5 and i7.  */
> +             cpu_features->preferred[index_arch_Fast_Rep_String]
> +               |= (bit_arch_Fast_Rep_String
> +                   | bit_arch_Fast_Unaligned_Load
> +                   | bit_arch_Fast_Unaligned_Copy
> +                   | bit_arch_Prefer_PMINUB_for_stringop);
> +             cpu_features->cachesize_non_temporal_divisor = 2;
> +             break;
> +
> +           default:
> +             /* Unknown family 0x13 processors.  */
> +             break;
> +           }
> +       }

Please change how family 6 and family 19 (0x13) CPU models are handled,
split into 2 patches:

1.  Handle unknown family CPUs with default_tuning.
2.  Add family 19 CPU support.

>
>        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
> --
> 2.49.0
>
  
H.J. Lu April 10, 2025, 9:59 p.m. UTC | #2
On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
> <sunil.k.pandey@intel.com> wrote:
> >
> > - Add DIAMONDRAPIDS model detection.
> > - Enable Bigcore tuning to DMR.
> >
> > Intel® Architecture Instruction Set Extensions Programming Reference
> > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> > ---
> >  sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
> >  1 file changed, 36 insertions(+), 3 deletions(-)
> >
> > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > index f1d2a179e4..3f4699a9f7 100644
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> >                 "Incorrect index_arch_Fast_Unaligned_Load");
> >
> >
> > -/* Intel Family-6 microarch list.  */
> > +/* Intel Family microarch list.  */
> >  enum
> >  {
> >    /* Atom processors.  */
> > @@ -542,6 +542,7 @@ enum
> >    INTEL_BIGCORE_ARROWLAKE,
> >    INTEL_BIGCORE_PANTHERLAKE,
> >    INTEL_BIGCORE_GRANITERAPIDS,
> > +  INTEL_BIGCORE_DIAMONDRAPIDS,
> >
> >    /* Mixed (bigcore + atom SOC).  */
> >    INTEL_MIXED_LAKEFIELD,
> > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> >    if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
> >      {
> >        unsigned int extended_model;
> > +      unsigned int microarch;
> >
> >        kind = arch_kind_intel;
> >
> > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
> >        if (family == 0x06)
> >         {
> >           model += extended_model;
> > -         unsigned int microarch
> > -             = intel_get_fam6_microarch (model, stepping);
> > +         microarch = intel_get_fam6_microarch (model, stepping);
> >
> >           switch (microarch)
> >             {
> > @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
> >                 break;
> >             }
> >         }
> > +      else if (family == 0x13)
> > +       {

Linux kernel has

unsigned int x86_model(unsigned int sig)
{
        unsigned int fam, model;

        fam = x86_family(sig);

        model = (sig >> 4) & 0xf;

        if (fam >= 0x6)
       ^^^^^^^^^^^^^^^
                model += ((sig >> 16) & 0xf) << 4;

        return model;
}

Please check which one is correct: the Linux kernel or the Intel SDM
(Software Developer's Manual).

> > +         switch (model)
> > +           {
> > +           case 0x01:
> > +             microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> > +             break;
> > +
> > +           default:
> > +             microarch = INTEL_UNKNOWN;
> > +             break;
> > +           }
> > +
> > +         switch (microarch)
> > +           {
> > +             /* Intel Diamond Rapids tuning.  */
> > +           case INTEL_BIGCORE_DIAMONDRAPIDS:
> > +             /* Rep string instructions, unaligned load, unaligned copy,
> > +                and pminub are fast on Intel Core i3, i5 and i7.  */
> > +             cpu_features->preferred[index_arch_Fast_Rep_String]
> > +               |= (bit_arch_Fast_Rep_String
> > +                   | bit_arch_Fast_Unaligned_Load
> > +                   | bit_arch_Fast_Unaligned_Copy
> > +                   | bit_arch_Prefer_PMINUB_for_stringop);
> > +             cpu_features->cachesize_non_temporal_divisor = 2;
> > +             break;
> > +
> > +           default:
> > +             /* Unknown family 0x13 processors.  */
> > +             break;
> > +           }
> > +       }
>
> Please change how family 6 and 19 CPU models are handled in 2 patches:
>
> 1.  Handled unknown family CPUs with default_tuning.
> 2.  Add family 19 CPU support.
>
> >
> >        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
> > --
> > 2.49.0
> >
>
>
> --
> H.J.
  
Sunil Pandey April 11, 2025, 5:35 a.m. UTC | #3
On Thu, Apr 10, 2025 at 3:00 PM H.J. Lu <hjl.tools@gmail.com> wrote:

> On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
> > <sunil.k.pandey@intel.com> wrote:
> > >
> > > - Add DIAMONDRAPIDS model detection.
> > > - Enable Bigcore tuning to DMR.
> > >
> > > Intel® Architecture Instruction Set Extensions Programming Reference
> > > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
> > > ---
> > >  sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
> > >  1 file changed, 36 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > > index f1d2a179e4..3f4699a9f7 100644
> > > --- a/sysdeps/x86/cpu-features.c
> > > +++ b/sysdeps/x86/cpu-features.c
> > > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> > >                 "Incorrect index_arch_Fast_Unaligned_Load");
> > >
> > >
> > > -/* Intel Family-6 microarch list.  */
> > > +/* Intel Family microarch list.  */
> > >  enum
> > >  {
> > >    /* Atom processors.  */
> > > @@ -542,6 +542,7 @@ enum
> > >    INTEL_BIGCORE_ARROWLAKE,
> > >    INTEL_BIGCORE_PANTHERLAKE,
> > >    INTEL_BIGCORE_GRANITERAPIDS,
> > > +  INTEL_BIGCORE_DIAMONDRAPIDS,
> > >
> > >    /* Mixed (bigcore + atom SOC).  */
> > >    INTEL_MIXED_LAKEFIELD,
> > > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features
> *cpu_features)
> > >    if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
> > >      {
> > >        unsigned int extended_model;
> > > +      unsigned int microarch;
> > >
> > >        kind = arch_kind_intel;
> > >
> > > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features
> *cpu_features)
> > >        if (family == 0x06)
> > >         {
> > >           model += extended_model;
> > > -         unsigned int microarch
> > > -             = intel_get_fam6_microarch (model, stepping);
> > > +         microarch = intel_get_fam6_microarch (model, stepping);
> > >
> > >           switch (microarch)
> > >             {
> > > @@ -932,6 +933,38 @@
> https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
> > >                 break;
> > >             }
> > >         }
> > > +      else if (family == 0x13)
> > > +       {
>
> Linux kernel has
>
> unsigned int x86_model(unsigned int sig)
> {
>         unsigned int fam, model;
>
>         fam = x86_family(sig);
>
>         model = (sig >> 4) & 0xf;
>
>         if (fam >= 0x6)
>        ^^^^^^^^^^^^^^^
>                 model += ((sig >> 16) & 0xf) << 4;
>
>         return model;
> }
>
> Please check which one is correct, Linux kernel or SDM.
>

The SDM and the kernel are in sync if we assume there are only two family
IDs, 0x06 and 0x0F.

The kernel code may not be correct for family IDs other than 0x06/0x0F.

The family ID for DMR is 0x0F; it is added to the extended family ID to get
the display family ID 0x13.
Since the family ID for DMR is 0x0F, the extended model needs to be taken
into account.
Glibc does this calculation for DMR in the get_common_indices function.

447       if (*family == 0x0f)
448         {
449           *family += (eax >> 20) & 0xff;
450           *model += *extended_model;
451         }



>
> > > +         switch (model)
> > > +           {
> > > +           case 0x01:
> > > +             microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
> > > +             break;
> > > +
> > > +           default:
> > > +             microarch = INTEL_UNKNOWN;
> > > +             break;
> > > +           }
> > > +
> > > +         switch (microarch)
> > > +           {
> > > +             /* Intel Diamond Rapids tuning.  */
> > > +           case INTEL_BIGCORE_DIAMONDRAPIDS:
> > > +             /* Rep string instructions, unaligned load, unaligned
> copy,
> > > +                and pminub are fast on Intel Core i3, i5 and i7.  */
> > > +             cpu_features->preferred[index_arch_Fast_Rep_String]
> > > +               |= (bit_arch_Fast_Rep_String
> > > +                   | bit_arch_Fast_Unaligned_Load
> > > +                   | bit_arch_Fast_Unaligned_Copy
> > > +                   | bit_arch_Prefer_PMINUB_for_stringop);
> > > +             cpu_features->cachesize_non_temporal_divisor = 2;
> > > +             break;
> > > +
> > > +           default:
> > > +             /* Unknown family 0x13 processors.  */
> > > +             break;
> > > +           }
> > > +       }
> >
> > Please change how family 6 and 19 CPU models are handled in 2 patches:
> >
> > 1.  Handled unknown family CPUs with default_tuning.
> > 2.  Add family 19 CPU support.
> >
> > >
> > >        /* Since AVX512ER is unique to Xeon Phi, set
> Prefer_No_VZEROUPPER
> > > --
> > > 2.49.0
> > >
> >
> >
> > --
> > H.J.
>
>
>
> --
> H.J.
>
  
H.J. Lu April 11, 2025, 1:40 p.m. UTC | #4
On Thu, Apr 10, 2025 at 10:35 PM Sunil Pandey <skpgkp2@gmail.com> wrote:
>
>
>
> On Thu, Apr 10, 2025 at 3:00 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> On Thu, Apr 10, 2025 at 2:31 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>> >
>> > On Thu, Apr 10, 2025 at 11:44 AM Sunil K Pandey
>> > <sunil.k.pandey@intel.com> wrote:
>> > >
>> > > - Add DIAMONDRAPIDS model detection.
>> > > - Enable Bigcore tuning to DMR.
>> > >
>> > > Intel® Architecture Instruction Set Extensions Programming Reference
>> > > https://cdrdv2.intel.com/v1/dl/getContent/671368 Section 1.2.
>> > > ---
>> > >  sysdeps/x86/cpu-features.c | 39 +++++++++++++++++++++++++++++++++++---
>> > >  1 file changed, 36 insertions(+), 3 deletions(-)
>> > >
>> > > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
>> > > index f1d2a179e4..3f4699a9f7 100644
>> > > --- a/sysdeps/x86/cpu-features.c
>> > > +++ b/sysdeps/x86/cpu-features.c
>> > > @@ -502,7 +502,7 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
>> > >                 "Incorrect index_arch_Fast_Unaligned_Load");
>> > >
>> > >
>> > > -/* Intel Family-6 microarch list.  */
>> > > +/* Intel Family microarch list.  */
>> > >  enum
>> > >  {
>> > >    /* Atom processors.  */
>> > > @@ -542,6 +542,7 @@ enum
>> > >    INTEL_BIGCORE_ARROWLAKE,
>> > >    INTEL_BIGCORE_PANTHERLAKE,
>> > >    INTEL_BIGCORE_GRANITERAPIDS,
>> > > +  INTEL_BIGCORE_DIAMONDRAPIDS,
>> > >
>> > >    /* Mixed (bigcore + atom SOC).  */
>> > >    INTEL_MIXED_LAKEFIELD,
>> > > @@ -749,6 +750,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>> > >    if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
>> > >      {
>> > >        unsigned int extended_model;
>> > > +      unsigned int microarch;
>> > >
>> > >        kind = arch_kind_intel;
>> > >
>> > > @@ -767,8 +769,7 @@ init_cpu_features (struct cpu_features *cpu_features)
>> > >        if (family == 0x06)
>> > >         {
>> > >           model += extended_model;
>> > > -         unsigned int microarch
>> > > -             = intel_get_fam6_microarch (model, stepping);
>> > > +         microarch = intel_get_fam6_microarch (model, stepping);
>> > >
>> > >           switch (microarch)
>> > >             {
>> > > @@ -932,6 +933,38 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
>> > >                 break;
>> > >             }
>> > >         }
>> > > +      else if (family == 0x13)
>> > > +       {
>>
>> Linux kernel has
>>
>> unsigned int x86_model(unsigned int sig)
>> {
>>         unsigned int fam, model;
>>
>>         fam = x86_family(sig);
>>
>>         model = (sig >> 4) & 0xf;
>>
>>         if (fam >= 0x6)
>>        ^^^^^^^^^^^^^^^
>>                 model += ((sig >> 16) & 0xf) << 4;
>>
>>         return model;
>> }
>>
>> Please check which one is correct, Linux kernel or SDM.
>
>
> SDM and kernel are in sync if we assume only 2 family id 0x06 and 0x0F.
>
> Kernel code may not be correct for family id other than 0x06/0x0F.
>
> Family id for DMR is 0x0F, it gets added with extended family id to get display family id 0x13.
> Since family id for DMR is 0x0F,  the extended model needs to be taken into account.
> Glibc doing this calculation for DMR in get_common_indices function.
>
> 447       if (*family == 0x0f)
>  448         {
>  449           *family += (eax >> 20) & 0xff;
>  450           *model += *extended_model;
>  451         }

You are right.  There is no issue.

>
>>
>>
>> > > +         switch (model)
>> > > +           {
>> > > +           case 0x01:
>> > > +             microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
>> > > +             break;
>> > > +
>> > > +           default:
>> > > +             microarch = INTEL_UNKNOWN;
>> > > +             break;
>> > > +           }
>> > > +
>> > > +         switch (microarch)
>> > > +           {
>> > > +             /* Intel Diamond Rapids tuning.  */
>> > > +           case INTEL_BIGCORE_DIAMONDRAPIDS:
>> > > +             /* Rep string instructions, unaligned load, unaligned copy,
>> > > +                and pminub are fast on Intel Core i3, i5 and i7.  */
>> > > +             cpu_features->preferred[index_arch_Fast_Rep_String]
>> > > +               |= (bit_arch_Fast_Rep_String
>> > > +                   | bit_arch_Fast_Unaligned_Load
>> > > +                   | bit_arch_Fast_Unaligned_Copy
>> > > +                   | bit_arch_Prefer_PMINUB_for_stringop);
>> > > +             cpu_features->cachesize_non_temporal_divisor = 2;
>> > > +             break;
>> > > +
>> > > +           default:
>> > > +             /* Unknown family 0x13 processors.  */
>> > > +             break;
>> > > +           }
>> > > +       }
>> >
>> > Please change how family 6 and 19 CPU models are handled in 2 patches:
>> >
>> > 1.  Handled unknown family CPUs with default_tuning.
>> > 2.  Add family 19 CPU support.
>> >
>> > >
>> > >        /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER
>> > > --
>> > > 2.49.0
>> > >
>> >
>> >
>> > --
>> > H.J.
>>
>>
>>
>> --
>> H.J.
  

Patch

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index f1d2a179e4..3f4699a9f7 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -502,7 +502,7 @@  _Static_assert (((index_arch_Fast_Unaligned_Load
 		"Incorrect index_arch_Fast_Unaligned_Load");
 
 
-/* Intel Family-6 microarch list.  */
+/* Intel Family microarch list.  */
 enum
 {
   /* Atom processors.  */
@@ -542,6 +542,7 @@  enum
   INTEL_BIGCORE_ARROWLAKE,
   INTEL_BIGCORE_PANTHERLAKE,
   INTEL_BIGCORE_GRANITERAPIDS,
+  INTEL_BIGCORE_DIAMONDRAPIDS,
 
   /* Mixed (bigcore + atom SOC).  */
   INTEL_MIXED_LAKEFIELD,
@@ -749,6 +750,7 @@  init_cpu_features (struct cpu_features *cpu_features)
   if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
     {
       unsigned int extended_model;
+      unsigned int microarch;
 
       kind = arch_kind_intel;
 
@@ -767,8 +769,7 @@  init_cpu_features (struct cpu_features *cpu_features)
       if (family == 0x06)
 	{
 	  model += extended_model;
-	  unsigned int microarch
-	      = intel_get_fam6_microarch (model, stepping);
+	  microarch = intel_get_fam6_microarch (model, stepping);
 
 	  switch (microarch)
 	    {
@@ -932,6 +933,38 @@  https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 		break;
 	    }
 	}
+      else if (family == 0x13)
+	{
+	  switch (model)
+	    {
+	    case 0x01:
+	      microarch = INTEL_BIGCORE_DIAMONDRAPIDS;
+	      break;
+
+	    default:
+	      microarch = INTEL_UNKNOWN;
+	      break;
+	    }
+
+	  switch (microarch)
+	    {
+	      /* Intel Diamond Rapids tuning.  */
+	    case INTEL_BIGCORE_DIAMONDRAPIDS:
+	      /* Rep string instructions, unaligned load, unaligned copy,
+		 and pminub are fast on Intel Core i3, i5 and i7.  */
+	      cpu_features->preferred[index_arch_Fast_Rep_String]
+		|= (bit_arch_Fast_Rep_String
+		    | bit_arch_Fast_Unaligned_Load
+		    | bit_arch_Fast_Unaligned_Copy
+		    | bit_arch_Prefer_PMINUB_for_stringop);
+	      cpu_features->cachesize_non_temporal_divisor = 2;
+	      break;
+
+	    default:
+	      /* Unknown family 0x13 processors.  */
+	      break;
+	    }
+	}
 
 
       /* Since AVX512ER is unique to Xeon Phi, set Prefer_No_VZEROUPPER