[v5,2/3] x86: Refactor Intel `init_cpu_features`
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
Commit Message
This patch should have no affect on existing functionality.
The current code, which has a single switch for model detection and
setting prefered features, is difficult to follow/extend. The cases
use magic numbers and many microarchitectures are missing. This makes
it difficult to reason about what is implemented so far and/or
how/where to add support for new features.
This patch splits the model detection and preference setting stages so
that CPU preferences can be set based on a complete list of available
microarchitectures, rather than based on model magic numbers.
---
sysdeps/x86/cpu-features.c | 401 +++++++++++++++++++++++++++++--------
1 file changed, 316 insertions(+), 85 deletions(-)
Comments
On Mon, May 8, 2023 at 8:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> This patch should have no affect on existing functionality.
>
> The current code, which has a single switch for model detection and
> setting prefered features, is difficult to follow/extend. The cases
> use magic numbers and many microarchitectures are missing. This makes
> it difficult to reason about what is implemented so far and/or
> how/where to add support for new features.
>
> This patch splits the model detection and preference setting stages so
> that CPU preferences can be set based on a complete list of available
> microarchitectures, rather than based on model magic numbers.
> ---
> sysdeps/x86/cpu-features.c | 401 +++++++++++++++++++++++++++++--------
> 1 file changed, 316 insertions(+), 85 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 5bff8ec0b4..bec70c3c49 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -417,6 +417,217 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> == index_arch_Fast_Copy_Backward)),
> "Incorrect index_arch_Fast_Unaligned_Load");
>
> +
> +/* Intel Family-6 microarch list. */
> +enum
> +{
> + /* Atom processors. */
> + INTEL_ATOM_BONNELL,
> + INTEL_ATOM_SALTWELL,
> + INTEL_ATOM_SILVERMONT,
> + INTEL_ATOM_AIRMONT,
> + INTEL_ATOM_GOLDMONT,
> + INTEL_ATOM_GOLDMONT_PLUS,
> + INTEL_ATOM_SIERRAFOREST,
> + INTEL_ATOM_GRANDRIDGE,
> + INTEL_ATOM_TREMONT,
> +
> + /* Bigcore processors. */
> + INTEL_BIGCORE_MEROM,
> + INTEL_BIGCORE_PENRYN,
> + INTEL_BIGCORE_DUNNINGTON,
> + INTEL_BIGCORE_NEHALEM,
> + INTEL_BIGCORE_WESTMERE,
> + INTEL_BIGCORE_SANDYBRIDGE,
> + INTEL_BIGCORE_IVYBRIDGE,
> + INTEL_BIGCORE_HASWELL,
> + INTEL_BIGCORE_BROADWELL,
> + INTEL_BIGCORE_SKYLAKE,
> + INTEL_BIGCORE_AMBERLAKE,
> + INTEL_BIGCORE_COFFEELAKE,
> + INTEL_BIGCORE_WHISKEYLAKE,
> + INTEL_BIGCORE_KABYLAKE,
> + INTEL_BIGCORE_COMETLAKE,
> + INTEL_BIGCORE_SKYLAKE_AVX512,
> + INTEL_BIGCORE_CANNONLAKE,
> + INTEL_BIGCORE_CASCADELAKE,
> + INTEL_BIGCORE_COOPERLAKE,
> + INTEL_BIGCORE_ICELAKE,
> + INTEL_BIGCORE_TIGERLAKE,
> + INTEL_BIGCORE_ROCKETLAKE,
> + INTEL_BIGCORE_SAPPHIRERAPIDS,
> + INTEL_BIGCORE_RAPTORLAKE,
> + INTEL_BIGCORE_EMERALDRAPIDS,
> + INTEL_BIGCORE_METEORLAKE,
> + INTEL_BIGCORE_LUNARLAKE,
> + INTEL_BIGCORE_ARROWLAKE,
> + INTEL_BIGCORE_GRANITERAPIDS,
> +
> + /* Mixed (bigcore + atom SOC). */
> + INTEL_MIXED_LAKEFIELD,
> + INTEL_MIXED_ALDERLAKE,
> +
> + /* KNL. */
> + INTEL_KNIGHTS_MILL,
> + INTEL_KNIGHTS_LANDING,
> +
> + /* Unknown. */
> + INTEL_UNKNOWN,
> +};
> +
> +static unsigned int
> +intel_get_fam6_microarch (unsigned int model, unsigned int stepping)
> +{
> + switch (model)
> + {
> + case 0x1C:
> + case 0x26:
> + return INTEL_ATOM_BONNELL;
> + case 0x27:
> + case 0x35:
> + case 0x36:
> + return INTEL_ATOM_SALTWELL;
> + case 0x37:
> + case 0x4A:
> + case 0x4D:
> + case 0x5D:
> + return INTEL_ATOM_SILVERMONT;
> + case 0x4C:
> + case 0x5A:
> + case 0x75:
> + return INTEL_ATOM_AIRMONT;
> + case 0x5C:
> + case 0x5F:
> + return INTEL_ATOM_GOLDMONT;
> + case 0x7A:
> + return INTEL_ATOM_GOLDMONT_PLUS;
> + case 0xAF:
> + return INTEL_ATOM_SIERRAFOREST;
> + case 0xB6:
> + return INTEL_ATOM_GRANDRIDGE;
> + case 0x86:
> + case 0x96:
> + case 0x9C:
> + return INTEL_ATOM_TREMONT;
> + case 0x0F:
> + case 0x16:
> + return INTEL_BIGCORE_MEROM;
> + case 0x17:
> + return INTEL_BIGCORE_PENRYN;
> + case 0x1D:
> + return INTEL_BIGCORE_DUNNINGTON;
> + case 0x1A:
> + case 0x1E:
> + case 0x1F:
> + case 0x2E:
> + return INTEL_BIGCORE_NEHALEM;
> + case 0x25:
> + case 0x2C:
> + case 0x2F:
> + return INTEL_BIGCORE_WESTMERE;
> + case 0x2A:
> + case 0x2D:
> + return INTEL_BIGCORE_SANDYBRIDGE;
> + case 0x3A:
> + case 0x3E:
> + return INTEL_BIGCORE_IVYBRIDGE;
> + case 0x3C:
> + case 0x3F:
> + case 0x45:
> + case 0x46:
> + return INTEL_BIGCORE_HASWELL;
> + case 0x3D:
> + case 0x47:
> + case 0x4F:
> + case 0x56:
> + return INTEL_BIGCORE_BROADWELL;
> + case 0x4E:
> + case 0x5E:
> + return INTEL_BIGCORE_SKYLAKE;
> + case 0x8E:
> + switch (stepping)
> + {
> + case 0x09:
> + return INTEL_BIGCORE_AMBERLAKE;
> + case 0x0A:
> + return INTEL_BIGCORE_COFFEELAKE;
> + case 0x0B:
> + case 0x0C:
> + return INTEL_BIGCORE_WHISKEYLAKE;
> + default:
> + return INTEL_BIGCORE_KABYLAKE;
> + }
> + case 0x9E:
> + switch (stepping)
> + {
> + case 0x0A:
> + case 0x0B:
> + case 0x0C:
> + case 0x0D:
> + return INTEL_BIGCORE_COFFEELAKE;
> + default:
> + return INTEL_BIGCORE_KABYLAKE;
> + }
> + case 0xA5:
> + case 0xA6:
> + return INTEL_BIGCORE_COMETLAKE;
> + case 0x66:
> + return INTEL_BIGCORE_CANNONLAKE;
> + case 0x55:
> + switch (stepping)
> + {
> + case 0x06:
> + case 0x07:
> + return INTEL_BIGCORE_CASCADELAKE;
> + case 0x0b:
> + return INTEL_BIGCORE_COOPERLAKE;
> + default:
> + return INTEL_BIGCORE_SKYLAKE_AVX512;
> + }
> + case 0x6A:
> + case 0x6C:
> + case 0x7D:
> + case 0x7E:
> + case 0x9D:
> + return INTEL_BIGCORE_ICELAKE;
> + case 0x8C:
> + case 0x8D:
> + return INTEL_BIGCORE_TIGERLAKE;
> + case 0xA7:
> + return INTEL_BIGCORE_ROCKETLAKE;
> + case 0x8F:
> + return INTEL_BIGCORE_SAPPHIRERAPIDS;
> + case 0xB7:
> + case 0xBA:
> + case 0xBF:
> + return INTEL_BIGCORE_RAPTORLAKE;
> + case 0xCF:
> + return INTEL_BIGCORE_EMERALDRAPIDS;
> + case 0xAA:
> + case 0xAC:
> + return INTEL_BIGCORE_METEORLAKE;
> + case 0xbd:
> + return INTEL_BIGCORE_LUNARLAKE;
> + case 0xc6:
> + return INTEL_BIGCORE_ARROWLAKE;
> + case 0xAD:
> + case 0xAE:
> + return INTEL_BIGCORE_GRANITERAPIDS;
> + case 0x8A:
> + return INTEL_MIXED_LAKEFIELD;
> + case 0x97:
> + case 0x9A:
> + case 0xBE:
> + return INTEL_MIXED_ALDERLAKE;
> + case 0x85:
> + return INTEL_KNIGHTS_MILL;
> + case 0x57:
> + return INTEL_KNIGHTS_LANDING;
> + default:
> + return INTEL_UNKNOWN;
> + }
> +}
> +
> static inline void
> init_cpu_features (struct cpu_features *cpu_features)
> {
> @@ -453,129 +664,149 @@ init_cpu_features (struct cpu_features *cpu_features)
> if (family == 0x06)
> {
> model += extended_model;
> - switch (model)
> + unsigned int microarch
> + = intel_get_fam6_microarch (model, stepping);
> +
> + switch (microarch)
> {
> - case 0x1c:
> - case 0x26:
> - /* BSF is slow on Atom. */
> + /* Atom / KNL tuning. */
> + case INTEL_ATOM_BONNELL:
Since Saltwell is a shrink of Bonnell, INTEL_ATOM_SALTWELL
should be added here.
> + /* BSF is slow on Bonnell. */
> cpu_features->preferred[index_arch_Slow_BSF]
> - |= bit_arch_Slow_BSF;
> + |= bit_arch_Slow_BSF;
> break;
>
> - case 0x57:
> - /* Knights Landing. Enable Silvermont optimizations. */
> -
> - case 0x7a:
> - /* Unaligned load versions are faster than SSSE3
> - on Goldmont Plus. */
> -
> - case 0x5c:
> - case 0x5f:
> /* Unaligned load versions are faster than SSSE3
> - on Goldmont. */
> + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
> + case INTEL_ATOM_AIRMONT:
> + case INTEL_ATOM_SILVERMONT:
> + case INTEL_ATOM_GOLDMONT:
> + case INTEL_ATOM_GOLDMONT_PLUS:
>
> - case 0x4c:
> - case 0x5a:
> - case 0x75:
> - /* Airmont is a die shrink of Silvermont. */
> + /* Knights Landing. Enable Silvermont optimizations. */
> + case INTEL_KNIGHTS_LANDING:
>
> - case 0x37:
> - case 0x4a:
> - case 0x4d:
> - case 0x5d:
> - /* Unaligned load versions are faster than SSSE3
> - on Silvermont. */
> cpu_features->preferred[index_arch_Fast_Unaligned_Load]
> - |= (bit_arch_Fast_Unaligned_Load
> - | bit_arch_Fast_Unaligned_Copy
> - | bit_arch_Prefer_PMINUB_for_stringop
> - | bit_arch_Slow_SSE4_2);
> + |= (bit_arch_Fast_Unaligned_Load
> + | bit_arch_Fast_Unaligned_Copy
> + | bit_arch_Prefer_PMINUB_for_stringop
> + | bit_arch_Slow_SSE4_2);
> break;
>
> - case 0x86:
> - case 0x96:
> - case 0x9c:
> + case INTEL_ATOM_TREMONT:
> /* Enable rep string instructions, unaligned load, unaligned
> - copy, pminub and avoid SSE 4.2 on Tremont. */
> + copy, pminub and avoid SSE 4.2 on Tremont. */
> cpu_features->preferred[index_arch_Fast_Rep_String]
> - |= (bit_arch_Fast_Rep_String
> - | bit_arch_Fast_Unaligned_Load
> - | bit_arch_Fast_Unaligned_Copy
> - | bit_arch_Prefer_PMINUB_for_stringop
> - | bit_arch_Slow_SSE4_2);
> + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
> + | bit_arch_Fast_Unaligned_Copy
> + | bit_arch_Prefer_PMINUB_for_stringop
> + | bit_arch_Slow_SSE4_2);
> + break;
> +
> + /* Untuned KNL microarch. */
> + case INTEL_KNIGHTS_MILL:
> + /* Untuned atom microarch. */
> + case INTEL_ATOM_SIERRAFOREST:
> + case INTEL_ATOM_GRANDRIDGE:
> + case INTEL_ATOM_SALTWELL:
> break;
"break" should be removed to enable the optimizations
for processors with AVX.
>
> + /* Bigcore Tuning. */
> + case INTEL_UNKNOWN:
> default:
> /* Unknown family 0x06 processors. Assuming this is one
> of Core i3/i5/i7 processors if AVX is available. */
> if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
> break;
> - /* Fall through. */
> -
> - case 0x1a:
> - case 0x1e:
> - case 0x1f:
> - case 0x25:
> - case 0x2c:
> - case 0x2e:
> - case 0x2f:
> + case INTEL_BIGCORE_NEHALEM:
> + case INTEL_BIGCORE_WESTMERE:
> /* Rep string instructions, unaligned load, unaligned copy,
> and pminub are fast on Intel Core i3, i5 and i7. */
> cpu_features->preferred[index_arch_Fast_Rep_String]
> - |= (bit_arch_Fast_Rep_String
> - | bit_arch_Fast_Unaligned_Load
> - | bit_arch_Fast_Unaligned_Copy
> - | bit_arch_Prefer_PMINUB_for_stringop);
> + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
> + | bit_arch_Fast_Unaligned_Copy
> + | bit_arch_Prefer_PMINUB_for_stringop);
> + break;
> +
> + /* Untuned Bigcore microarch. */
> + case INTEL_BIGCORE_SANDYBRIDGE:
> + case INTEL_BIGCORE_IVYBRIDGE:
> + case INTEL_BIGCORE_HASWELL:
> + case INTEL_BIGCORE_BROADWELL:
> + case INTEL_BIGCORE_SKYLAKE:
> + case INTEL_BIGCORE_AMBERLAKE:
> + case INTEL_BIGCORE_COFFEELAKE:
> + case INTEL_BIGCORE_WHISKEYLAKE:
> + case INTEL_BIGCORE_KABYLAKE:
> + case INTEL_BIGCORE_COMETLAKE:
> + case INTEL_BIGCORE_SKYLAKE_AVX512:
> + case INTEL_BIGCORE_CASCADELAKE:
> + case INTEL_BIGCORE_COOPERLAKE:
> + case INTEL_BIGCORE_CANNONLAKE:
> + case INTEL_BIGCORE_ICELAKE:
> + case INTEL_BIGCORE_TIGERLAKE:
> + case INTEL_BIGCORE_ROCKETLAKE:
> + case INTEL_BIGCORE_RAPTORLAKE:
> + case INTEL_BIGCORE_METEORLAKE:
> + case INTEL_BIGCORE_LUNARLAKE:
> + case INTEL_BIGCORE_ARROWLAKE:
> + case INTEL_BIGCORE_SAPPHIRERAPIDS:
> + case INTEL_BIGCORE_EMERALDRAPIDS:
> + case INTEL_BIGCORE_GRANITERAPIDS:
> + break;
> +
> + /* Untuned Mixed (bigcore + atom SOC). */
> + case INTEL_MIXED_LAKEFIELD:
> + case INTEL_MIXED_ALDERLAKE:
All these processors should be treated as default.
> break;
> }
>
> - /* Disable TSX on some processors to avoid TSX on kernels that
> - weren't updated with the latest microcode package (which
> - disables broken feature by default). */
> - switch (model)
> + /* Disable TSX on some processors to avoid TSX on kernels that
> + weren't updated with the latest microcode package (which
> + disables broken feature by default). */
> + switch (microarch)
> {
> - case 0x55:
> - if (stepping <= 5)
> + case INTEL_BIGCORE_SKYLAKE_AVX512:
> + /* 0x55 && stepping <= 5 is SKYLAKE_AVX512. Cascadelake and
> + Cooperlake also have model == 0x55 so double check the
> + stepping to be safe. */
> + if (model == 0x55 && stepping <= 5)
No need to check model == 0x55.
> goto disable_tsx;
> break;
> - case 0x8e:
> - /* NB: Although the errata documents that for model == 0x8e,
> - only 0xb stepping or lower are impacted, the intention of
> - the errata was to disable TSX on all client processors on
> - all steppings. Include 0xc stepping which is an Intel
> - Core i7-8665U, a client mobile processor. */
> - case 0x9e:
> - if (stepping > 0xc)
> +
> + case INTEL_BIGCORE_SKYLAKE:
> + case INTEL_BIGCORE_AMBERLAKE:
> + case INTEL_BIGCORE_COFFEELAKE:
> + case INTEL_BIGCORE_WHISKEYLAKE:
> + case INTEL_BIGCORE_KABYLAKE:
> + /* NB: Although the errata documents that for model == 0x8e
> + (skylake client), only 0xb stepping or lower are impacted,
> + the intention of the errata was to disable TSX on all client
> + processors on all steppings. Include 0xc stepping which is
> + an Intel Core i7-8665U, a client mobile processor. */
> + if ((model == 0x8e || model == 0x9e) && stepping > 0xc)
> break;
> - /* Fall through. */
> - case 0x4e:
> - case 0x5e:
> - {
> +
> /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
> processors listed in:
>
> https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
> */
> -disable_tsx:
> + disable_tsx:
> CPU_FEATURE_UNSET (cpu_features, HLE);
> CPU_FEATURE_UNSET (cpu_features, RTM);
> CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
> - }
> - break;
> - case 0x3f:
> - /* Xeon E7 v3 with stepping >= 4 has working TSX. */
> - if (stepping >= 4)
> break;
> - /* Fall through. */
> - case 0x3c:
> - case 0x45:
> - case 0x46:
> - /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
> - with stepping >= 4) to avoid TSX on kernels that weren't
> - updated with the latest microcode package (which disables
> - broken feature by default). */
> - CPU_FEATURE_UNSET (cpu_features, RTM);
> - break;
> +
> + case INTEL_BIGCORE_HASWELL:
> + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
> + TSX. Haswell also include other model numbers that have
> + working TSX. */
> + if (model == 0x3f && stepping >= 4)
> + break;
> +
> + CPU_FEATURE_UNSET (cpu_features, RTM);
> + break;
> }
> }
>
> --
> 2.34.1
>
On Tue, May 9, 2023 at 4:59 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, May 8, 2023 at 8:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > This patch should have no affect on existing functionality.
> >
> > The current code, which has a single switch for model detection and
> > setting prefered features, is difficult to follow/extend. The cases
> > use magic numbers and many microarchitectures are missing. This makes
> > it difficult to reason about what is implemented so far and/or
> > how/where to add support for new features.
> >
> > This patch splits the model detection and preference setting stages so
> > that CPU preferences can be set based on a complete list of available
> > microarchitectures, rather than based on model magic numbers.
> > ---
> > sysdeps/x86/cpu-features.c | 401 +++++++++++++++++++++++++++++--------
> > 1 file changed, 316 insertions(+), 85 deletions(-)
> >
> > diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> > index 5bff8ec0b4..bec70c3c49 100644
> > --- a/sysdeps/x86/cpu-features.c
> > +++ b/sysdeps/x86/cpu-features.c
> > @@ -417,6 +417,217 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
> > == index_arch_Fast_Copy_Backward)),
> > "Incorrect index_arch_Fast_Unaligned_Load");
> >
> > +
> > +/* Intel Family-6 microarch list. */
> > +enum
> > +{
> > + /* Atom processors. */
> > + INTEL_ATOM_BONNELL,
> > + INTEL_ATOM_SALTWELL,
> > + INTEL_ATOM_SILVERMONT,
> > + INTEL_ATOM_AIRMONT,
> > + INTEL_ATOM_GOLDMONT,
> > + INTEL_ATOM_GOLDMONT_PLUS,
> > + INTEL_ATOM_SIERRAFOREST,
> > + INTEL_ATOM_GRANDRIDGE,
> > + INTEL_ATOM_TREMONT,
> > +
> > + /* Bigcore processors. */
> > + INTEL_BIGCORE_MEROM,
> > + INTEL_BIGCORE_PENRYN,
> > + INTEL_BIGCORE_DUNNINGTON,
> > + INTEL_BIGCORE_NEHALEM,
> > + INTEL_BIGCORE_WESTMERE,
> > + INTEL_BIGCORE_SANDYBRIDGE,
> > + INTEL_BIGCORE_IVYBRIDGE,
> > + INTEL_BIGCORE_HASWELL,
> > + INTEL_BIGCORE_BROADWELL,
> > + INTEL_BIGCORE_SKYLAKE,
> > + INTEL_BIGCORE_AMBERLAKE,
> > + INTEL_BIGCORE_COFFEELAKE,
> > + INTEL_BIGCORE_WHISKEYLAKE,
> > + INTEL_BIGCORE_KABYLAKE,
> > + INTEL_BIGCORE_COMETLAKE,
> > + INTEL_BIGCORE_SKYLAKE_AVX512,
> > + INTEL_BIGCORE_CANNONLAKE,
> > + INTEL_BIGCORE_CASCADELAKE,
> > + INTEL_BIGCORE_COOPERLAKE,
> > + INTEL_BIGCORE_ICELAKE,
> > + INTEL_BIGCORE_TIGERLAKE,
> > + INTEL_BIGCORE_ROCKETLAKE,
> > + INTEL_BIGCORE_SAPPHIRERAPIDS,
> > + INTEL_BIGCORE_RAPTORLAKE,
> > + INTEL_BIGCORE_EMERALDRAPIDS,
> > + INTEL_BIGCORE_METEORLAKE,
> > + INTEL_BIGCORE_LUNARLAKE,
> > + INTEL_BIGCORE_ARROWLAKE,
> > + INTEL_BIGCORE_GRANITERAPIDS,
> > +
> > + /* Mixed (bigcore + atom SOC). */
> > + INTEL_MIXED_LAKEFIELD,
> > + INTEL_MIXED_ALDERLAKE,
> > +
> > + /* KNL. */
> > + INTEL_KNIGHTS_MILL,
> > + INTEL_KNIGHTS_LANDING,
> > +
> > + /* Unknown. */
> > + INTEL_UNKNOWN,
> > +};
> > +
> > +static unsigned int
> > +intel_get_fam6_microarch (unsigned int model, unsigned int stepping)
> > +{
> > + switch (model)
> > + {
> > + case 0x1C:
> > + case 0x26:
> > + return INTEL_ATOM_BONNELL;
> > + case 0x27:
> > + case 0x35:
> > + case 0x36:
> > + return INTEL_ATOM_SALTWELL;
> > + case 0x37:
> > + case 0x4A:
> > + case 0x4D:
> > + case 0x5D:
> > + return INTEL_ATOM_SILVERMONT;
> > + case 0x4C:
> > + case 0x5A:
> > + case 0x75:
> > + return INTEL_ATOM_AIRMONT;
> > + case 0x5C:
> > + case 0x5F:
> > + return INTEL_ATOM_GOLDMONT;
> > + case 0x7A:
> > + return INTEL_ATOM_GOLDMONT_PLUS;
> > + case 0xAF:
> > + return INTEL_ATOM_SIERRAFOREST;
> > + case 0xB6:
> > + return INTEL_ATOM_GRANDRIDGE;
> > + case 0x86:
> > + case 0x96:
> > + case 0x9C:
> > + return INTEL_ATOM_TREMONT;
> > + case 0x0F:
> > + case 0x16:
> > + return INTEL_BIGCORE_MEROM;
> > + case 0x17:
> > + return INTEL_BIGCORE_PENRYN;
> > + case 0x1D:
> > + return INTEL_BIGCORE_DUNNINGTON;
> > + case 0x1A:
> > + case 0x1E:
> > + case 0x1F:
> > + case 0x2E:
> > + return INTEL_BIGCORE_NEHALEM;
> > + case 0x25:
> > + case 0x2C:
> > + case 0x2F:
> > + return INTEL_BIGCORE_WESTMERE;
> > + case 0x2A:
> > + case 0x2D:
> > + return INTEL_BIGCORE_SANDYBRIDGE;
> > + case 0x3A:
> > + case 0x3E:
> > + return INTEL_BIGCORE_IVYBRIDGE;
> > + case 0x3C:
> > + case 0x3F:
> > + case 0x45:
> > + case 0x46:
> > + return INTEL_BIGCORE_HASWELL;
> > + case 0x3D:
> > + case 0x47:
> > + case 0x4F:
> > + case 0x56:
> > + return INTEL_BIGCORE_BROADWELL;
> > + case 0x4E:
> > + case 0x5E:
> > + return INTEL_BIGCORE_SKYLAKE;
> > + case 0x8E:
> > + switch (stepping)
> > + {
> > + case 0x09:
> > + return INTEL_BIGCORE_AMBERLAKE;
> > + case 0x0A:
> > + return INTEL_BIGCORE_COFFEELAKE;
> > + case 0x0B:
> > + case 0x0C:
> > + return INTEL_BIGCORE_WHISKEYLAKE;
> > + default:
> > + return INTEL_BIGCORE_KABYLAKE;
> > + }
> > + case 0x9E:
> > + switch (stepping)
> > + {
> > + case 0x0A:
> > + case 0x0B:
> > + case 0x0C:
> > + case 0x0D:
> > + return INTEL_BIGCORE_COFFEELAKE;
> > + default:
> > + return INTEL_BIGCORE_KABYLAKE;
> > + }
> > + case 0xA5:
> > + case 0xA6:
> > + return INTEL_BIGCORE_COMETLAKE;
> > + case 0x66:
> > + return INTEL_BIGCORE_CANNONLAKE;
> > + case 0x55:
> > + switch (stepping)
> > + {
> > + case 0x06:
> > + case 0x07:
> > + return INTEL_BIGCORE_CASCADELAKE;
> > + case 0x0b:
> > + return INTEL_BIGCORE_COOPERLAKE;
> > + default:
> > + return INTEL_BIGCORE_SKYLAKE_AVX512;
> > + }
> > + case 0x6A:
> > + case 0x6C:
> > + case 0x7D:
> > + case 0x7E:
> > + case 0x9D:
> > + return INTEL_BIGCORE_ICELAKE;
> > + case 0x8C:
> > + case 0x8D:
> > + return INTEL_BIGCORE_TIGERLAKE;
> > + case 0xA7:
> > + return INTEL_BIGCORE_ROCKETLAKE;
> > + case 0x8F:
> > + return INTEL_BIGCORE_SAPPHIRERAPIDS;
> > + case 0xB7:
> > + case 0xBA:
> > + case 0xBF:
> > + return INTEL_BIGCORE_RAPTORLAKE;
> > + case 0xCF:
> > + return INTEL_BIGCORE_EMERALDRAPIDS;
> > + case 0xAA:
> > + case 0xAC:
> > + return INTEL_BIGCORE_METEORLAKE;
> > + case 0xbd:
> > + return INTEL_BIGCORE_LUNARLAKE;
> > + case 0xc6:
> > + return INTEL_BIGCORE_ARROWLAKE;
> > + case 0xAD:
> > + case 0xAE:
> > + return INTEL_BIGCORE_GRANITERAPIDS;
> > + case 0x8A:
> > + return INTEL_MIXED_LAKEFIELD;
> > + case 0x97:
> > + case 0x9A:
> > + case 0xBE:
> > + return INTEL_MIXED_ALDERLAKE;
> > + case 0x85:
> > + return INTEL_KNIGHTS_MILL;
> > + case 0x57:
> > + return INTEL_KNIGHTS_LANDING;
> > + default:
> > + return INTEL_UNKNOWN;
> > + }
> > +}
> > +
> > static inline void
> > init_cpu_features (struct cpu_features *cpu_features)
> > {
> > @@ -453,129 +664,149 @@ init_cpu_features (struct cpu_features *cpu_features)
> > if (family == 0x06)
> > {
> > model += extended_model;
> > - switch (model)
> > + unsigned int microarch
> > + = intel_get_fam6_microarch (model, stepping);
> > +
> > + switch (microarch)
> > {
> > - case 0x1c:
> > - case 0x26:
> > - /* BSF is slow on Atom. */
> > + /* Atom / KNL tuning. */
> > + case INTEL_ATOM_BONNELL:
>
> Since Saltwell is a shrink of Bonnell, INTEL_ATOM_SALTWELL
> should be added here.
>
Would rather leave this patch as no-functionality change. Will do so
in follow up patch.
> > + /* BSF is slow on Bonnell. */
> > cpu_features->preferred[index_arch_Slow_BSF]
> > - |= bit_arch_Slow_BSF;
> > + |= bit_arch_Slow_BSF;
> > break;
> >
> > - case 0x57:
> > - /* Knights Landing. Enable Silvermont optimizations. */
> > -
> > - case 0x7a:
> > - /* Unaligned load versions are faster than SSSE3
> > - on Goldmont Plus. */
> > -
> > - case 0x5c:
> > - case 0x5f:
> > /* Unaligned load versions are faster than SSSE3
> > - on Goldmont. */
> > + on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
> > + case INTEL_ATOM_AIRMONT:
> > + case INTEL_ATOM_SILVERMONT:
> > + case INTEL_ATOM_GOLDMONT:
> > + case INTEL_ATOM_GOLDMONT_PLUS:
> >
> > - case 0x4c:
> > - case 0x5a:
> > - case 0x75:
> > - /* Airmont is a die shrink of Silvermont. */
> > + /* Knights Landing. Enable Silvermont optimizations. */
> > + case INTEL_KNIGHTS_LANDING:
> >
> > - case 0x37:
> > - case 0x4a:
> > - case 0x4d:
> > - case 0x5d:
> > - /* Unaligned load versions are faster than SSSE3
> > - on Silvermont. */
> > cpu_features->preferred[index_arch_Fast_Unaligned_Load]
> > - |= (bit_arch_Fast_Unaligned_Load
> > - | bit_arch_Fast_Unaligned_Copy
> > - | bit_arch_Prefer_PMINUB_for_stringop
> > - | bit_arch_Slow_SSE4_2);
> > + |= (bit_arch_Fast_Unaligned_Load
> > + | bit_arch_Fast_Unaligned_Copy
> > + | bit_arch_Prefer_PMINUB_for_stringop
> > + | bit_arch_Slow_SSE4_2);
> > break;
> >
> > - case 0x86:
> > - case 0x96:
> > - case 0x9c:
> > + case INTEL_ATOM_TREMONT:
> > /* Enable rep string instructions, unaligned load, unaligned
> > - copy, pminub and avoid SSE 4.2 on Tremont. */
> > + copy, pminub and avoid SSE 4.2 on Tremont. */
> > cpu_features->preferred[index_arch_Fast_Rep_String]
> > - |= (bit_arch_Fast_Rep_String
> > - | bit_arch_Fast_Unaligned_Load
> > - | bit_arch_Fast_Unaligned_Copy
> > - | bit_arch_Prefer_PMINUB_for_stringop
> > - | bit_arch_Slow_SSE4_2);
> > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
> > + | bit_arch_Fast_Unaligned_Copy
> > + | bit_arch_Prefer_PMINUB_for_stringop
> > + | bit_arch_Slow_SSE4_2);
> > + break;
> > +
> > + /* Untuned KNL microarch. */
> > + case INTEL_KNIGHTS_MILL:
> > + /* Untuned atom microarch. */
> > + case INTEL_ATOM_SIERRAFOREST:
> > + case INTEL_ATOM_GRANDRIDGE:
> > + case INTEL_ATOM_SALTWELL:
> > break;
>
> "break" should be removed to enable the optimizations
> for processors with AVX.
Done.
>
> >
> > + /* Bigcore Tuning. */
> > + case INTEL_UNKNOWN:
> > default:
> > /* Unknown family 0x06 processors. Assuming this is one
> > of Core i3/i5/i7 processors if AVX is available. */
> > if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
> > break;
> > - /* Fall through. */
> > -
> > - case 0x1a:
> > - case 0x1e:
> > - case 0x1f:
> > - case 0x25:
> > - case 0x2c:
> > - case 0x2e:
> > - case 0x2f:
> > + case INTEL_BIGCORE_NEHALEM:
> > + case INTEL_BIGCORE_WESTMERE:
> > /* Rep string instructions, unaligned load, unaligned copy,
> > and pminub are fast on Intel Core i3, i5 and i7. */
> > cpu_features->preferred[index_arch_Fast_Rep_String]
> > - |= (bit_arch_Fast_Rep_String
> > - | bit_arch_Fast_Unaligned_Load
> > - | bit_arch_Fast_Unaligned_Copy
> > - | bit_arch_Prefer_PMINUB_for_stringop);
> > + |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
> > + | bit_arch_Fast_Unaligned_Copy
> > + | bit_arch_Prefer_PMINUB_for_stringop);
> > + break;
> > +
> > + /* Untuned Bigcore microarch. */
> > + case INTEL_BIGCORE_SANDYBRIDGE:
> > + case INTEL_BIGCORE_IVYBRIDGE:
> > + case INTEL_BIGCORE_HASWELL:
> > + case INTEL_BIGCORE_BROADWELL:
> > + case INTEL_BIGCORE_SKYLAKE:
> > + case INTEL_BIGCORE_AMBERLAKE:
> > + case INTEL_BIGCORE_COFFEELAKE:
> > + case INTEL_BIGCORE_WHISKEYLAKE:
> > + case INTEL_BIGCORE_KABYLAKE:
> > + case INTEL_BIGCORE_COMETLAKE:
> > + case INTEL_BIGCORE_SKYLAKE_AVX512:
> > + case INTEL_BIGCORE_CASCADELAKE:
> > + case INTEL_BIGCORE_COOPERLAKE:
> > + case INTEL_BIGCORE_CANNONLAKE:
> > + case INTEL_BIGCORE_ICELAKE:
> > + case INTEL_BIGCORE_TIGERLAKE:
> > + case INTEL_BIGCORE_ROCKETLAKE:
> > + case INTEL_BIGCORE_RAPTORLAKE:
> > + case INTEL_BIGCORE_METEORLAKE:
> > + case INTEL_BIGCORE_LUNARLAKE:
> > + case INTEL_BIGCORE_ARROWLAKE:
> > + case INTEL_BIGCORE_SAPPHIRERAPIDS:
> > + case INTEL_BIGCORE_EMERALDRAPIDS:
> > + case INTEL_BIGCORE_GRANITERAPIDS:
> > + break;
> > +
> > + /* Untuned Mixed (bigcore + atom SOC). */
> > + case INTEL_MIXED_LAKEFIELD:
> > + case INTEL_MIXED_ALDERLAKE:
>
> All these processors should be treated as default.
Done.
>
> > break;
> > }
> >
> > - /* Disable TSX on some processors to avoid TSX on kernels that
> > - weren't updated with the latest microcode package (which
> > - disables broken feature by default). */
> > - switch (model)
> > + /* Disable TSX on some processors to avoid TSX on kernels that
> > + weren't updated with the latest microcode package (which
> > + disables broken feature by default). */
> > + switch (microarch)
> > {
> > - case 0x55:
> > - if (stepping <= 5)
> > + case INTEL_BIGCORE_SKYLAKE_AVX512:
> > + /* 0x55 && stepping <= 5 is SKYLAKE_AVX512. Cascadelake and
> > + Cooperlake also have model == 0x55 so double check the
> > + stepping to be safe. */
> > + if (model == 0x55 && stepping <= 5)
>
> No need to check model == 0x55.
Okay.
>
> > goto disable_tsx;
> > break;
> > - case 0x8e:
> > - /* NB: Although the errata documents that for model == 0x8e,
> > - only 0xb stepping or lower are impacted, the intention of
> > - the errata was to disable TSX on all client processors on
> > - all steppings. Include 0xc stepping which is an Intel
> > - Core i7-8665U, a client mobile processor. */
> > - case 0x9e:
> > - if (stepping > 0xc)
> > +
> > + case INTEL_BIGCORE_SKYLAKE:
> > + case INTEL_BIGCORE_AMBERLAKE:
> > + case INTEL_BIGCORE_COFFEELAKE:
> > + case INTEL_BIGCORE_WHISKEYLAKE:
> > + case INTEL_BIGCORE_KABYLAKE:
> > + /* NB: Although the errata documents that for model == 0x8e
> > + (skylake client), only 0xb stepping or lower are impacted,
> > + the intention of the errata was to disable TSX on all client
> > + processors on all steppings. Include 0xc stepping which is
> > + an Intel Core i7-8665U, a client mobile processor. */
> > + if ((model == 0x8e || model == 0x9e) && stepping > 0xc)
> > break;
> > - /* Fall through. */
> > - case 0x4e:
> > - case 0x5e:
> > - {
> > +
> > /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
> > processors listed in:
> >
> > https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
> > */
> > -disable_tsx:
> > + disable_tsx:
> > CPU_FEATURE_UNSET (cpu_features, HLE);
> > CPU_FEATURE_UNSET (cpu_features, RTM);
> > CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
> > - }
> > - break;
> > - case 0x3f:
> > - /* Xeon E7 v3 with stepping >= 4 has working TSX. */
> > - if (stepping >= 4)
> > break;
> > - /* Fall through. */
> > - case 0x3c:
> > - case 0x45:
> > - case 0x46:
> > - /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
> > - with stepping >= 4) to avoid TSX on kernels that weren't
> > - updated with the latest microcode package (which disables
> > - broken feature by default). */
> > - CPU_FEATURE_UNSET (cpu_features, RTM);
> > - break;
> > +
> > + case INTEL_BIGCORE_HASWELL:
> > + /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
> > + TSX. Haswell also include other model numbers that have
> > + working TSX. */
> > + if (model == 0x3f && stepping >= 4)
> > + break;
> > +
> > + CPU_FEATURE_UNSET (cpu_features, RTM);
> > + break;
> > }
> > }
> >
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
@@ -417,6 +417,217 @@ _Static_assert (((index_arch_Fast_Unaligned_Load
== index_arch_Fast_Copy_Backward)),
"Incorrect index_arch_Fast_Unaligned_Load");
+
+/* Intel Family-6 microarch list. */
+enum
+{
+ /* Atom processors. */
+ INTEL_ATOM_BONNELL,
+ INTEL_ATOM_SALTWELL,
+ INTEL_ATOM_SILVERMONT,
+ INTEL_ATOM_AIRMONT,
+ INTEL_ATOM_GOLDMONT,
+ INTEL_ATOM_GOLDMONT_PLUS,
+ INTEL_ATOM_SIERRAFOREST,
+ INTEL_ATOM_GRANDRIDGE,
+ INTEL_ATOM_TREMONT,
+
+ /* Bigcore processors. */
+ INTEL_BIGCORE_MEROM,
+ INTEL_BIGCORE_PENRYN,
+ INTEL_BIGCORE_DUNNINGTON,
+ INTEL_BIGCORE_NEHALEM,
+ INTEL_BIGCORE_WESTMERE,
+ INTEL_BIGCORE_SANDYBRIDGE,
+ INTEL_BIGCORE_IVYBRIDGE,
+ INTEL_BIGCORE_HASWELL,
+ INTEL_BIGCORE_BROADWELL,
+ INTEL_BIGCORE_SKYLAKE,
+ INTEL_BIGCORE_AMBERLAKE,
+ INTEL_BIGCORE_COFFEELAKE,
+ INTEL_BIGCORE_WHISKEYLAKE,
+ INTEL_BIGCORE_KABYLAKE,
+ INTEL_BIGCORE_COMETLAKE,
+ INTEL_BIGCORE_SKYLAKE_AVX512,
+ INTEL_BIGCORE_CANNONLAKE,
+ INTEL_BIGCORE_CASCADELAKE,
+ INTEL_BIGCORE_COOPERLAKE,
+ INTEL_BIGCORE_ICELAKE,
+ INTEL_BIGCORE_TIGERLAKE,
+ INTEL_BIGCORE_ROCKETLAKE,
+ INTEL_BIGCORE_SAPPHIRERAPIDS,
+ INTEL_BIGCORE_RAPTORLAKE,
+ INTEL_BIGCORE_EMERALDRAPIDS,
+ INTEL_BIGCORE_METEORLAKE,
+ INTEL_BIGCORE_LUNARLAKE,
+ INTEL_BIGCORE_ARROWLAKE,
+ INTEL_BIGCORE_GRANITERAPIDS,
+
+ /* Mixed (bigcore + atom SOC). */
+ INTEL_MIXED_LAKEFIELD,
+ INTEL_MIXED_ALDERLAKE,
+
+ /* KNL. */
+ INTEL_KNIGHTS_MILL,
+ INTEL_KNIGHTS_LANDING,
+
+ /* Unknown. */
+ INTEL_UNKNOWN,
+};
+
+static unsigned int
+intel_get_fam6_microarch (unsigned int model, unsigned int stepping)
+{
+ switch (model)
+ {
+ case 0x1C:
+ case 0x26:
+ return INTEL_ATOM_BONNELL;
+ case 0x27:
+ case 0x35:
+ case 0x36:
+ return INTEL_ATOM_SALTWELL;
+ case 0x37:
+ case 0x4A:
+ case 0x4D:
+ case 0x5D:
+ return INTEL_ATOM_SILVERMONT;
+ case 0x4C:
+ case 0x5A:
+ case 0x75:
+ return INTEL_ATOM_AIRMONT;
+ case 0x5C:
+ case 0x5F:
+ return INTEL_ATOM_GOLDMONT;
+ case 0x7A:
+ return INTEL_ATOM_GOLDMONT_PLUS;
+ case 0xAF:
+ return INTEL_ATOM_SIERRAFOREST;
+ case 0xB6:
+ return INTEL_ATOM_GRANDRIDGE;
+ case 0x86:
+ case 0x96:
+ case 0x9C:
+ return INTEL_ATOM_TREMONT;
+ case 0x0F:
+ case 0x16:
+ return INTEL_BIGCORE_MEROM;
+ case 0x17:
+ return INTEL_BIGCORE_PENRYN;
+ case 0x1D:
+ return INTEL_BIGCORE_DUNNINGTON;
+ case 0x1A:
+ case 0x1E:
+ case 0x1F:
+ case 0x2E:
+ return INTEL_BIGCORE_NEHALEM;
+ case 0x25:
+ case 0x2C:
+ case 0x2F:
+ return INTEL_BIGCORE_WESTMERE;
+ case 0x2A:
+ case 0x2D:
+ return INTEL_BIGCORE_SANDYBRIDGE;
+ case 0x3A:
+ case 0x3E:
+ return INTEL_BIGCORE_IVYBRIDGE;
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ return INTEL_BIGCORE_HASWELL;
+ case 0x3D:
+ case 0x47:
+ case 0x4F:
+ case 0x56:
+ return INTEL_BIGCORE_BROADWELL;
+ case 0x4E:
+ case 0x5E:
+ return INTEL_BIGCORE_SKYLAKE;
+ case 0x8E:
+ switch (stepping)
+ {
+ case 0x09:
+ return INTEL_BIGCORE_AMBERLAKE;
+ case 0x0A:
+ return INTEL_BIGCORE_COFFEELAKE;
+ case 0x0B:
+ case 0x0C:
+ return INTEL_BIGCORE_WHISKEYLAKE;
+ default:
+ return INTEL_BIGCORE_KABYLAKE;
+ }
+ case 0x9E:
+ switch (stepping)
+ {
+ case 0x0A:
+ case 0x0B:
+ case 0x0C:
+ case 0x0D:
+ return INTEL_BIGCORE_COFFEELAKE;
+ default:
+ return INTEL_BIGCORE_KABYLAKE;
+ }
+ case 0xA5:
+ case 0xA6:
+ return INTEL_BIGCORE_COMETLAKE;
+ case 0x66:
+ return INTEL_BIGCORE_CANNONLAKE;
+ case 0x55:
+ switch (stepping)
+ {
+ case 0x06:
+ case 0x07:
+ return INTEL_BIGCORE_CASCADELAKE;
+ case 0x0b:
+ return INTEL_BIGCORE_COOPERLAKE;
+ default:
+ return INTEL_BIGCORE_SKYLAKE_AVX512;
+ }
+ case 0x6A:
+ case 0x6C:
+ case 0x7D:
+ case 0x7E:
+ case 0x9D:
+ return INTEL_BIGCORE_ICELAKE;
+ case 0x8C:
+ case 0x8D:
+ return INTEL_BIGCORE_TIGERLAKE;
+ case 0xA7:
+ return INTEL_BIGCORE_ROCKETLAKE;
+ case 0x8F:
+ return INTEL_BIGCORE_SAPPHIRERAPIDS;
+ case 0xB7:
+ case 0xBA:
+ case 0xBF:
+ return INTEL_BIGCORE_RAPTORLAKE;
+ case 0xCF:
+ return INTEL_BIGCORE_EMERALDRAPIDS;
+ case 0xAA:
+ case 0xAC:
+ return INTEL_BIGCORE_METEORLAKE;
+ case 0xbd:
+ return INTEL_BIGCORE_LUNARLAKE;
+ case 0xc6:
+ return INTEL_BIGCORE_ARROWLAKE;
+ case 0xAD:
+ case 0xAE:
+ return INTEL_BIGCORE_GRANITERAPIDS;
+ case 0x8A:
+ return INTEL_MIXED_LAKEFIELD;
+ case 0x97:
+ case 0x9A:
+ case 0xBE:
+ return INTEL_MIXED_ALDERLAKE;
+ case 0x85:
+ return INTEL_KNIGHTS_MILL;
+ case 0x57:
+ return INTEL_KNIGHTS_LANDING;
+ default:
+ return INTEL_UNKNOWN;
+ }
+}
+
static inline void
init_cpu_features (struct cpu_features *cpu_features)
{
@@ -453,129 +664,149 @@ init_cpu_features (struct cpu_features *cpu_features)
if (family == 0x06)
{
model += extended_model;
- switch (model)
+ unsigned int microarch
+ = intel_get_fam6_microarch (model, stepping);
+
+ switch (microarch)
{
- case 0x1c:
- case 0x26:
- /* BSF is slow on Atom. */
+ /* Atom / KNL tuning. */
+ case INTEL_ATOM_BONNELL:
+ /* BSF is slow on Bonnell. */
cpu_features->preferred[index_arch_Slow_BSF]
- |= bit_arch_Slow_BSF;
+ |= bit_arch_Slow_BSF;
break;
- case 0x57:
- /* Knights Landing. Enable Silvermont optimizations. */
-
- case 0x7a:
- /* Unaligned load versions are faster than SSSE3
- on Goldmont Plus. */
-
- case 0x5c:
- case 0x5f:
/* Unaligned load versions are faster than SSSE3
- on Goldmont. */
+ on Airmont, Silvermont, Goldmont, and Goldmont Plus. */
+ case INTEL_ATOM_AIRMONT:
+ case INTEL_ATOM_SILVERMONT:
+ case INTEL_ATOM_GOLDMONT:
+ case INTEL_ATOM_GOLDMONT_PLUS:
- case 0x4c:
- case 0x5a:
- case 0x75:
- /* Airmont is a die shrink of Silvermont. */
+ /* Knights Landing. Enable Silvermont optimizations. */
+ case INTEL_KNIGHTS_LANDING:
- case 0x37:
- case 0x4a:
- case 0x4d:
- case 0x5d:
- /* Unaligned load versions are faster than SSSE3
- on Silvermont. */
cpu_features->preferred[index_arch_Fast_Unaligned_Load]
- |= (bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
+ |= (bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
break;
- case 0x86:
- case 0x96:
- case 0x9c:
+ case INTEL_ATOM_TREMONT:
/* Enable rep string instructions, unaligned load, unaligned
- copy, pminub and avoid SSE 4.2 on Tremont. */
+ copy, pminub and avoid SSE 4.2 on Tremont. */
cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop
- | bit_arch_Slow_SSE4_2);
+ |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop
+ | bit_arch_Slow_SSE4_2);
+ break;
+
+ /* Untuned KNL microarch. */
+ case INTEL_KNIGHTS_MILL:
+ /* Untuned atom microarch. */
+ case INTEL_ATOM_SIERRAFOREST:
+ case INTEL_ATOM_GRANDRIDGE:
+ case INTEL_ATOM_SALTWELL:
break;
+ /* Bigcore Tuning. */
+ case INTEL_UNKNOWN:
default:
/* Unknown family 0x06 processors. Assuming this is one
of Core i3/i5/i7 processors if AVX is available. */
if (!CPU_FEATURES_CPU_P (cpu_features, AVX))
break;
- /* Fall through. */
-
- case 0x1a:
- case 0x1e:
- case 0x1f:
- case 0x25:
- case 0x2c:
- case 0x2e:
- case 0x2f:
+ case INTEL_BIGCORE_NEHALEM:
+ case INTEL_BIGCORE_WESTMERE:
/* Rep string instructions, unaligned load, unaligned copy,
and pminub are fast on Intel Core i3, i5 and i7. */
cpu_features->preferred[index_arch_Fast_Rep_String]
- |= (bit_arch_Fast_Rep_String
- | bit_arch_Fast_Unaligned_Load
- | bit_arch_Fast_Unaligned_Copy
- | bit_arch_Prefer_PMINUB_for_stringop);
+ |= (bit_arch_Fast_Rep_String | bit_arch_Fast_Unaligned_Load
+ | bit_arch_Fast_Unaligned_Copy
+ | bit_arch_Prefer_PMINUB_for_stringop);
+ break;
+
+ /* Untuned Bigcore microarch. */
+ case INTEL_BIGCORE_SANDYBRIDGE:
+ case INTEL_BIGCORE_IVYBRIDGE:
+ case INTEL_BIGCORE_HASWELL:
+ case INTEL_BIGCORE_BROADWELL:
+ case INTEL_BIGCORE_SKYLAKE:
+ case INTEL_BIGCORE_AMBERLAKE:
+ case INTEL_BIGCORE_COFFEELAKE:
+ case INTEL_BIGCORE_WHISKEYLAKE:
+ case INTEL_BIGCORE_KABYLAKE:
+ case INTEL_BIGCORE_COMETLAKE:
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
+ case INTEL_BIGCORE_CASCADELAKE:
+ case INTEL_BIGCORE_COOPERLAKE:
+ case INTEL_BIGCORE_CANNONLAKE:
+ case INTEL_BIGCORE_ICELAKE:
+ case INTEL_BIGCORE_TIGERLAKE:
+ case INTEL_BIGCORE_ROCKETLAKE:
+ case INTEL_BIGCORE_RAPTORLAKE:
+ case INTEL_BIGCORE_METEORLAKE:
+ case INTEL_BIGCORE_LUNARLAKE:
+ case INTEL_BIGCORE_ARROWLAKE:
+ case INTEL_BIGCORE_SAPPHIRERAPIDS:
+ case INTEL_BIGCORE_EMERALDRAPIDS:
+ case INTEL_BIGCORE_GRANITERAPIDS:
+ break;
+
+ /* Untuned Mixed (bigcore + atom SOC). */
+ case INTEL_MIXED_LAKEFIELD:
+ case INTEL_MIXED_ALDERLAKE:
break;
}
- /* Disable TSX on some processors to avoid TSX on kernels that
- weren't updated with the latest microcode package (which
- disables broken feature by default). */
- switch (model)
+ /* Disable TSX on some processors to avoid TSX on kernels that
+ weren't updated with the latest microcode package (which
+ disables broken feature by default). */
+ switch (microarch)
{
- case 0x55:
- if (stepping <= 5)
+ case INTEL_BIGCORE_SKYLAKE_AVX512:
+ /* 0x55 && stepping <= 5 is SKYLAKE_AVX512. Cascadelake and
+ Cooperlake also have model == 0x55 so double check the
+ stepping to be safe. */
+ if (model == 0x55 && stepping <= 5)
goto disable_tsx;
break;
- case 0x8e:
- /* NB: Although the errata documents that for model == 0x8e,
- only 0xb stepping or lower are impacted, the intention of
- the errata was to disable TSX on all client processors on
- all steppings. Include 0xc stepping which is an Intel
- Core i7-8665U, a client mobile processor. */
- case 0x9e:
- if (stepping > 0xc)
+
+ case INTEL_BIGCORE_SKYLAKE:
+ case INTEL_BIGCORE_AMBERLAKE:
+ case INTEL_BIGCORE_COFFEELAKE:
+ case INTEL_BIGCORE_WHISKEYLAKE:
+ case INTEL_BIGCORE_KABYLAKE:
+ /* NB: Although the errata documents that for model == 0x8e
+ (skylake client), only 0xb stepping or lower are impacted,
+ the intention of the errata was to disable TSX on all client
+ processors on all steppings. Include 0xc stepping which is
+ an Intel Core i7-8665U, a client mobile processor. */
+ if ((model == 0x8e || model == 0x9e) && stepping > 0xc)
break;
- /* Fall through. */
- case 0x4e:
- case 0x5e:
- {
+
/* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
processors listed in:
https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
*/
-disable_tsx:
+ disable_tsx:
CPU_FEATURE_UNSET (cpu_features, HLE);
CPU_FEATURE_UNSET (cpu_features, RTM);
CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
- }
- break;
- case 0x3f:
- /* Xeon E7 v3 with stepping >= 4 has working TSX. */
- if (stepping >= 4)
break;
- /* Fall through. */
- case 0x3c:
- case 0x45:
- case 0x46:
- /* Disable Intel TSX on Haswell processors (except Xeon E7 v3
- with stepping >= 4) to avoid TSX on kernels that weren't
- updated with the latest microcode package (which disables
- broken feature by default). */
- CPU_FEATURE_UNSET (cpu_features, RTM);
- break;
+
+ case INTEL_BIGCORE_HASWELL:
+ /* Xeon E7 v3 (model == 0x3f) with stepping >= 4 has working
+ TSX. Haswell also include other model numbers that have
+ working TSX. */
+ if (model == 0x3f && stepping >= 4)
+ break;
+
+ CPU_FEATURE_UNSET (cpu_features, RTM);
+ break;
}
}