x86: Update F16C detection [BZ #26133]
Commit Message
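F16C instructions are VEX-encoded, so they can be executed only when the OS has enabled AVX state. Since F16C therefore requires AVX, set F16C usable only when AVX is usable, instead of treating it as usable whenever CPUID reports the feature: give F16C a real usable bit and set need_arch_feature_F16C to 1.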
---
sysdeps/x86/cpu-features.c | 4 ++++
sysdeps/x86/cpu-features.h | 6 +++---
2 files changed, 7 insertions(+), 3 deletions(-)
Comments
On Thu, Jun 18, 2020 at 6:20 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Since F16C requires AVX, set F16C usable only when AVX is usable.
> ---
> sysdeps/x86/cpu-features.c | 4 ++++
> sysdeps/x86/cpu-features.h | 6 +++---
> 2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
> index 489c370348..f873b55e8d 100644
> --- a/sysdeps/x86/cpu-features.c
> +++ b/sysdeps/x86/cpu-features.c
> @@ -153,6 +153,10 @@ get_common_indices (struct cpu_features *cpu_features,
> if (CPU_FEATURES_CPU_P (cpu_features, XOP))
> cpu_features->feature[index_arch_XOP_Usable]
> |= bit_arch_XOP_Usable;
> + /* Determine if F16C is usable. */
> + if (CPU_FEATURES_CPU_P (cpu_features, F16C))
> + cpu_features->feature[index_arch_F16C_Usable]
> + |= bit_arch_F16C_Usable;
> }
>
> /* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
> diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
> index de20044417..969c61773f 100644
> --- a/sysdeps/x86/cpu-features.h
> +++ b/sysdeps/x86/cpu-features.h
> @@ -145,6 +145,7 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define bit_arch_XSAVEC_Usable (1u << 22)
> #define bit_arch_AVX512_VP2INTERSECT_Usable (1u << 23)
> #define bit_arch_AVX512_BF16_Usable (1u << 24)
> +#define bit_arch_F16C_Usable (1u << 25)
>
> #define index_arch_AVX_Usable FEATURE_INDEX_1
> #define index_arch_AVX2_Usable FEATURE_INDEX_1
> @@ -171,6 +172,7 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define index_arch_XSAVEC_Usable FEATURE_INDEX_1
> #define index_arch_AVX512_VP2INTERSECT_Usable FEATURE_INDEX_1
> #define index_arch_AVX512_BF16_Usable FEATURE_INDEX_1
> +#define index_arch_F16C_Usable FEATURE_INDEX_1
>
> /* Unused. Compiler will optimize them out. */
> #define bit_arch_SSE3_Usable (1u << 0)
> @@ -184,7 +186,6 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define bit_arch_AES_Usable (1u << 0)
> #define bit_arch_XSAVE_Usable (1u << 0)
> #define bit_arch_OSXSAVE_Usable (1u << 0)
> -#define bit_arch_F16C_Usable (1u << 0)
> #define bit_arch_RDRAND_Usable (1u << 0)
> #define bit_arch_FPU_Usable (1u << 0)
> #define bit_arch_TSC_Usable (1u << 0)
> @@ -241,7 +242,6 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define index_arch_AES_Usable FEATURE_INDEX_1
> #define index_arch_XSAVE_Usable FEATURE_INDEX_1
> #define index_arch_OSXSAVE_Usable FEATURE_INDEX_1
> -#define index_arch_F16C_Usable FEATURE_INDEX_1
> #define index_arch_RDRAND_Usable FEATURE_INDEX_1
> #define index_arch_FPU_Usable FEATURE_INDEX_1
> #define index_arch_TSC_Usable FEATURE_INDEX_1
> @@ -301,7 +301,7 @@ extern const struct cpu_features *__get_cpu_features (void)
> #define need_arch_feature_XSAVE 0
> #define need_arch_feature_OSXSAVE 0
> #define need_arch_feature_AVX 1
> -#define need_arch_feature_F16C 0
> +#define need_arch_feature_F16C 1
> #define need_arch_feature_RDRAND 0
>
> /* EDX. */
> --
> 2.26.2
>
I am checking it in.
@@ -153,6 +153,10 @@ get_common_indices (struct cpu_features *cpu_features,
if (CPU_FEATURES_CPU_P (cpu_features, XOP))
cpu_features->feature[index_arch_XOP_Usable]
|= bit_arch_XOP_Usable;
+ /* Determine if F16C is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, F16C))
+ cpu_features->feature[index_arch_F16C_Usable]
+ |= bit_arch_F16C_Usable;
}
/* Check if OPMASK state, upper 256-bit of ZMM0-ZMM15 and
@@ -145,6 +145,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define bit_arch_XSAVEC_Usable (1u << 22)
#define bit_arch_AVX512_VP2INTERSECT_Usable (1u << 23)
#define bit_arch_AVX512_BF16_Usable (1u << 24)
+#define bit_arch_F16C_Usable (1u << 25)
#define index_arch_AVX_Usable FEATURE_INDEX_1
#define index_arch_AVX2_Usable FEATURE_INDEX_1
@@ -171,6 +172,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define index_arch_XSAVEC_Usable FEATURE_INDEX_1
#define index_arch_AVX512_VP2INTERSECT_Usable FEATURE_INDEX_1
#define index_arch_AVX512_BF16_Usable FEATURE_INDEX_1
+#define index_arch_F16C_Usable FEATURE_INDEX_1
/* Unused. Compiler will optimize them out. */
#define bit_arch_SSE3_Usable (1u << 0)
@@ -184,7 +186,6 @@ extern const struct cpu_features *__get_cpu_features (void)
#define bit_arch_AES_Usable (1u << 0)
#define bit_arch_XSAVE_Usable (1u << 0)
#define bit_arch_OSXSAVE_Usable (1u << 0)
-#define bit_arch_F16C_Usable (1u << 0)
#define bit_arch_RDRAND_Usable (1u << 0)
#define bit_arch_FPU_Usable (1u << 0)
#define bit_arch_TSC_Usable (1u << 0)
@@ -241,7 +242,6 @@ extern const struct cpu_features *__get_cpu_features (void)
#define index_arch_AES_Usable FEATURE_INDEX_1
#define index_arch_XSAVE_Usable FEATURE_INDEX_1
#define index_arch_OSXSAVE_Usable FEATURE_INDEX_1
-#define index_arch_F16C_Usable FEATURE_INDEX_1
#define index_arch_RDRAND_Usable FEATURE_INDEX_1
#define index_arch_FPU_Usable FEATURE_INDEX_1
#define index_arch_TSC_Usable FEATURE_INDEX_1
@@ -301,7 +301,7 @@ extern const struct cpu_features *__get_cpu_features (void)
#define need_arch_feature_XSAVE 0
#define need_arch_feature_OSXSAVE 0
#define need_arch_feature_AVX 1
-#define need_arch_feature_F16C 0
+#define need_arch_feature_F16C 1
#define need_arch_feature_RDRAND 0
/* EDX. */