[v2,2/2] x86: Add CPU-specific diagnostics to ld.so --list-diagnostics

Message ID 87tupt27ha.fsf@oldenburg.str.redhat.com
State Superseded
Headers
Series [v2] ld.so: Implement the --list-diagnostics option |

Commit Message

Florian Weimer March 2, 2021, 10:08 a.m. UTC
  ---
v2: Adjustments to generic changes.  Add comments to cpu-features.h
    pointing towards the diagnostics printer.

 sysdeps/x86/dl-diagnostics-cpu.c   | 152 +++++++++++++++++++++++++++++++++++++
 sysdeps/x86/include/cpu-features.h |   5 +-
 2 files changed, 156 insertions(+), 1 deletion(-)
  

Comments

H.J. Lu March 2, 2021, 1:11 p.m. UTC | #1
On Tue, Mar 2, 2021 at 2:34 AM Florian Weimer via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> ---
> v2: Adjustments to generic changes.  Add comments to cpu-features.h
>     pointing towards the diagnostics printer.
>
>  sysdeps/x86/dl-diagnostics-cpu.c   | 152 +++++++++++++++++++++++++++++++++++++
>  sysdeps/x86/include/cpu-features.h |   5 +-
>  2 files changed, 156 insertions(+), 1 deletion(-)
>
> diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c
> new file mode 100644
> index 0000000000..a2d47dd657
> --- /dev/null
> +++ b/sysdeps/x86/dl-diagnostics-cpu.c
> @@ -0,0 +1,152 @@
> +/* Print CPU diagnostics data in ld.so.  x86 version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <dl-diagnostics.h>
> +#include <ldsodefs.h>
> +
> +static void
> +print_cpu_features_value (const char *label, uint64_t value)
> +{
> +  _dl_printf ("x86.cpu_features.");
> +  _dl_diagnostics_print_labeled_value (label, value);
> +}
> +
> +static void
> +print_cpu_feature_internal (unsigned int index, const char *kind,
> +                            unsigned int reg, uint32_t value)
> +{
> +  _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n",
> +              index, kind, reg, value);
> +}
> +
> +static void
> +print_cpu_feature_preferred (const char *label, unsigned int flag)
> +{
> +  _dl_printf("x86.cpu_features.preferred.%s=0x%x\n", label, flag);
> +}
> +
> +void
> +_dl_diagnostics_cpu (void)
> +{
> +  const struct cpu_features *cpu_features = __get_cpu_features ();
> +
> +  print_cpu_features_value ("basic.kind", cpu_features->basic.kind);
> +  print_cpu_features_value ("basic.max_cpuid", cpu_features->basic.max_cpuid);
> +  print_cpu_features_value ("basic.family", cpu_features->basic.family);
> +  print_cpu_features_value ("basic.model", cpu_features->basic.model);
> +  print_cpu_features_value ("basic.stepping", cpu_features->basic.stepping);
> +
> +  for (unsigned int index = 0; index < CPUID_INDEX_MAX; ++index)
> +    {
> +      /* The index values are part of the ABI via
> +         <sys/platform/x86.h>, so translating them to strings is not
> +         necessary.  */
> +      for (unsigned int reg = 0; reg < 4; ++reg)
> +        print_cpu_feature_internal
> +          (index, "cpuid", reg,
> +           cpu_features->features[index].cpuid_array[reg]);
> +      for (unsigned int reg = 0; reg < 4; ++reg)
> +        print_cpu_feature_internal
> +          (index, "usable", reg,
> +           cpu_features->features[index].usable_array[reg]);
> +    }
> +
> +  /* The preferred indicators are not part of the ABI and need to be
> +     translated.  */
> +  print_cpu_feature_preferred
> +    ("i586", CPU_FEATURE_PREFERRED_P (cpu_features, I586));
> +  print_cpu_feature_preferred
> +    ("i686", CPU_FEATURE_PREFERRED_P (cpu_features, I686));
> +  print_cpu_feature_preferred
> +    ("fast_rep_string",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Rep_String));
> +  print_cpu_feature_preferred
> +    ("fast_copy_backward",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Copy_Backward));
> +  print_cpu_feature_preferred
> +    ("fast_unaligned_load",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Unaligned_Load));
> +  print_cpu_feature_preferred
> +    ("fast_unaligned_copy",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Unaligned_Copy));
> +  print_cpu_feature_preferred
> +    ("slow_bsf", CPU_FEATURE_PREFERRED_P (cpu_features, Slow_BSF));
> +  print_cpu_feature_preferred
> +    ("slow_sse4_2", CPU_FEATURE_PREFERRED_P (cpu_features, Slow_SSE4_2));
> +  print_cpu_feature_preferred
> +    ("avx_fast_unaligned_load",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, AVX_Fast_Unaligned_Load));
> +  print_cpu_feature_preferred
> +    ("prefer_map_32bit_exec",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_MAP_32BIT_EXEC));
> +  print_cpu_feature_preferred
> +    ("prefer_pminub_for_stringop",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_PMINUB_for_stringop));
> +  print_cpu_feature_preferred
> +    ("prefer_no_vzeroupper",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_VZEROUPPER));
> +  print_cpu_feature_preferred
> +    ("prefer_erms", CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_ERMS));
> +  print_cpu_feature_preferred
> +    ("prefer_fsrm", CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_FSRM));
> +  print_cpu_feature_preferred
> +    ("prefer_no_avx512",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512));
> +  print_cpu_feature_preferred
> +    ("mathvec_prefer_no_avx512",
> +     CPU_FEATURE_PREFERRED_P (cpu_features, MathVec_Prefer_No_AVX512));

$ grep "#define index_arch_" sysdeps/x86/include/cpu-features.h | awk
'{ print $2 }' | sed -e "s/index_arch_//"
Fast_Rep_String
Fast_Copy_Backward
Slow_BSF
Fast_Unaligned_Load
Prefer_PMINUB_for_stringop
Fast_Unaligned_Copy
I586
I686
Slow_SSE4_2
AVX_Fast_Unaligned_Load
Prefer_MAP_32BIT_EXEC
Prefer_No_VZEROUPPER
Prefer_ERMS
Prefer_No_AVX512
MathVec_Prefer_No_AVX512
Prefer_FSRM

sysdeps/x86/configure.ac can generate the list.

> +  print_cpu_features_value ("isa_1", cpu_features->isa_1);
> +  print_cpu_features_value ("xsave_state_size",
> +                            cpu_features->xsave_state_size);
> +  print_cpu_features_value ("xsave_state_full_size",
> +                            cpu_features->xsave_state_full_size);
> +  print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size);
> +  print_cpu_features_value ("shared_cache_size",
> +                            cpu_features->shared_cache_size);
> +  print_cpu_features_value ("non_temporal_threshold",
> +                            cpu_features->non_temporal_threshold);
> +  print_cpu_features_value ("rep_movsb_threshold",
> +                            cpu_features->rep_movsb_threshold);
> +  print_cpu_features_value ("rep_movsb_stop_threshold",
> +                            cpu_features->rep_movsb_stop_threshold);
> +  print_cpu_features_value ("rep_stosb_threshold",
> +                            cpu_features->rep_stosb_threshold);
> +  print_cpu_features_value ("level1_icache_size",
> +                            cpu_features->level1_icache_size);
> +  print_cpu_features_value ("level1_dcache_size",
> +                            cpu_features->level1_dcache_size);
> +  print_cpu_features_value ("level1_dcache_assoc",
> +                            cpu_features->level1_dcache_assoc);
> +  print_cpu_features_value ("level1_dcache_linesize",
> +                            cpu_features->level1_dcache_linesize);
> +  print_cpu_features_value ("level2_cache_size",
> +                            cpu_features->level2_cache_size);
> +  print_cpu_features_value ("level2_cache_assoc",
> +                            cpu_features->level2_cache_assoc);
> +  print_cpu_features_value ("level2_cache_linesize",
> +                            cpu_features->level2_cache_linesize);
> +  print_cpu_features_value ("level3_cache_size",
> +                            cpu_features->level3_cache_size);
> +  print_cpu_features_value ("level3_cache_assoc",
> +                            cpu_features->level3_cache_assoc);
> +  print_cpu_features_value ("level3_cache_linesize",
> +                            cpu_features->level3_cache_linesize);
> +  print_cpu_features_value ("level4_cache_size",
> +                            cpu_features->level4_cache_size);

Please assert size of *cpu_features == offset of level4_cache_size
+ sizeof (level4_cache_size).

> +}
> diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
> index ae5cb24be5..3646150164 100644
> --- a/sysdeps/x86/include/cpu-features.h
> +++ b/sysdeps/x86/include/cpu-features.h
> @@ -772,7 +772,8 @@ enum
>  /* EBX.  */
>  #define reg_PTWRITE            ebx
>
> -/* PREFERRED_FEATURE_INDEX_1.  */
> +/* PREFERRED_FEATURE_INDEX_1.  NB: When adding new bits here, update
> +   sysdeps/x86/dl-diagnostics-cpu.c.  */
>  #define bit_arch_I586                          (1u << 0)
>  #define bit_arch_I686                          (1u << 1)
>  #define bit_arch_Fast_Rep_String               (1u << 2)
> @@ -856,6 +857,8 @@ struct cpuid_feature_internal
>      };
>  };
>
> +/* NB: When adding new fields, update sysdeps/x86/dl-diagnostics-cpu.c
> +   to print them.  */
>  struct cpu_features
>  {
>    struct cpu_features_basic basic;
>
  
Florian Weimer March 2, 2021, 1:42 p.m. UTC | #2
* H. J. Lu via Libc-alpha:

>> +  print_cpu_feature_preferred
>> +    ("prefer_no_avx512",
>> +     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512));
>> +  print_cpu_feature_preferred
>> +    ("mathvec_prefer_no_avx512",
>> +     CPU_FEATURE_PREFERRED_P (cpu_features, MathVec_Prefer_No_AVX512));
>
> $ grep "#define index_arch_" sysdeps/x86/include/cpu-features.h | awk
> '{ print $2 }' | sed -e "s/index_arch_//"
> Fast_Rep_String
> Fast_Copy_Backward
> Slow_BSF
> Fast_Unaligned_Load
> Prefer_PMINUB_for_stringop
> Fast_Unaligned_Copy
> I586
> I686
> Slow_SSE4_2
> AVX_Fast_Unaligned_Load
> Prefer_MAP_32BIT_EXEC
> Prefer_No_VZEROUPPER
> Prefer_ERMS
> Prefer_No_AVX512
> MathVec_Prefer_No_AVX512
> Prefer_FSRM
>
> sysdeps/x86/configure.ac can generate the list.

Okay, I will automate this in some way.

>> +  print_cpu_features_value ("level3_cache_linesize",
>> +                            cpu_features->level3_cache_linesize);
>> +  print_cpu_features_value ("level4_cache_size",
>> +                            cpu_features->level4_cache_size);
>
> Please assert size of *cpu_features == offset of level4_cache_size
> + sizeof (level4_cache_size).

I'm going to add a static assert.

Thanks,
Florian
  

Patch

diff --git a/sysdeps/x86/dl-diagnostics-cpu.c b/sysdeps/x86/dl-diagnostics-cpu.c
new file mode 100644
index 0000000000..a2d47dd657
--- /dev/null
+++ b/sysdeps/x86/dl-diagnostics-cpu.c
@@ -0,0 +1,152 @@ 
+/* Print CPU diagnostics data in ld.so.  x86 version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <dl-diagnostics.h>
+#include <ldsodefs.h>
+
+static void
+print_cpu_features_value (const char *label, uint64_t value)
+{
+  _dl_printf ("x86.cpu_features.");
+  _dl_diagnostics_print_labeled_value (label, value);
+}
+
+static void
+print_cpu_feature_internal (unsigned int index, const char *kind,
+                            unsigned int reg, uint32_t value)
+{
+  _dl_printf ("x86.cpu_features.features[0x%x].%s[0x%x]=0x%x\n",
+              index, kind, reg, value);
+}
+
+static void
+print_cpu_feature_preferred (const char *label, unsigned int flag)
+{
+  _dl_printf("x86.cpu_features.preferred.%s=0x%x\n", label, flag);
+}
+
+void
+_dl_diagnostics_cpu (void)
+{
+  const struct cpu_features *cpu_features = __get_cpu_features ();
+
+  print_cpu_features_value ("basic.kind", cpu_features->basic.kind);
+  print_cpu_features_value ("basic.max_cpuid", cpu_features->basic.max_cpuid);
+  print_cpu_features_value ("basic.family", cpu_features->basic.family);
+  print_cpu_features_value ("basic.model", cpu_features->basic.model);
+  print_cpu_features_value ("basic.stepping", cpu_features->basic.stepping);
+
+  for (unsigned int index = 0; index < CPUID_INDEX_MAX; ++index)
+    {
+      /* The index values are part of the ABI via
+         <sys/platform/x86.h>, so translating them to strings is not
+         necessary.  */
+      for (unsigned int reg = 0; reg < 4; ++reg)
+        print_cpu_feature_internal
+          (index, "cpuid", reg,
+           cpu_features->features[index].cpuid_array[reg]);
+      for (unsigned int reg = 0; reg < 4; ++reg)
+        print_cpu_feature_internal
+          (index, "usable", reg,
+           cpu_features->features[index].usable_array[reg]);
+    }
+
+  /* The preferred indicators are not part of the ABI and need to be
+     translated.  */
+  print_cpu_feature_preferred
+    ("i586", CPU_FEATURE_PREFERRED_P (cpu_features, I586));
+  print_cpu_feature_preferred
+    ("i686", CPU_FEATURE_PREFERRED_P (cpu_features, I686));
+  print_cpu_feature_preferred
+    ("fast_rep_string",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Rep_String));
+  print_cpu_feature_preferred
+    ("fast_copy_backward",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Copy_Backward));
+  print_cpu_feature_preferred
+    ("fast_unaligned_load",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Unaligned_Load));
+  print_cpu_feature_preferred
+    ("fast_unaligned_copy",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Fast_Unaligned_Copy));
+  print_cpu_feature_preferred
+    ("slow_bsf", CPU_FEATURE_PREFERRED_P (cpu_features, Slow_BSF));
+  print_cpu_feature_preferred
+    ("slow_sse4_2", CPU_FEATURE_PREFERRED_P (cpu_features, Slow_SSE4_2));
+  print_cpu_feature_preferred
+    ("avx_fast_unaligned_load",
+     CPU_FEATURE_PREFERRED_P (cpu_features, AVX_Fast_Unaligned_Load));
+  print_cpu_feature_preferred
+    ("prefer_map_32bit_exec",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_MAP_32BIT_EXEC));
+  print_cpu_feature_preferred
+    ("prefer_pminub_for_stringop",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_PMINUB_for_stringop));
+  print_cpu_feature_preferred
+    ("prefer_no_vzeroupper",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_VZEROUPPER));
+  print_cpu_feature_preferred
+    ("prefer_erms", CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_ERMS));
+  print_cpu_feature_preferred
+    ("prefer_fsrm", CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_FSRM));
+  print_cpu_feature_preferred
+    ("prefer_no_avx512",
+     CPU_FEATURE_PREFERRED_P (cpu_features, Prefer_No_AVX512));
+  print_cpu_feature_preferred
+    ("mathvec_prefer_no_avx512",
+     CPU_FEATURE_PREFERRED_P (cpu_features, MathVec_Prefer_No_AVX512));
+
+  print_cpu_features_value ("isa_1", cpu_features->isa_1);
+  print_cpu_features_value ("xsave_state_size",
+                            cpu_features->xsave_state_size);
+  print_cpu_features_value ("xsave_state_full_size",
+                            cpu_features->xsave_state_full_size);
+  print_cpu_features_value ("data_cache_size", cpu_features->data_cache_size);
+  print_cpu_features_value ("shared_cache_size",
+                            cpu_features->shared_cache_size);
+  print_cpu_features_value ("non_temporal_threshold",
+                            cpu_features->non_temporal_threshold);
+  print_cpu_features_value ("rep_movsb_threshold",
+                            cpu_features->rep_movsb_threshold);
+  print_cpu_features_value ("rep_movsb_stop_threshold",
+                            cpu_features->rep_movsb_stop_threshold);
+  print_cpu_features_value ("rep_stosb_threshold",
+                            cpu_features->rep_stosb_threshold);
+  print_cpu_features_value ("level1_icache_size",
+                            cpu_features->level1_icache_size);
+  print_cpu_features_value ("level1_dcache_size",
+                            cpu_features->level1_dcache_size);
+  print_cpu_features_value ("level1_dcache_assoc",
+                            cpu_features->level1_dcache_assoc);
+  print_cpu_features_value ("level1_dcache_linesize",
+                            cpu_features->level1_dcache_linesize);
+  print_cpu_features_value ("level2_cache_size",
+                            cpu_features->level2_cache_size);
+  print_cpu_features_value ("level2_cache_assoc",
+                            cpu_features->level2_cache_assoc);
+  print_cpu_features_value ("level2_cache_linesize",
+                            cpu_features->level2_cache_linesize);
+  print_cpu_features_value ("level3_cache_size",
+                            cpu_features->level3_cache_size);
+  print_cpu_features_value ("level3_cache_assoc",
+                            cpu_features->level3_cache_assoc);
+  print_cpu_features_value ("level3_cache_linesize",
+                            cpu_features->level3_cache_linesize);
+  print_cpu_features_value ("level4_cache_size",
+                            cpu_features->level4_cache_size);
+}
diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h
index ae5cb24be5..3646150164 100644
--- a/sysdeps/x86/include/cpu-features.h
+++ b/sysdeps/x86/include/cpu-features.h
@@ -772,7 +772,8 @@  enum
 /* EBX.  */
 #define reg_PTWRITE		ebx
 
-/* PREFERRED_FEATURE_INDEX_1.  */
+/* PREFERRED_FEATURE_INDEX_1.  NB: When adding new bits here, update
+   sysdeps/x86/dl-diagnostics-cpu.c.  */
 #define bit_arch_I586				(1u << 0)
 #define bit_arch_I686				(1u << 1)
 #define bit_arch_Fast_Rep_String		(1u << 2)
@@ -856,6 +857,8 @@  struct cpuid_feature_internal
     };
 };
 
+/* NB: When adding new fields, update sysdeps/x86/dl-diagnostics-cpu.c
+   to print them.  */
 struct cpu_features
 {
   struct cpu_features_basic basic;