Patchwork RFC: Check GLIBC_IFUNC to enable/disable ifunc features

login
register
mail settings
Submitter H.J. Lu
Date June 28, 2016, 9:56 p.m.
Message ID <CAMe9rOqGyieoo+UuwqCjZzQG5By+1dWit5yR2_JnZ_Q0K2gVnA@mail.gmail.com>
Download mbox | patch
Permalink /patch/13455/
State New
Headers show

Comments

H.J. Lu - June 28, 2016, 9:56 p.m.
On Sun, Jun 26, 2016 at 3:08 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Sun, Jun 26, 2016 at 11:55 AM, Carlos O'Donell <carlos@redhat.com> wrote:
>> On 06/26/2016 10:50 AM, H.J. Lu wrote:
>>> Hi,
>>>
>>> I'd like to check IFUNC_XXX environmental variable to enable/disable
>>> the ifunc feature, XXX.  That is
>>>
>>> 1.  IFUNC_XXX=1 to enable XXX.
>>> 2.  IFUNC_XXX=0 to disable XXX.
>>>
>>> Any comments?
>>
>> How would it work? Does it just mean that the IFUNC selector bypasses
>> the disabled choices?
>
> It will override bits in dl_x86_cpu_features.
>
>> Also it does not meet Tunables consensus:
>> "Tunable namespace should be clearly defined"
>> https://sourceware.org/glibc/wiki/TuningLibraryRuntimeBehavior?highlight=%28Tunables%29
>>
>> Should perhaps be "GLIBC_<VER>_IFUNC_<ARCH>_<PUBLIC_IFUNC_NAME>"?
>
> They won't be version-dependent.  How about
>
> GLIBC_IFUNC_X86=sse2=0:....
>
> or
>
> GLIBC_IFUNC=sse2=0:....
>
> since IFUNC is target dependent.

Something like this.
Mike Frysinger - June 29, 2016, 2:28 a.m.
This should have a comment or two explaining why all of these things
need to be coded ad hoc.

The patch also needs a corresponding update to the manual.

It seems the feature comparison is case-sensitive, but some of the
feature strings use inconsistent case.  It would be nice if they were
standardized (e.g. all lower case).
-mike

Patch

From 76a9cfc61fc655ca5d53c6bed26b91783ec0fe01 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Mon, 27 Jun 2016 15:13:50 -0700
Subject: [PATCH] Check GLIBC_IFUNC=

---
 sysdeps/i386/dl-machine.h   |   3 +-
 sysdeps/x86/cpu-features.c  | 200 +++++++++++++++++++++++++++++++++++++++++++-
 sysdeps/x86/libc-start.c    |   2 +-
 sysdeps/x86_64/dl-machine.h |   3 +-
 4 files changed, 204 insertions(+), 4 deletions(-)

diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
index 4e3968a..7584931 100644
--- a/sysdeps/i386/dl-machine.h
+++ b/sysdeps/i386/dl-machine.h
@@ -240,7 +240,8 @@  dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index 9ce4b49..4b55037 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -91,8 +91,109 @@  get_common_indeces (struct cpu_features *cpu_features,
     }
 }
 
+static bool
+equal (const char *a, const char *b, size_t len)
+{
+  do
+    {
+      size_t op_len = len % 8;
+      switch (op_len)
+	{
+	case 0:
+	  if (*(long long *) a != *(long long *) b)
+	    return false;
+	  op_len = 8;
+	  break;
+	case 1:
+	  if (*(char *) a != *(char *) b)
+	    return false;
+	  break;
+	case 2:
+	  if (*(short *) a != *(short *) b)
+	    return false;
+	  break;
+	case 3:
+	  if (*(short *) a != *(short *) b
+	      || *(char *) (a + 2) != *(char *) (b + 2))
+	    return false;
+	  break;
+	case 4:
+	  if (*(int *) a != *(int *) b)
+	    return false;
+	  break;
+	default:
+	  if (*(int *) a != *(int *) b
+	      || *(int *) (a + op_len - 4) != *(int *) (b + op_len - 4))
+	    return false;
+	  break;
+	}
+      len -= op_len;
+      if (len == 0)
+	return true;
+      a += op_len;
+      b += op_len;
+    }
+  while (1);
+}
+
+#define CHECK_GLIBC_IFUNC_CPU_OFF(name)					\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->cpuid[index_cpu_##name].reg_##name		\
+	  &= ~bit_cpu_##name;						\
+      break;								\
+    }
+
+#define CHECK_GLIBC_IFUNC_ARCH_OFF(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      break;								\
+    }
+
+#define CHECK_GLIBC_IFUNC_ARCH_BOTH(name)				\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1')				\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_ARCH_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
+#define CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH(name, need)		\
+  if (equal (p, #name "=", sizeof (#name)))				\
+    {									\
+      if (p[sizeof (#name)] == '0')					\
+	cpu_features->feature[index_arch_##name]			\
+	  &= ~bit_arch_##name;						\
+      else if (p[sizeof (#name)] == '1'					\
+	       && CPU_FEATURES_CPU_P (cpu_features, need))		\
+	cpu_features->feature[index_arch_##name]			\
+	  |= bit_arch_##name;						\
+      break;								\
+    }
+
 static inline void
-init_cpu_features (struct cpu_features *cpu_features)
+init_cpu_features (struct cpu_features *cpu_features, char **env)
 {
   unsigned int ebx, ecx, edx;
   unsigned int family = 0;
@@ -268,4 +369,101 @@  no_cpuid:
   cpu_features->family = family;
   cpu_features->model = model;
   cpu_features->kind = kind;
+
+  if (env == NULL)
+    return;
+
+  while (*env != NULL)
+    {
+      const char *p, *end;
+      size_t len = 13;
+      end = *env;
+      for (p = end; *p != '\0'; p++)
+	if (--len == 0 && equal (end, "GLIBC_IFUNC=", 12))
+	  {
+	    len = strlen (p);
+	    end = p + len;
+	    do
+	      {
+		const char *c;
+		for (c = p; *c != ':'; c++)
+		  if (c >= end)
+		    break;
+		len = c - p;
+		switch (len)
+		  {
+		  default:
+		    break;
+		  case 5:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CX8);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (HTT);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (RTM);
+		    break;
+		  case 6:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX2);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (CMOV);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (ERMS);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (FMA4);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE2);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I586);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (I686);
+		    break;
+		  case 7:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSSE3);
+		    break;
+		  case 8:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_1);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (SSE4_2);
+		    break;
+		  case 9:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512F);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (OSXSAVE);
+		    break;
+		  case 10:
+		    CHECK_GLIBC_IFUNC_CPU_OFF (AVX512DQ);
+		    CHECK_GLIBC_IFUNC_CPU_OFF (POPCOUNT);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Slow_BSF);
+		    break;
+		  case 12:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA_Usable);
+		    break;
+		  case 13:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX2_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (FMA4_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_CPU_BOTH (Slow_SSE4_2,
+							  SSE4_2);
+		    break;
+		  case 15:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512F_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_NEED_ARCH_BOTH
+		      (AVX_Fast_Unaligned_Load, AVX_Usable);
+		    break;
+		  case 17:
+		    CHECK_GLIBC_IFUNC_ARCH_OFF (AVX512DQ_Usable);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Rep_String);
+		    break;
+		  case 20:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Copy_Backward);
+		    break;
+		  case 21:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Load);
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Fast_Unaligned_Copy);
+		    break;
+		  case 22:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Prefer_No_VZEROUPPER);
+		    break;
+		  case 28:
+		    CHECK_GLIBC_IFUNC_ARCH_BOTH (Prefer_PMINUB_for_stringop);
+		    break;
+		  }
+		p += len + 1;
+	      }
+	    while (p < end);
+	    return;
+	  }
+      env++;
+    }
 }
diff --git a/sysdeps/x86/libc-start.c b/sysdeps/x86/libc-start.c
index 3b5ea6e..7dec1ca 100644
--- a/sysdeps/x86/libc-start.c
+++ b/sysdeps/x86/libc-start.c
@@ -34,7 +34,7 @@  __libc_start_main (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
 		   void (*fini) (void),
 		   void (*rtld_fini) (void), void *stack_end)
 {
-  init_cpu_features (&_dl_x86_cpu_features);
+  init_cpu_features (&_dl_x86_cpu_features, &argv[argc + 1]);
   return generic_start_main (main, argc, argv, init, fini, rtld_fini,
 			     stack_end);
 }
diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
index ed0c1a8..071a2e1 100644
--- a/sysdeps/x86_64/dl-machine.h
+++ b/sysdeps/x86_64/dl-machine.h
@@ -227,7 +227,8 @@  dl_platform_init (void)
 #ifdef SHARED
   /* init_cpu_features has been called early from __libc_start_main in
      static executable.  */
-  init_cpu_features (&GLRO(dl_x86_cpu_features));
+  init_cpu_features (&GLRO(dl_x86_cpu_features),
+		     &_dl_argv[_dl_argc + 1]);
 #endif
 }
 
-- 
2.7.4