x86-64: Remove plt bypassing of ifuncs.

Message ID 20150615092709.GA4957@domone
State New, archived
Headers

Commit Message

Ondrej Bilka June 15, 2015, 9:27 a.m. UTC
  On Mon, Jun 08, 2015 at 08:57:39PM +0000, Joseph Myers wrote:
> On Mon, 8 Jun 2015, Ondřej Bílka wrote:
> 
> > Here I removed PLT bypassing from the x64 functions. The question is how
> > to disable bypassing generically for ifuncs. For normal functions PLT
> > bypassing has benefits, but it is now something that gcc should do; see
> > the recent noplt proposal on the gcc list.
> > 
> > Here I did the removal for x64 and guarded hidden_proto with !defined
> > __x86_64__. I don't know whether it is better to let other maintainers do
> > the same or to come up with a generic solution.
> 
> I think it should work to make the hidden alias be an alias for the IFUNC 
> (in the multi-arch case - obviously with --disable-multi-arch it needs to 
> continue to exist as an alias for the non-IFUNC, and it's desirable to run 
> tests for both cases for any such patch).  That way you shouldn't need to 
> change include/string.h at all.  The hidden aliases will get PLT entries, 
> but such entries don't cause the localplt test to fail and there are a few 
> already anyway.
> 
OK, here is a new version. I still don't know why memcpy causes problems;
I needed to add a manual redirection to the ifunc.


        * sysdeps/generic/symbol-hacks.h [__x86_64__]: Disable.
        * sysdeps/x86_64/memcpy.S: Remove plt bypassing.
        * sysdeps/x86_64/multiarch/memcmp.S: Likewise.
        * sysdeps/x86_64/multiarch/memcpy.S: Likewise.
        * sysdeps/x86_64/multiarch/mempcpy.S: Likewise.
        * sysdeps/x86_64/multiarch/memset.S: Likewise.
        * sysdeps/x86_64/multiarch/strcat.S: Likewise.
        * sysdeps/x86_64/multiarch/strchr.S: Likewise.
        * sysdeps/x86_64/multiarch/strcmp.S: Likewise.
        * sysdeps/x86_64/multiarch/strcpy.S: Likewise.
        * sysdeps/x86_64/multiarch/stpncpy-c.c: Remove plt bypassing.
        * sysdeps/x86_64/multiarch/strncat-c.c: Likewise.
        * sysdeps/x86_64/multiarch/strncpy-c.c: Likewise.
  

Patch

diff --git a/sysdeps/generic/symbol-hacks.h b/sysdeps/generic/symbol-hacks.h
index ce576c9..fe13b37 100644
--- a/sysdeps/generic/symbol-hacks.h
+++ b/sysdeps/generic/symbol-hacks.h
@@ -1,6 +1,7 @@ 
 /* Some compiler optimizations may transform loops into memset/memmove
    calls and without proper declaration it may generate PLT calls.  */
-#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED
+#if !defined __ASSEMBLER__ && IS_IN (libc) && defined SHARED && \
+    !defined __x86_64__
 asm ("memmove = __GI_memmove");
 asm ("memset = __GI_memset");
 asm ("memcpy = __GI_memcpy");
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
index eea8c2a..8356259 100644
--- a/sysdeps/x86_64/memcpy.S
+++ b/sysdeps/x86_64/memcpy.S
@@ -32,9 +32,6 @@ 
 #  define RETVAL	(-8)
 #  if defined SHARED && !defined USE_MULTIARCH && IS_IN (libc)
 #    define memcpy	__memcpy
-#    undef libc_hidden_builtin_def
-#    define libc_hidden_builtin_def(name) \
-	.globl __GI_memcpy; __GI_memcpy = __memcpy
 #  endif
 #endif
 #define SAVE0	(RETVAL - 8)
diff --git a/sysdeps/x86_64/multiarch/memcmp.S b/sysdeps/x86_64/multiarch/memcmp.S
index f8b4636..3af38a1 100644
--- a/sysdeps/x86_64/multiarch/memcmp.S
+++ b/sysdeps/x86_64/multiarch/memcmp.S
@@ -57,14 +57,6 @@  END(memcmp)
 # define END(name) \
 	cfi_endproc; .size __memcmp_sse2, .-__memcmp_sse2
 
-# ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* IFUNC doesn't work with the hidden functions in shared library since
-   they will be called without setting up EBX needed for PLT which is
-   used by IFUNC.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memcmp; __GI_memcmp = __memcmp_sse2
-# endif
 #endif
 
 #include "../memcmp.S"
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index 4e18cd3..8ea096a 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -70,14 +70,14 @@  END(__new_memcpy)
 # define END_CHK(name) \
 	cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
 
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
 
 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+libc_hidden_builtin_def (memcpy);
+
 #endif
 
 #include "../memcpy.S"
+
+#if defined SHARED && IS_IN (libc)
+.globl __GI_memcpy; __GI_memcpy = __new_memcpy;
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index 2eaacdf..c360b94 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -66,15 +66,6 @@  END(__mempcpy)
 # define END_CHK(name) \
 	cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
 
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_def(name) \
-	.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
-	.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
 #endif
 
 #include "../mempcpy.S"
diff --git a/sysdeps/x86_64/multiarch/memset.S b/sysdeps/x86_64/multiarch/memset.S
index c5f1fb3..61b4ed8 100644
--- a/sysdeps/x86_64/multiarch/memset.S
+++ b/sysdeps/x86_64/multiarch/memset.S
@@ -35,6 +35,8 @@  ENTRY(memset)
 	leaq	__memset_avx2(%rip), %rax
 2:	ret
 END(memset)
+
+libc_hidden_builtin_def (memset)
 # endif
 
 # if IS_IN (libc)
@@ -44,14 +46,6 @@  END(memset)
 #  undef __memset_chk
 #  define __memset_chk __memset_chk_sse2
 
-#  ifdef SHARED
-#  undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal memset calls through a PLT.
-   The speedup we get from using GPR instruction is likely eaten away
-   by the indirect call in the PLT.  */
-#  define libc_hidden_builtin_def(name) \
-	.globl __GI_memset; __GI_memset = __memset_sse2
-#  endif
 
 #  undef strong_alias
 #  define strong_alias(original, alias)
diff --git a/sysdeps/x86_64/multiarch/stpncpy-c.c b/sysdeps/x86_64/multiarch/stpncpy-c.c
index 2fde77d..7a1f1c0 100644
--- a/sysdeps/x86_64/multiarch/stpncpy-c.c
+++ b/sysdeps/x86_64/multiarch/stpncpy-c.c
@@ -1,8 +1,7 @@ 
 #define STPNCPY __stpncpy_sse2
 #ifdef SHARED
 #undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__stpncpy_sse2, __GI___stpncpy, __stpncpy_sse2);
+#define libc_hidden_def(x)
 #endif
 
 #include "stpncpy.c"
diff --git a/sysdeps/x86_64/multiarch/stpncpy.S b/sysdeps/x86_64/multiarch/stpncpy.S
index 2698ca6..3ce2d46 100644
--- a/sysdeps/x86_64/multiarch/stpncpy.S
+++ b/sysdeps/x86_64/multiarch/stpncpy.S
@@ -5,4 +5,5 @@ 
 #define USE_AS_STRNCPY
 #include "strcpy.S"
 
+libc_hidden_builtin_def(__stpncpy)
 weak_alias (__stpncpy, stpncpy)
diff --git a/sysdeps/x86_64/multiarch/strcat.S b/sysdeps/x86_64/multiarch/strcat.S
index 44993fa..f396f8d 100644
--- a/sysdeps/x86_64/multiarch/strcat.S
+++ b/sysdeps/x86_64/multiarch/strcat.S
@@ -23,7 +23,7 @@ 
 
 #ifndef USE_AS_STRNCAT
 # ifndef STRCAT
-#  define STRCAT strcat
+#  define STRCAT __strcat
 # endif
 #endif
 
@@ -31,14 +31,10 @@ 
 # define STRCAT_SSSE3	         	__strncat_ssse3
 # define STRCAT_SSE2	            	__strncat_sse2
 # define STRCAT_SSE2_UNALIGNED    	__strncat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strncat
-# define __GI___STRCAT              __GI___strncat
 #else
 # define STRCAT_SSSE3	         	__strcat_ssse3
 # define STRCAT_SSE2	            	__strcat_sse2
 # define STRCAT_SSE2_UNALIGNED    	__strcat_sse2_unaligned
-# define __GI_STRCAT	            	__GI_strcat
-# define __GI___STRCAT              __GI___strcat
 #endif
 
 
@@ -71,17 +67,21 @@  END(STRCAT)
 # undef END
 # define END(name) \
 	cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcat calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
+
 #endif
 
 #ifndef USE_AS_STRNCAT
 # include "../strcat.S"
 #endif
+
+#if IS_IN (libc)
+# ifdef USE_AS_STRNCAT
+weak_alias (__strncat, strncat)
+libc_hidden_builtin_def (strncat)
+libc_hidden_builtin_def (__strncat)
+# else
+weak_alias (__strcat, strcat)
+libc_hidden_builtin_def (strcat)
+libc_hidden_builtin_def (__strcat)
+# endif
+#endif
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index af55fac..c375854 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -48,12 +48,6 @@  END(strchr)
 # undef END
 # define END(name) \
 	cfi_endproc; .size __strchr_sse2, .-__strchr_sse2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strchr calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_strchr; __GI_strchr = __strchr_sse2
 #endif
 
 #include "../strchr.S"
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f50f26c..9799410 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -216,12 +216,6 @@  weak_alias (__strncasecmp, strncasecmp)
 	cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
 # endif
 
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcmp calls through a PLT.
-   The speedup we get from using SSE4.2 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCMP; __GI_STRCMP = STRCMP_SSE2
 #endif
 
 #include "../strcmp.S"
diff --git a/sysdeps/x86_64/multiarch/strcpy.S b/sysdeps/x86_64/multiarch/strcpy.S
index 9464ee8..3224e25 100644
--- a/sysdeps/x86_64/multiarch/strcpy.S
+++ b/sysdeps/x86_64/multiarch/strcpy.S
@@ -85,15 +85,6 @@  END(STRCPY)
 # undef END
 # define END(name) \
 	cfi_endproc; .size STRCPY_SSE2, .-STRCPY_SSE2
-# undef libc_hidden_builtin_def
-/* It doesn't make sense to send libc-internal strcpy calls through a PLT.
-   The speedup we get from using SSSE3 instruction is likely eaten away
-   by the indirect call in the PLT.  */
-# define libc_hidden_builtin_def(name) \
-	.globl __GI_STRCPY; __GI_STRCPY = STRCPY_SSE2
-# undef libc_hidden_def
-# define libc_hidden_def(name) \
-	.globl __GI___STRCPY; __GI___STRCPY = STRCPY_SSE2
 #endif
 
 #ifndef USE_AS_STRNCPY
diff --git a/sysdeps/x86_64/multiarch/strncat-c.c b/sysdeps/x86_64/multiarch/strncat-c.c
index a3cdbff..126e491 100644
--- a/sysdeps/x86_64/multiarch/strncat-c.c
+++ b/sysdeps/x86_64/multiarch/strncat-c.c
@@ -1,8 +1,6 @@ 
 #define STRNCAT __strncat_sse2
 #ifdef SHARED
 #undef libc_hidden_def
-#define libc_hidden_def(name) \
-  __hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
 #endif
 
 #include "string/strncat.c"
diff --git a/sysdeps/x86_64/multiarch/strncat.S b/sysdeps/x86_64/multiarch/strncat.S
index 5c1bf41..84cd429 100644
--- a/sysdeps/x86_64/multiarch/strncat.S
+++ b/sysdeps/x86_64/multiarch/strncat.S
@@ -1,5 +1,5 @@ 
 /* Multiple versions of strncat
    All versions must be listed in ifunc-impl-list.c.  */
-#define STRCAT strncat
+#define STRCAT __strncat
 #define USE_AS_STRNCAT
 #include "strcat.S"
diff --git a/sysdeps/x86_64/multiarch/strncpy-c.c b/sysdeps/x86_64/multiarch/strncpy-c.c
index 296c32c..108c3d4 100644
--- a/sysdeps/x86_64/multiarch/strncpy-c.c
+++ b/sysdeps/x86_64/multiarch/strncpy-c.c
@@ -1,8 +1,7 @@ 
 #define STRNCPY __strncpy_sse2
 #ifdef SHARED
 #undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name) \
-  __hidden_ver1 (__strncpy_sse2, __GI_strncpy, __strncpy_sse2);
+#define libc_hidden_builtin_def(x)
 #endif
 
 #include "strncpy.c"
diff --git a/sysdeps/x86_64/multiarch/strncpy.S b/sysdeps/x86_64/multiarch/strncpy.S
index 6d87a0b..ce47aad 100644
--- a/sysdeps/x86_64/multiarch/strncpy.S
+++ b/sysdeps/x86_64/multiarch/strncpy.S
@@ -3,3 +3,5 @@ 
 #define STRCPY strncpy
 #define USE_AS_STRNCPY
 #include "strcpy.S"
+
+libc_hidden_builtin_def(strncpy)