[2.27] x86-64: Use IFUNC memcpy and mempcpy in libc.a

Message ID 20170709145903.GA6521@gmail.com
State New, archived
Headers

Commit Message

H.J. Lu July 9, 2017, 2:59 p.m. UTC
  On Sat, Jul 08, 2017 at 04:59:04PM -0700, H.J. Lu wrote:
> On Sat, Jul 8, 2017 at 4:12 PM, Tulio Magno Quites Machado Filho
> <tuliom@linux.vnet.ibm.com> wrote:
> > "H.J. Lu" <hjl.tools@gmail.com> writes:
> >
> >> On Sat, Jul 8, 2017 at 11:31 AM, Tulio Magno Quites Machado Filho
> >> <tuliom@linux.vnet.ibm.com> wrote:
> >>> Changes since version 1:
> >>>
> >>>  - Added a testcase.  This is now validating both statically and
> >>>    dynamically linked executables.
> >>>  - Fixed an issue in the $(foreach ..) in sysdeps/powerpc/powerpc64le/Makefile.
> >>>  - Added a comment to csu/libc-start.c
> >>>  - Added a comment to csu/libc-tls.c
> >>>
> >>> -- 8< --
> >>>
> >>> The patch proposed by Peter Bergner [1] to libgcc in order to fix
> >>> [BZ #21707] adds a dependency on a symbol provided by the loader,
> >>> forcing the loader to be linked to tests after libgcc was linked.
> >>>
> >>> It also requires to read the thread pointer during IRELA relocations.
> >>>
> >>> Tested on powerpc, powerpc64, powerpc64le, s390x and x86_64.
> >>>
> >>> [1] https://sourceware.org/ml/libc-alpha/2017-06/msg01383.html
> >>>
> >>> 2017-07-08  Tulio Magno Quites Machado Filho  <tuliom@linux.vnet.ibm.com>
> >>>
> >>>         [BZ #21707]
> >>>         * csu/libc-start.c (LIBC_START_MAIN): Perform IREL{,A}
> >>>         relocations after initializing the TCB on statically linked
> >>>         executables.
> >>>         * csu/libc-tls.c (__libc_setup_tls): Add a comment about
> >>>         IREL{,A} relocations.
> >>>         * elf/Makefile (tests-static-normal): Add tst-tlsifunc-static.
> >>>         (tests): Add tst-tlsifunc.
> >>>         * elf/tst-tlsifunc.c: New file.
> >>>         * elf/tst-tlsifunc-static.c: Likewise.
> >>>         * sysdeps/powerpc/powerpc64le/Makefile (f128-loader-link): New
> >>>         variable.
> >>>         [$(subdir) = math] (test-float128% test-ifloat128%): Force
> >>>         linking to the loader after linking to libgcc.
> >>>         [$(subdir) = wcsmbs stdlib] (bug-strtod bug-strtod2 bug-strtod2)
> >>>         (tst-strtod-round tst-wcstod-round tst-strtod6 tst-strfrom)
> >>>         (tst-strfrom-locale strfrom-skeleton): Likewise.
> >>> ---
> >>>  csu/libc-start.c                     | 11 +++---
> >>>  csu/libc-tls.c                       |  2 ++
> >>>  elf/Makefile                         |  5 +--
> >>>  elf/tst-tlsifunc-static.c            | 19 +++++++++++
> >>>  elf/tst-tlsifunc.c                   | 66 ++++++++++++++++++++++++++++++++++++
> >>>  sysdeps/powerpc/powerpc64le/Makefile | 10 ++++++
> >>>  6 files changed, 107 insertions(+), 6 deletions(-)
> >>>  create mode 100644 elf/tst-tlsifunc-static.c
> >>>  create mode 100644 elf/tst-tlsifunc.c
> >>>
> >>> diff --git a/csu/libc-start.c b/csu/libc-start.c
> >>> index c2dd159..84b7f99 100644
> >>> --- a/csu/libc-start.c
> >>> +++ b/csu/libc-start.c
> >>> @@ -188,12 +188,15 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
> >>>
> >>>    ARCH_INIT_CPU_FEATURES ();
> >>>
> >>> -  /* Perform IREL{,A} relocations.  */
> >>> -  apply_irel ();
> >>> -
> >>>    /* The stack guard goes into the TCB, so initialize it early.  */
> >>>    __libc_setup_tls ();
> >>>
> >>> +  /* Perform IREL{,A} relocations.
> >>> +     Note: the relocations must happen after TLS initialization so that
> >>> +     IFUNC resolvers can benefit from thread-local storage, e.g. powerpc's
> >>> +     hwcap and platform fields available in the TCB.  */
> >>> +  apply_irel ();
> >>> +
> >>>    /* Set up the stack checker's canary.  */
> >>>    uintptr_t stack_chk_guard = _dl_setup_stack_chk_guard (_dl_random);
> >>>  # ifdef THREAD_SET_STACK_GUARD
> >>> @@ -224,7 +227,7 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
> >>>    __pointer_chk_guard_local = pointer_chk_guard;
> >>>  # endif
> >>>
> >>> -#endif
> >>> +#endif /* !SHARED  */
> >>>
> >>
> >> apply_irel should be called as early as possible.
> >
> > Why?  Could you elaborate, please?
> >
> 
> To use IFUNC in static executables, apply_irel should be called before
> any functions with IFUNC implementation is called.  At the moment,
> a few functions are used before apply_irel is called.  To address it,
> we can move apply_irel forward.  Call it later makes it worse.
> 

FYI, this is the patch I am submitting for glibc 2.27.

H.J.
---
Since apply_irel is called before memcpy and mempcpy are called, we
can use IFUNC memcpy and mempcpy in libc.a.

	* sysdeps/x86_64/memmove.S (MEMCPY_SYMBOL): Don't check SHARED.
	(MEMPCPY_SYMBOL): Likewise.
	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Also include
	in libc.a.
	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S:
	Likewise.
	* sysdeps/x86_64/multiarch/memcpy.c: Also include in libc.a.
	(__hidden_ver1): Don't use in libc.a.
	* sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
	(__mempcpy): Don't create a weak alias in libc.a.
	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Don't
	check SHARED.
	* sysdeps/x86_64/multiarch/mempcpy.c: Also include in libc.a.
	(__hidden_ver1): Don't use in libc.a.
---
 sysdeps/x86_64/memmove.S                           |  4 ++--
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S       |  5 +----
 sysdeps/x86_64/multiarch/memcpy-ssse3.S            |  5 +----
 sysdeps/x86_64/multiarch/memcpy.c                  |  8 ++++----
 .../multiarch/memmove-avx512-no-vzeroupper.S       |  6 ------
 .../x86_64/multiarch/memmove-sse2-unaligned-erms.S |  4 +---
 .../x86_64/multiarch/memmove-vec-unaligned-erms.S  | 22 +++++-----------------
 sysdeps/x86_64/multiarch/mempcpy.c                 |  8 ++++----
 8 files changed, 18 insertions(+), 44 deletions(-)
  

Comments

H.J. Lu Aug. 2, 2017, 3:53 p.m. UTC | #1
On Sun, Jul 9, 2017 at 7:59 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
 test-ifloat128%): Force
>
> FYI, this is the patch I am submitting for glibc 2.27.

Any comments or objections?

H.J.
> H.J.
> ---
> Since apply_irel is called before memcpy and mempcpy are called, we
> can use IFUNC memcpy and mempcpy in libc.a.
>
>         * sysdeps/x86_64/memmove.S (MEMCPY_SYMBOL): Don't check SHARED.
>         (MEMPCPY_SYMBOL): Likewise.
>         * sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Also include
>         in libc.a.
>         * sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
>         * sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S:
>         Likewise.
>         * sysdeps/x86_64/multiarch/memcpy.c: Also include in libc.a.
>         (__hidden_ver1): Don't use in libc.a.
>         * sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
>         (__mempcpy): Don't create a weak alias in libc.a.
>         * sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Don't
>         check SHARED.
>         * sysdeps/x86_64/multiarch/mempcpy.c: Also include in libc.a.
>         (__hidden_ver1): Don't use in libc.a.
> ---
>  sysdeps/x86_64/memmove.S                           |  4 ++--
>  sysdeps/x86_64/multiarch/memcpy-ssse3-back.S       |  5 +----
>  sysdeps/x86_64/multiarch/memcpy-ssse3.S            |  5 +----
>  sysdeps/x86_64/multiarch/memcpy.c                  |  8 ++++----
>  .../multiarch/memmove-avx512-no-vzeroupper.S       |  6 ------
>  .../x86_64/multiarch/memmove-sse2-unaligned-erms.S |  4 +---
>  .../x86_64/multiarch/memmove-vec-unaligned-erms.S  | 22 +++++-----------------
>  sysdeps/x86_64/multiarch/mempcpy.c                 |  8 ++++----
>  8 files changed, 18 insertions(+), 44 deletions(-)
>
> diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
> index 5bbae990..24efe83 100644
> --- a/sysdeps/x86_64/memmove.S
> +++ b/sysdeps/x86_64/memmove.S
> @@ -29,7 +29,7 @@
>  #define SECTION(p)             p
>
>  #ifdef USE_MULTIARCH
> -# if !defined SHARED || !IS_IN (libc)
> +# if !IS_IN (libc)
>  #  define MEMCPY_SYMBOL(p,s)           memcpy
>  # endif
>  #else
> @@ -39,7 +39,7 @@
>  #  define MEMCPY_SYMBOL(p,s)           memcpy
>  # endif
>  #endif
> -#if !defined SHARED || !defined USE_MULTIARCH || !IS_IN (libc)
> +#if !defined USE_MULTIARCH || !IS_IN (libc)
>  # define MEMPCPY_SYMBOL(p,s)           __mempcpy
>  #endif
>  #ifndef MEMMOVE_SYMBOL
> diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> index 4e060a2..ce53993 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
> @@ -19,10 +19,7 @@
>
>  #include <sysdep.h>
>
> -#if IS_IN (libc) \
> -    && (defined SHARED \
> -        || defined USE_AS_MEMMOVE \
> -       || !defined USE_MULTIARCH)
> +#if IS_IN (libc)
>
>  #include "asm-syntax.h"
>
> diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> index f3ea52a..0ac4c21 100644
> --- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> +++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
> @@ -19,10 +19,7 @@
>
>  #include <sysdep.h>
>
> -#if IS_IN (libc) \
> -    && (defined SHARED \
> -        || defined USE_AS_MEMMOVE \
> -       || !defined USE_MULTIARCH)
> +#if IS_IN (libc)
>
>  #include "asm-syntax.h"
>
> diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
> index 6a2d353..273bc61 100644
> --- a/sysdeps/x86_64/multiarch/memcpy.c
> +++ b/sysdeps/x86_64/multiarch/memcpy.c
> @@ -17,10 +17,8 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>
> -/* Define multiple versions only for the definition in lib and for
> -   DSO.  In static binaries we need memcpy before the initialization
> -   happened.  */
> -#if defined SHARED && IS_IN (libc)
> +/* Define multiple versions only for the definition in libc.  */
> +#if IS_IN (libc)
>  # define memcpy __redirect_memcpy
>  # include <string.h>
>  # undef memcpy
> @@ -31,8 +29,10 @@
>  libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
>                        IFUNC_SELECTOR ());
>
> +# ifdef SHARED
>  __hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
>    __attribute__ ((visibility ("hidden")));
> +# endif
>
>  # include <shlib-compat.h>
>  versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
> diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
> index f3ef105..7ca365a 100644
> --- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
> +++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
> @@ -23,7 +23,6 @@
>  # include "asm-syntax.h"
>
>         .section .text.avx512,"ax",@progbits
> -# if defined SHARED && !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
>  ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
> @@ -34,14 +33,11 @@ ENTRY (__mempcpy_avx512_no_vzeroupper)
>         addq    %rdx, %rax
>         jmp     L(start)
>  END (__mempcpy_avx512_no_vzeroupper)
> -# endif
>
> -# ifdef SHARED
>  ENTRY (__memmove_chk_avx512_no_vzeroupper)
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
>  END (__memmove_chk_avx512_no_vzeroupper)
> -# endif
>
>  ENTRY (__memmove_avx512_no_vzeroupper)
>         mov     %rdi, %rax
> @@ -413,8 +409,6 @@ L(gobble_256bytes_nt_loop_bkw):
>         jmp     L(check)
>  END (__memmove_avx512_no_vzeroupper)
>
> -# ifdef SHARED
>  strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
>  strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
> -# endif
>  #endif
> diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
> index 743064b..cfb604d 100644
> --- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
> @@ -18,9 +18,7 @@
>
>  #if IS_IN (libc)
>  # define MEMMOVE_SYMBOL(p,s)   p##_sse2_##s
> -#endif
> -
> -#if !defined SHARED || !IS_IN (libc)
> +#else
>  weak_alias (__mempcpy, mempcpy)
>  #endif
>
> diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> index d694e8b..0fad756 100644
> --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> @@ -105,22 +105,20 @@
>  #endif
>
>         .section SECTION(.text),"ax",@progbits
> -#if defined SHARED && IS_IN (libc)
> +#if IS_IN (libc)
>  ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
>  END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
>  #endif
>
> -#if VEC_SIZE == 16 || defined SHARED
>  ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
>         movq    %rdi, %rax
>         addq    %rdx, %rax
>         jmp     L(start)
>  END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
> -#endif
>
> -#if defined SHARED && IS_IN (libc)
> +#if IS_IN (libc)
>  ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
> @@ -151,7 +149,6 @@ L(nop):
>  END (MEMMOVE_SYMBOL (__memmove, unaligned))
>
>  # if VEC_SIZE == 16
> -#  if defined SHARED
>  ENTRY (__mempcpy_chk_erms)
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
> @@ -163,7 +160,6 @@ ENTRY (__mempcpy_erms)
>         addq    %rdx, %rax
>         jmp     L(start_movsb)
>  END (__mempcpy_erms)
> -#  endif
>
>  ENTRY (__memmove_chk_erms)
>         cmpq    %rdx, %rcx
> @@ -193,13 +189,10 @@ L(movsb_backward):
>         cld
>         ret
>  END (__memmove_erms)
> -#  if defined SHARED
>  strong_alias (__memmove_erms, __memcpy_erms)
>  strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> -#  endif
>  # endif
>
> -# ifdef SHARED
>  ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
> @@ -215,7 +208,6 @@ ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
>         cmpq    %rdx, %rcx
>         jb      HIDDEN_JUMPTARGET (__chk_fail)
>  END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
> -# endif
>
>  ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
>         movq    %rdi, %rax
> @@ -546,19 +538,15 @@ L(loop_large_backward):
>  #endif
>  END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
>
> -#ifdef SHARED
> -# if IS_IN (libc)
> -#  ifdef USE_MULTIARCH
> +#if IS_IN (libc)
> +# ifdef USE_MULTIARCH
>  strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
>               MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
>  strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
>               MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
> -#  endif
> +# endif
>  strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
>               MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
> -# endif
>  #endif
> -#if VEC_SIZE == 16 || defined SHARED
>  strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned),
>               MEMCPY_SYMBOL (__memcpy, unaligned))
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
> index e627b00..49e9896 100644
> --- a/sysdeps/x86_64/multiarch/mempcpy.c
> +++ b/sysdeps/x86_64/multiarch/mempcpy.c
> @@ -17,10 +17,8 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>
> -/* Define multiple versions only for the definition in lib and for
> -   DSO.  In static binaries we need mempcpy before the initialization
> -   happened.  */
> -#if defined SHARED && IS_IN (libc)
> +/* Define multiple versions only for the definition in libc.  */
> +#if IS_IN (libc)
>  # define mempcpy __redirect_mempcpy
>  # define __mempcpy __redirect___mempcpy
>  # define NO_MEMPCPY_STPCPY_REDIRECT
> @@ -35,8 +33,10 @@
>  libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
>
>  weak_alias (__mempcpy, mempcpy)
> +# ifdef SHARED
>  __hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
>    __attribute__ ((visibility ("hidden")));
>  __hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
>    __attribute__ ((visibility ("hidden")));
> +# endif
>  #endif
> --
> 2.9.4
>
  
Florian Weimer Aug. 2, 2017, 6:27 p.m. UTC | #2
On 08/02/2017 05:53 PM, H.J. Lu wrote:
> On Sun, Jul 9, 2017 at 7:59 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>  test-ifloat128%): Force
>>
>> FYI, this is the patch I am submitting for glibc 2.27.
> 
> Any comments or objections?

I have not reviewed if there are any static library initialization
issues caused by IFUNC use for string functions, but I agree in
principle that something like this should be possible.

Thanks,
Florian
  
H.J. Lu Aug. 2, 2017, 6:36 p.m. UTC | #3
On Wed, Aug 2, 2017 at 11:27 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 08/02/2017 05:53 PM, H.J. Lu wrote:
>> On Sun, Jul 9, 2017 at 7:59 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>>  test-ifloat128%): Force
>>>
>>> FYI, this is the patch I am submitting for glibc 2.27.
>>
>> Any comments or objections?
>
> I have not reviewed if there are any static library initialization
> issues caused by IFUNC use for string functions, but I agree in
> principle that something like this should be possible.
>

The entry point of static executable is

STATIC int
LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
int argc, char **argv,
#ifdef LIBC_START_MAIN_AUXVEC_ARG
ElfW(auxv_t) *auxvec,
#endif
__typeof (main) init,
void (*fini) (void),
void (*rtld_fini) (void), void *stack_end)
{
  /* Result of the 'main' function.  */
  int result;

  __libc_multiple_libcs = &_dl_starting_up && !_dl_starting_up;

#ifndef SHARED
  char **ev = &argv[argc + 1];

  __environ = ev;

  /* Store the lowest stack address.  This is done in ld.so if this is
     the code for the DSO.  */
  __libc_stack_end = stack_end;

# ifdef HAVE_AUX_VECTOR
  /* First process the auxiliary vector since we need to find the
     program header to locate an eventually present PT_TLS entry.  */
#  ifndef LIBC_START_MAIN_AUXVEC_ARG
  ElfW(auxv_t) *auxvec;
  {
    char **evp = ev;
    while (*evp++ != NULL)
      ;
    auxvec = (ElfW(auxv_t) *) evp;
  }
#  endif
  _dl_aux_init (auxvec);
  if (GL(dl_phdr) == NULL)
# endif
    {
      /* Starting from binutils-2.23, the linker will define the
         magic symbol __ehdr_start to point to our own ELF header
         if it is visible in a segment that also includes the phdrs.
         So we can set up _dl_phdr and _dl_phnum even without any
         information from auxv.  */

      extern const ElfW(Ehdr) __ehdr_start
__attribute__ ((weak, visibility ("hidden")));
      if (&__ehdr_start != NULL)
        {
          assert (__ehdr_start.e_phentsize == sizeof *GL(dl_phdr));
          GL(dl_phdr) = (const void *) &__ehdr_start + __ehdr_start.e_phoff;
          GL(dl_phnum) = __ehdr_start.e_phnum;
        }
    }

  /* Initialize very early so that tunables can use it.  */
  __libc_init_secure ();

  __tunables_init (__environ);

  ARCH_INIT_CPU_FEATURES ();

  /* Perform IREL{,A} relocations.  */
  ARCH_SETUP_IREL ();

If there is no memcpy/mempcpy call before ARCH_SETUP_IREL (),
we can use IFUNC memcpy and mempcpy in static executable.
Since the code path up to ARCH_SETUP_IREL () is the same for
all static executables, the static executable tests within glibc are
sufficient to verify that it is safe to do so.
  
Florian Weimer Aug. 2, 2017, 6:51 p.m. UTC | #4
On 08/02/2017 08:36 PM, H.J. Lu wrote:
> If there is no memcpy/mempcpy call before ARCH_SETUP_IREL (),
> we can use IFUNC memcpy and mempcpy in static executable.
> Since the code path up to ARCH_SETUP_IREL () is the same for
> all static executables, the static executable tests within glibc are
> sufficient to verify that it is safe to do so.

Ah, this is a good point.  Thanks.

Florian
  
H.J. Lu Aug. 4, 2017, 1:18 p.m. UTC | #5
On Wed, Aug 2, 2017 at 11:51 AM, Florian Weimer <fweimer@redhat.com> wrote:
> On 08/02/2017 08:36 PM, H.J. Lu wrote:
>> If there is no memcpy/mempcpy call before ARCH_SETUP_IREL (),
>> we can use IFUNC memcpy and mempcpy in static executable.
>> Since the code path up to ARCH_SETUP_IREL () is the same for
>> all static executables, the static executable tests within glibc are
>> sufficient to verify that it is safe to do so.
>
> Ah, this is a good point.  Thanks.
>

I am checking it in today.
  

Patch

diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index 5bbae990..24efe83 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -29,7 +29,7 @@ 
 #define SECTION(p)		p
 
 #ifdef USE_MULTIARCH
-# if !defined SHARED || !IS_IN (libc)
+# if !IS_IN (libc)
 #  define MEMCPY_SYMBOL(p,s)		memcpy
 # endif
 #else
@@ -39,7 +39,7 @@ 
 #  define MEMCPY_SYMBOL(p,s)		memcpy
 # endif
 #endif
-#if !defined SHARED || !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !IS_IN (libc)
 # define MEMPCPY_SYMBOL(p,s)		__mempcpy
 #endif
 #ifndef MEMMOVE_SYMBOL
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 4e060a2..ce53993 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -19,10 +19,7 @@ 
 
 #include <sysdep.h>
 
-#if IS_IN (libc) \
-    && (defined SHARED \
-        || defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
+#if IS_IN (libc)
 
 #include "asm-syntax.h"
 
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index f3ea52a..0ac4c21 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -19,10 +19,7 @@ 
 
 #include <sysdep.h>
 
-#if IS_IN (libc) \
-    && (defined SHARED \
-        || defined USE_AS_MEMMOVE \
-	|| !defined USE_MULTIARCH)
+#if IS_IN (libc)
 
 #include "asm-syntax.h"
 
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
index 6a2d353..273bc61 100644
--- a/sysdeps/x86_64/multiarch/memcpy.c
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -17,10 +17,8 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need memcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
 # define memcpy __redirect_memcpy
 # include <string.h>
 # undef memcpy
@@ -31,8 +29,10 @@ 
 libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
 		       IFUNC_SELECTOR ());
 
+# ifdef SHARED
 __hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
   __attribute__ ((visibility ("hidden")));
+# endif
 
 # include <shlib-compat.h>
 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index f3ef105..7ca365a 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -23,7 +23,6 @@ 
 # include "asm-syntax.h"
 
 	.section .text.avx512,"ax",@progbits
-# if defined SHARED && !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
 ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -34,14 +33,11 @@  ENTRY (__mempcpy_avx512_no_vzeroupper)
 	addq	%rdx, %rax
 	jmp	L(start)
 END (__mempcpy_avx512_no_vzeroupper)
-# endif
 
-# ifdef SHARED
 ENTRY (__memmove_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memmove_chk_avx512_no_vzeroupper)
-# endif
 
 ENTRY (__memmove_avx512_no_vzeroupper)
 	mov	%rdi, %rax
@@ -413,8 +409,6 @@  L(gobble_256bytes_nt_loop_bkw):
 	jmp	L(check)
 END (__memmove_avx512_no_vzeroupper)
 
-# ifdef SHARED
 strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
 strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
-# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 743064b..cfb604d 100644
--- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -18,9 +18,7 @@ 
 
 #if IS_IN (libc)
 # define MEMMOVE_SYMBOL(p,s)	p##_sse2_##s
-#endif
-
-#if !defined SHARED || !IS_IN (libc)
+#else
 weak_alias (__mempcpy, mempcpy)
 #endif
 
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index d694e8b..0fad756 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -105,22 +105,20 @@ 
 #endif
 
 	.section SECTION(.text),"ax",@progbits
-#if defined SHARED && IS_IN (libc)
+#if IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned))
 #endif
 
-#if VEC_SIZE == 16 || defined SHARED
 ENTRY (MEMPCPY_SYMBOL (__mempcpy, unaligned))
 	movq	%rdi, %rax
 	addq	%rdx, %rax
 	jmp	L(start)
 END (MEMPCPY_SYMBOL (__mempcpy, unaligned))
-#endif
 
-#if defined SHARED && IS_IN (libc)
+#if IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -151,7 +149,6 @@  L(nop):
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
 
 # if VEC_SIZE == 16
-#  if defined SHARED
 ENTRY (__mempcpy_chk_erms)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -163,7 +160,6 @@  ENTRY (__mempcpy_erms)
 	addq	%rdx, %rax
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
-#  endif
 
 ENTRY (__memmove_chk_erms)
 	cmpq	%rdx, %rcx
@@ -193,13 +189,10 @@  L(movsb_backward):
 	cld
 	ret
 END (__memmove_erms)
-#  if defined SHARED
 strong_alias (__memmove_erms, __memcpy_erms)
 strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
-#  endif
 # endif
 
-# ifdef SHARED
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -215,7 +208,6 @@  ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
-# endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 	movq	%rdi, %rax
@@ -546,19 +538,15 @@  L(loop_large_backward):
 #endif
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 
-#ifdef SHARED
-# if IS_IN (libc)
-#  ifdef USE_MULTIARCH
+#if IS_IN (libc)
+# ifdef USE_MULTIARCH
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
 strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
-#  endif
+# endif
 strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
 	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
-# endif
 #endif
-#if VEC_SIZE == 16 || defined SHARED
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned),
 	      MEMCPY_SYMBOL (__memcpy, unaligned))
-#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
index e627b00..49e9896 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.c
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -17,10 +17,8 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-/* Define multiple versions only for the definition in lib and for
-   DSO.  In static binaries we need mempcpy before the initialization
-   happened.  */
-#if defined SHARED && IS_IN (libc)
+/* Define multiple versions only for the definition in libc.  */
+#if IS_IN (libc)
 # define mempcpy __redirect_mempcpy
 # define __mempcpy __redirect___mempcpy
 # define NO_MEMPCPY_STPCPY_REDIRECT
@@ -35,8 +33,10 @@ 
 libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
 
 weak_alias (__mempcpy, mempcpy)
+# ifdef SHARED
 __hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
   __attribute__ ((visibility ("hidden")));
 __hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
   __attribute__ ((visibility ("hidden")));
+# endif
 #endif