elf: Remove -fno-tree-loop-distribute-patterns usage on dl-support

Message ID 20220808200021.763123-1-adhemerval.zanella@linaro.org
State Superseded
Headers
Series elf: Remove -fno-tree-loop-distribute-patterns usage on dl-support |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Adhemerval Zanella Aug. 8, 2022, 8 p.m. UTC
  Besides the option being gcc specific, this approach is still fragile
and not future proof since we do not know if this will be the only
optimization option gcc will add that transforms loops into memset
(or any other libcall).

The patch adds a new internal __memset_generic symbol on all
ports and an extra symbol redirection in a header to be included by the
TUs that require it.  The new header with the asm redirection is set per
TU because the issue is very specific to glibc during binary loading; for
all other code the default hidden_def symbol is used.

Checked on x86_64-linux-gnu and aarch64-linux-gnu.
---
 benchtests/bench-bzero-large.c                |  1 +
 benchtests/bench-bzero-walk.c                 |  1 +
 benchtests/bench-bzero.c                      |  1 +
 benchtests/bench-memset-large.c               |  1 +
 benchtests/bench-memset-walk.c                |  1 +
 benchtests/bench-memset.c                     |  1 +
 elf/Makefile                                  |  5 ----
 elf/dl-support.c                              |  1 +
 elf/dl-symbol-hacks.h                         | 29 +++++++++++++++++++
 string/memset.c                               |  1 +
 sysdeps/aarch64/multiarch/rtld-memset.S       |  1 +
 sysdeps/alpha/memset.S                        |  1 +
 sysdeps/arm/memset.S                          |  1 +
 sysdeps/csky/abiv2/memset.S                   |  1 +
 sysdeps/i386/i586/memset.S                    |  1 +
 sysdeps/i386/i686/memset.S                    |  1 +
 sysdeps/i386/i686/multiarch/memset-ia32.S     |  5 ++++
 sysdeps/i386/memset.S                         |  1 +
 sysdeps/ia64/memset.S                         |  1 +
 sysdeps/mips/memset.S                         |  1 +
 sysdeps/powerpc/powerpc32/memset.S            |  1 +
 sysdeps/powerpc/powerpc32/power4/memset.S     |  1 +
 .../powerpc32/power4/multiarch/memset-ppc32.S |  1 +
 sysdeps/powerpc/powerpc32/power6/memset.S     |  1 +
 sysdeps/powerpc/powerpc32/power7/memset.S     |  1 +
 sysdeps/powerpc/powerpc64/le/power10/memset.S |  1 +
 .../powerpc64/multiarch/memset-power4.S       |  1 +
 .../powerpc64/multiarch/memset-ppc64.S        |  1 +
 sysdeps/powerpc/powerpc64/power4/memset.S     |  1 +
 sysdeps/powerpc/powerpc64/power6/memset.S     |  1 +
 sysdeps/powerpc/powerpc64/power7/memset.S     |  1 +
 sysdeps/powerpc/powerpc64/power8/memset.S     |  1 +
 sysdeps/s390/memset-z900.S                    |  1 +
 sysdeps/sh/memset.S                           |  1 +
 sysdeps/sparc/sparc32/memset.S                |  1 +
 .../sparc32/sparcv9/multiarch/memset-ultra1.S |  1 +
 sysdeps/sparc/sparc64/memset.S                |  1 +
 .../sparc/sparc64/multiarch/memset-ultra1.S   |  1 +
 sysdeps/x86_64/memset.S                       |  2 ++
 .../multiarch/memset-avx2-unaligned-erms.S    |  3 ++
 .../multiarch/memset-evex-unaligned-erms.S    |  3 ++
 .../multiarch/memset-sse2-unaligned-erms.S    |  3 ++
 42 files changed, 80 insertions(+), 5 deletions(-)
 create mode 100644 elf/dl-symbol-hacks.h
  

Comments

H.J. Lu Aug. 8, 2022, 10:16 p.m. UTC | #1
On Mon, Aug 8, 2022 at 1:00 PM Adhemerval Zanella via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> Besides the option being gcc specific, this approach is still fragile
> and not future proof since we do not if this will be the only
> optimization option gcc will add that transform loops to memset
> (or any libcall).
>
> The patch add a new internal __memset_generic internal symbol on all
> ports and an extra symbol redirection to header to include on TU that
> required it.  The new header with the asm redirection is set per TU
> because the issue is very specific for glibc on binary loading, the
> default hidden_def symbol is used on default code.
>
> Checked on x86_64-linux-gnu and aarch64-linux-gnu.
> ---
>  benchtests/bench-bzero-large.c                |  1 +
>  benchtests/bench-bzero-walk.c                 |  1 +
>  benchtests/bench-bzero.c                      |  1 +
>  benchtests/bench-memset-large.c               |  1 +
>  benchtests/bench-memset-walk.c                |  1 +
>  benchtests/bench-memset.c                     |  1 +
>  elf/Makefile                                  |  5 ----
>  elf/dl-support.c                              |  1 +
>  elf/dl-symbol-hacks.h                         | 29 +++++++++++++++++++
>  string/memset.c                               |  1 +
>  sysdeps/aarch64/multiarch/rtld-memset.S       |  1 +
>  sysdeps/alpha/memset.S                        |  1 +
>  sysdeps/arm/memset.S                          |  1 +
>  sysdeps/csky/abiv2/memset.S                   |  1 +
>  sysdeps/i386/i586/memset.S                    |  1 +
>  sysdeps/i386/i686/memset.S                    |  1 +
>  sysdeps/i386/i686/multiarch/memset-ia32.S     |  5 ++++
>  sysdeps/i386/memset.S                         |  1 +
>  sysdeps/ia64/memset.S                         |  1 +
>  sysdeps/mips/memset.S                         |  1 +
>  sysdeps/powerpc/powerpc32/memset.S            |  1 +
>  sysdeps/powerpc/powerpc32/power4/memset.S     |  1 +
>  .../powerpc32/power4/multiarch/memset-ppc32.S |  1 +
>  sysdeps/powerpc/powerpc32/power6/memset.S     |  1 +
>  sysdeps/powerpc/powerpc32/power7/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/le/power10/memset.S |  1 +
>  .../powerpc64/multiarch/memset-power4.S       |  1 +
>  .../powerpc64/multiarch/memset-ppc64.S        |  1 +
>  sysdeps/powerpc/powerpc64/power4/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power6/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power7/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power8/memset.S     |  1 +
>  sysdeps/s390/memset-z900.S                    |  1 +
>  sysdeps/sh/memset.S                           |  1 +
>  sysdeps/sparc/sparc32/memset.S                |  1 +
>  .../sparc32/sparcv9/multiarch/memset-ultra1.S |  1 +
>  sysdeps/sparc/sparc64/memset.S                |  1 +
>  .../sparc/sparc64/multiarch/memset-ultra1.S   |  1 +
>  sysdeps/x86_64/memset.S                       |  2 ++
>  .../multiarch/memset-avx2-unaligned-erms.S    |  3 ++
>  .../multiarch/memset-evex-unaligned-erms.S    |  3 ++
>  .../multiarch/memset-sse2-unaligned-erms.S    |  3 ++
>  42 files changed, 80 insertions(+), 5 deletions(-)
>  create mode 100644 elf/dl-symbol-hacks.h
>
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index cfc7b81cdf..f016165d45 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -137,6 +137,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-bzero-walk.c b/benchtests/bench-bzero-walk.c
> index 851a72b137..25a1a38fa3 100644
> --- a/benchtests/bench-bzero-walk.c
> +++ b/benchtests/bench-bzero-walk.c
> @@ -136,6 +136,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-bzero.c b/benchtests/bench-bzero.c
> index 500b7eba96..90b807c98a 100644
> --- a/benchtests/bench-bzero.c
> +++ b/benchtests/bench-bzero.c
> @@ -155,6 +155,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index 0ea1e3811b..6605e2d414 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -118,6 +118,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #undef MEMSET
>  #define MEMSET generic_memset
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 466ee1b8d6..f7fcdf90ba 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -116,6 +116,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #undef MEMSET
>  #define MEMSET generic_memset
> diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
> index 6c95a1e965..c801c86b73 100644
> --- a/benchtests/bench-memset.c
> +++ b/benchtests/bench-memset.c
> @@ -147,6 +147,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #ifndef WIDE
>  # undef MEMSET
> diff --git a/elf/Makefile b/elf/Makefile
> index 3386f0ce77..4b992bd348 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -162,11 +162,6 @@ ifeq (yes,$(have-loop-to-function))
>  CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns
>  endif
>
> -ifeq (yes,$(have-loop-to-function))
> -# Likewise, during static library startup, memset is not yet available.
> -CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
> -endif
> -
>  # Compile rtld itself without stack protection.
>  # Also compile all routines in the static library that are elided from
>  # the shared libc because they are in libc.a in the same way.
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index 4af0b5b2ce..6d1996ae6e 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -44,6 +44,7 @@
>  #include <dl-auxv.h>
>  #include <dl-find_object.h>
>  #include <array_length.h>
> +#include <dl-symbol-hacks.h>
>
>  extern char *__progname;
>  char **_dl_argv = &__progname; /* This is checked for some error messages.  */
> diff --git a/elf/dl-symbol-hacks.h b/elf/dl-symbol-hacks.h
> new file mode 100644
> index 0000000000..b8caccb570
> --- /dev/null
> +++ b/elf/dl-symbol-hacks.h
> @@ -0,0 +1,29 @@
> +/* Symbol redirection for loader/static initialization code.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_SYMBOL_HACKS_H
> +#define _DL_SYMBOL_HACKS_H
> +
> +/* Some compiler optimizations may transform loops into memset/memmove
> +   calls and without proper redirection it might call PLT through
> +   ifunc without relocations being processed.  */
> +#ifndef SHARED
> +asm ("memset = __memset_generic");
> +#endif
> +
> +#endif
> diff --git a/string/memset.c b/string/memset.c
> index 1303dd7ad3..b98bad7095 100644
> --- a/string/memset.c
> +++ b/string/memset.c
> @@ -88,3 +88,4 @@ MEMSET (void *dstpp, int c, size_t len)
>    return dstpp;
>  }
>  libc_hidden_builtin_def (MEMSET)
> +strong_alias (MEMSET, __memset_generic)
> diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
> index 7968d25e48..70591781d9 100644
> --- a/sysdeps/aarch64/multiarch/rtld-memset.S
> +++ b/sysdeps/aarch64/multiarch/rtld-memset.S
> @@ -22,4 +22,5 @@
>  #if IS_IN (rtld)
>  # define MEMSET memset
>  # include <sysdeps/aarch64/memset.S>
> +strong_alias (memset, __memset_generic)
>  #endif
> diff --git a/sysdeps/alpha/memset.S b/sysdeps/alpha/memset.S
> index 9249663d37..c5adae754b 100644
> --- a/sysdeps/alpha/memset.S
> +++ b/sysdeps/alpha/memset.S
> @@ -124,3 +124,4 @@ $done:      ret
>
>         cfi_endproc
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/arm/memset.S b/sysdeps/arm/memset.S
> index 9c05669864..9b9a3cb264 100644
> --- a/sysdeps/arm/memset.S
> +++ b/sysdeps/arm/memset.S
> @@ -66,3 +66,4 @@ ENTRY(memset)
>         DO_RET(lr)
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/csky/abiv2/memset.S b/sysdeps/csky/abiv2/memset.S
> index 41df8e2bb8..7e23edc787 100644
> --- a/sysdeps/csky/abiv2/memset.S
> +++ b/sysdeps/csky/abiv2/memset.S
> @@ -96,3 +96,4 @@ END (memset)
>
>  libc_hidden_builtin_def (memset)
>  .weak memset
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S
> index 672af41398..9d8dd40ad5 100644
> --- a/sysdeps/i386/i586/memset.S
> +++ b/sysdeps/i386/i586/memset.S
> @@ -101,3 +101,4 @@ L(2):       shrl    $2, %ecx        /* convert byte count to longword count */
>         ret
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
> index 3cb86c016d..3462cb99ab 100644
> --- a/sysdeps/i386/i686/memset.S
> +++ b/sysdeps/i386/i686/memset.S
> @@ -77,3 +77,4 @@ ENTRY (memset)
>         ret
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i686/multiarch/memset-ia32.S b/sysdeps/i386/i686/multiarch/memset-ia32.S
> index 8f0ae56cec..9a1b9c2b05 100644
> --- a/sysdeps/i386/i686/multiarch/memset-ia32.S
> +++ b/sysdeps/i386/i686/multiarch/memset-ia32.S
> @@ -31,3 +31,8 @@
>  #endif
>
>  #include <sysdeps/i386/i686/memset.S>
> +#if IS_IN(rtld)
> +strong_alias (memset, __memset_generic)
> +#else
> +strong_alias (__memset_ia32, __memset_generic)
> +#endif
> diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S
> index db2753eb2f..c105090f03 100644
> --- a/sysdeps/i386/memset.S
> +++ b/sysdeps/i386/memset.S
> @@ -54,3 +54,4 @@ ENTRY (memset)
>  END (memset)
>
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/ia64/memset.S b/sysdeps/ia64/memset.S
> index d52f23dd93..70ad1140b8 100644
> --- a/sysdeps/ia64/memset.S
> +++ b/sysdeps/ia64/memset.S
> @@ -395,3 +395,4 @@ store_words:
>  ;; }
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
> index c5ffab1d8c..cb5c9a140f 100644
> --- a/sysdeps/mips/memset.S
> +++ b/sysdeps/mips/memset.S
> @@ -424,3 +424,4 @@ END(MEMSET_NAME)
>  libc_hidden_builtin_def (MEMSET_NAME)
>  # endif
>  #endif
> +strong_alias(MEMSET_NAME, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
> index c125934f55..63ad3d2d2b 100644
> --- a/sysdeps/powerpc/powerpc32/memset.S
> +++ b/sysdeps/powerpc/powerpc32/memset.S
> @@ -302,3 +302,4 @@ L(handletail32):
>
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S
> index 40b140c841..98811c1a66 100644
> --- a/sysdeps/powerpc/powerpc32/power4/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power4/memset.S
> @@ -224,3 +224,4 @@ L(medium_28t):
>         blr
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> index 0937ba2215..4c5724bc60 100644
> --- a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> @@ -39,3 +39,4 @@
>  #endif
>
>  #include <sysdeps/powerpc/powerpc32/power4/memset.S>
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S
> index d86701ff7c..5f4c8518bd 100644
> --- a/sysdeps/powerpc/powerpc32/power6/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power6/memset.S
> @@ -537,3 +537,4 @@ L(medium_28t):
>         blr
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power7/memset.S b/sysdeps/powerpc/powerpc32/power7/memset.S
> index 368e8b3939..68a6ec18d8 100644
> --- a/sysdeps/powerpc/powerpc32/power7/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power7/memset.S
> @@ -428,3 +428,4 @@ L(small):
>
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S
> index 0f43b002bf..551e221fd2 100644
> --- a/sysdeps/powerpc/powerpc64/le/power10/memset.S
> +++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S
> @@ -242,3 +242,4 @@ L(bcdz_tail):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> index 4ee567c6f9..451de98f08 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> @@ -22,3 +22,4 @@
>  #define libc_hidden_builtin_def(name)
>
>  #include <sysdeps/powerpc/powerpc64/power4/memset.S>
> +strong_alias (__memset_power4, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> index 30b25ef15f..91b9775edd 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> @@ -26,3 +26,4 @@
>  #endif
>
>  #include <sysdeps/powerpc/powerpc64/memset.S>
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power4/memset.S b/sysdeps/powerpc/powerpc64/power4/memset.S
> index 0f14a5198a..91557f0442 100644
> --- a/sysdeps/powerpc/powerpc64/power4/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power4/memset.S
> @@ -237,3 +237,4 @@ L(medium_28t):
>         blr
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S
> index 140a756348..29ec7f200f 100644
> --- a/sysdeps/powerpc/powerpc64/power6/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power6/memset.S
> @@ -381,3 +381,4 @@ L(medium_28t):
>         blr
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
> index 358199a805..1afeaf754d 100644
> --- a/sysdeps/powerpc/powerpc64/power7/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power7/memset.S
> @@ -384,3 +384,4 @@ L(small):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
> index 70cace14ef..73676cdb88 100644
> --- a/sysdeps/powerpc/powerpc64/power8/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power8/memset.S
> @@ -504,3 +504,4 @@ L(LE7_tail5):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
> index 7adb466bb1..beecfa2815 100644
> --- a/sysdeps/s390/memset-z900.S
> +++ b/sysdeps/s390/memset-z900.S
> @@ -185,3 +185,4 @@ strong_alias (MEMSET_DEFAULT, memset)
>     Compare to libc_hidden_builtin_def (memset) in string/memset.c.  */
>  strong_alias (MEMSET_DEFAULT, __GI_memset)
>  #endif
> +strong_alias (MEMSET_DEFAULT, __memset_generic)
> diff --git a/sysdeps/sh/memset.S b/sysdeps/sh/memset.S
> index 7852b472eb..fc36bc836a 100644
> --- a/sysdeps/sh/memset.S
> +++ b/sysdeps/sh/memset.S
> @@ -84,3 +84,4 @@ L_byte_exit:
>         mov     r7,r0
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/sparc/sparc32/memset.S b/sysdeps/sparc/sparc32/memset.S
> index b1b67cb2d1..5026275239 100644
> --- a/sysdeps/sparc/sparc32/memset.S
> +++ b/sysdeps/sparc/sparc32/memset.S
> @@ -142,4 +142,5 @@ ENTRY(memset)
>         b               4b
>          sub            %o0, %o2, %o0
>  END(memset)
> +strong_alias (memset, __memset_generic)
>  libc_hidden_builtin_def (memset)
> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> index 2dda6f1ed6..e5082fa161 100644
> --- a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> @@ -27,3 +27,4 @@
>  # define memset  __memset_ultra1
>  # include <sysdeps/sparc/sparc32/sparcv9/memset.S>
>  #endif
> +strong_alias (__memset_ultra1, __memset_generic)
> diff --git a/sysdeps/sparc/sparc64/memset.S b/sysdeps/sparc/sparc64/memset.S
> index 33ecbc93fe..6a68ab52cb 100644
> --- a/sysdeps/sparc/sparc64/memset.S
> +++ b/sysdeps/sparc/sparc64/memset.S
> @@ -303,4 +303,5 @@ ENTRY(memset)
>  0:     retl
>          mov            %o5, %o0
>  END(memset)
> +strong_alias (memset, __memset_generic)
>  libc_hidden_builtin_def (memset)
> diff --git a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> index 3c3add791e..478a106c83 100644
> --- a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> +++ b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> @@ -27,3 +27,4 @@
>  # define memset  __memset_ultra1
>  # include <sysdeps/sparc/sparc64/memset.S>
>  #endif
> +strong_alias (__memset_ultra1, __memset_generic)
> diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> index f4e1bab601..52fb00b17b 100644
> --- a/sysdeps/x86_64/memset.S
> +++ b/sysdeps/x86_64/memset.S
> @@ -25,6 +25,7 @@
>  #define WMEMSET_SYMBOL(p,s)    __wmemset
>  #define WMEMSET_CHK_SYMBOL(p,s) p
>
> +#define NO_MULTIARCH
>  #define DEFAULT_IMPL_V1        "multiarch/memset-sse2-unaligned-erms.S"
>  #define DEFAULT_IMPL_V3        "multiarch/memset-avx2-unaligned-erms.S"
>  #define DEFAULT_IMPL_V4        "multiarch/memset-evex-unaligned-erms.S"
> @@ -32,6 +33,7 @@
>  #include "isa-default-impl.h"
>
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
>
>  #if IS_IN (libc)
>  libc_hidden_def (__wmemset)
> diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> index a9054a9122..c2da716d7c 100644
> --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> @@ -38,4 +38,7 @@
>
>  # define USE_XMM_LESS_VEC
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 3
> +strong_alias (__memset_avx2_unaligned, __memset_generic)
> +# endif
>  #endif
> diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> index ac4b2d2d50..034d19b35e 100644
> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> @@ -44,4 +44,7 @@
>
>  # define USE_LESS_VEC_MASK_STORE       1
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> +strong_alias (__memset_evex_unaligned, __memset_generic)
> +# endif
>  #endif

This will define 2 __memset_generic for -march=x86-64-v4.

> diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> index 44f9b8888b..143055adff 100644
> --- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> @@ -63,5 +63,8 @@
>  # endif
>
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL <= 2
> +strong_alias (__memset_sse2_unaligned, __memset_generic)
> +# endif
>
>  #endif
> --
> 2.34.1
>
  
Adhemerval Zanella Aug. 9, 2022, 12:11 p.m. UTC | #2
On 08/08/22 19:16, H.J. Lu wrote:

>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>> @@ -44,4 +44,7 @@
>>
>>  # define USE_LESS_VEC_MASK_STORE       1
>>  # include "memset-vec-unaligned-erms.S"
>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>> +# endif
>>  #endif
> 
> This will define 2 __memset_generic for -march=x86-64-v4.

Are you sure? Checking with an x86_64 build with -march=x86-64-v4, I am seeing:

$ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
1

And it is indeed provided only by string/memset-evex-unaligned-erms.os.
  
H.J. Lu Aug. 9, 2022, 6:31 p.m. UTC | #3
On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 08/08/22 19:16, H.J. Lu wrote:
>
> >> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >> @@ -44,4 +44,7 @@
> >>
> >>  # define USE_LESS_VEC_MASK_STORE       1
> >>  # include "memset-vec-unaligned-erms.S"
> >> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> >> +strong_alias (__memset_evex_unaligned, __memset_generic)
> >> +# endif
> >>  #endif
> >
> > This will define 2 __memset_generic for -march=x86-64-v4.
>
> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
>
> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
> 1
>
> And it is indeed provided only by string/memset-evex-unaligned-erms.os.

You are right.  Since dl-symbol-hacks.h defines memset to an alias,
should it be moved to sysdeps/generic so that each arch can provide a
suitable alias?
  
Adhemerval Zanella Aug. 9, 2022, 6:48 p.m. UTC | #4
On 09/08/22 15:31, H.J. Lu wrote:
> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 08/08/22 19:16, H.J. Lu wrote:
>>
>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>> @@ -44,4 +44,7 @@
>>>>
>>>>  # define USE_LESS_VEC_MASK_STORE       1
>>>>  # include "memset-vec-unaligned-erms.S"
>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>>>> +# endif
>>>>  #endif
>>>
>>> This will define 2 __memset_generic for -march=x86-64-v4.
>>
>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
>>
>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
>> 1
>>
>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
> 
> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
> should it be moved to sysdes/generic and each arch can provide a
> suitable alias?

The dl-symbol-hacks.h is meant to be used solely by loader code (that's
why I have added it to elf/), but I don't have a strong preference.
  
H.J. Lu Aug. 9, 2022, 7:06 p.m. UTC | #5
On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 09/08/22 15:31, H.J. Lu wrote:
> > On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
> > <adhemerval.zanella@linaro.org> wrote:
> >>
> >>
> >>
> >> On 08/08/22 19:16, H.J. Lu wrote:
> >>
> >>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>> @@ -44,4 +44,7 @@
> >>>>
> >>>>  # define USE_LESS_VEC_MASK_STORE       1
> >>>>  # include "memset-vec-unaligned-erms.S"
> >>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> >>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
> >>>> +# endif
> >>>>  #endif
> >>>
> >>> This will define 2 __memset_generic for -march=x86-64-v4.
> >>
> >> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
> >>
> >> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
> >> 1
> >>
> >> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
> >
> > You are right.  Since dl-symbol-hacks.h defines memset to an alias,
> > should it be moved to sysdes/generic and each arch can provide a
> > suitable alias?
>
> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
> why I have added to elf/), but I don't have a strong preference.

The default should be no alias.  Only IFUNC targets need to define a
proper alias.
  
Adhemerval Zanella Aug. 9, 2022, 7:32 p.m. UTC | #6
On 09/08/22 16:06, H.J. Lu wrote:
> On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 09/08/22 15:31, H.J. Lu wrote:
>>> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
>>> <adhemerval.zanella@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 08/08/22 19:16, H.J. Lu wrote:
>>>>
>>>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>> @@ -44,4 +44,7 @@
>>>>>>
>>>>>>  # define USE_LESS_VEC_MASK_STORE       1
>>>>>>  # include "memset-vec-unaligned-erms.S"
>>>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>>>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>>>>>> +# endif
>>>>>>  #endif
>>>>>
>>>>> This will define 2 __memset_generic for -march=x86-64-v4.
>>>>
>>>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
>>>>
>>>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
>>>> 1
>>>>
>>>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
>>>
>>> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
>>> should it be moved to sysdes/generic and each arch can provide a
>>> suitable alias?
>>
>> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
>> why I have added to elf/), but I don't have a strong preference.
> 
> The default should be no alias.  Only IFUNC targets need to define a
> proper alias.

Alright, although I added the alias on all targets to avoid adding another
hook to specify whether memset is implemented by IFUNC (to enable the
symbol redirection instead of using the default symbol name).  I am not sure
if making it arch-specific will simplify the required code here.
  
H.J. Lu Aug. 9, 2022, 7:34 p.m. UTC | #7
On Tue, Aug 9, 2022 at 12:32 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 09/08/22 16:06, H.J. Lu wrote:
> > On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
> > <adhemerval.zanella@linaro.org> wrote:
> >>
> >>
> >>
> >> On 09/08/22 15:31, H.J. Lu wrote:
> >>> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
> >>> <adhemerval.zanella@linaro.org> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 08/08/22 19:16, H.J. Lu wrote:
> >>>>
> >>>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>>>> @@ -44,4 +44,7 @@
> >>>>>>
> >>>>>>  # define USE_LESS_VEC_MASK_STORE       1
> >>>>>>  # include "memset-vec-unaligned-erms.S"
> >>>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> >>>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
> >>>>>> +# endif
> >>>>>>  #endif
> >>>>>
> >>>>> This will define 2 __memset_generic for -march=x86-64-v4.
> >>>>
> >>>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
> >>>>
> >>>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
> >>>> 1
> >>>>
> >>>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
> >>>
> >>> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
> >>> should it be moved to sysdes/generic and each arch can provide a
> >>> suitable alias?
> >>
> >> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
> >> why I have added to elf/), but I don't have a strong preference.
> >
> > The default should be no alias.  Only IFUNC targets need to define a
> > proper alias.
>
> Alright, although I added the alias on all targets to avoid adding another
> hook to specify whether memset is implemented by IFUNC (to enable the
> symbol redirection instead of use the default symbol name).  I am not sure
> if making arch-specific will simplify the required code here.

But your patch adds alias to all targets.
  
Adhemerval Zanella Aug. 9, 2022, 7:40 p.m. UTC | #8
On 09/08/22 16:34, H.J. Lu wrote:
> On Tue, Aug 9, 2022 at 12:32 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 09/08/22 16:06, H.J. Lu wrote:
>>> On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
>>> <adhemerval.zanella@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 09/08/22 15:31, H.J. Lu wrote:
>>>>> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
>>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 08/08/22 19:16, H.J. Lu wrote:
>>>>>>
>>>>>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>>>> @@ -44,4 +44,7 @@
>>>>>>>>
>>>>>>>>  # define USE_LESS_VEC_MASK_STORE       1
>>>>>>>>  # include "memset-vec-unaligned-erms.S"
>>>>>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>>>>>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>>>>>>>> +# endif
>>>>>>>>  #endif
>>>>>>>
>>>>>>> This will define 2 __memset_generic for -march=x86-64-v4.
>>>>>>
>>>>>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
>>>>>>
>>>>>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
>>>>>> 1
>>>>>>
>>>>>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
>>>>>
>>>>> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
>>>>> should it be moved to sysdes/generic and each arch can provide a
>>>>> suitable alias?
>>>>
>>>> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
>>>> why I have added to elf/), but I don't have a strong preference.
>>>
>>> The default should be no alias.  Only IFUNC targets need to define a
>>> proper alias.
>>
>> Alright, although I added the alias on all targets to avoid adding another
>> hook to specify whether memset is implemented by IFUNC (to enable the
>> symbol redirection instead of use the default symbol name).  I am not sure
>> if making arch-specific will simplify the required code here.
> 
> But your patch adds alias to all targets.

Yes, because otherwise we will need something like HAVE_MEMSET_IFUNC to add:

#if defined SHARED && HAVE_MEMSET_IFUNC
asm ("memset = __memset_generic");
#endif

I don't have a strong preference, although it is another thing that will require
including kernel-features.h (where we usually add the HAVE_* macros) in elf objects.
  
H.J. Lu Aug. 9, 2022, 9:51 p.m. UTC | #9
On Tue, Aug 9, 2022 at 12:40 PM Adhemerval Zanella Netto
<adhemerval.zanella@linaro.org> wrote:
>
>
>
> On 09/08/22 16:34, H.J. Lu wrote:
> > On Tue, Aug 9, 2022 at 12:32 PM Adhemerval Zanella Netto
> > <adhemerval.zanella@linaro.org> wrote:
> >>
> >>
> >>
> >> On 09/08/22 16:06, H.J. Lu wrote:
> >>> On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
> >>> <adhemerval.zanella@linaro.org> wrote:
> >>>>
> >>>>
> >>>>
> >>>> On 09/08/22 15:31, H.J. Lu wrote:
> >>>>> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
> >>>>> <adhemerval.zanella@linaro.org> wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> On 08/08/22 19:16, H.J. Lu wrote:
> >>>>>>
> >>>>>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>>>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> >>>>>>>> @@ -44,4 +44,7 @@
> >>>>>>>>
> >>>>>>>>  # define USE_LESS_VEC_MASK_STORE       1
> >>>>>>>>  # include "memset-vec-unaligned-erms.S"
> >>>>>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> >>>>>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
> >>>>>>>> +# endif
> >>>>>>>>  #endif
> >>>>>>>
> >>>>>>> This will define 2 __memset_generic for -march=x86-64-v4.
> >>>>>>
> >>>>>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
> >>>>>>
> >>>>>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
> >>>>>> 1
> >>>>>>
> >>>>>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
> >>>>>
> >>>>> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
> >>>>> should it be moved to sysdes/generic and each arch can provide a
> >>>>> suitable alias?
> >>>>
> >>>> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
> >>>> why I have added to elf/), but I don't have a strong preference.
> >>>
> >>> The default should be no alias.  Only IFUNC targets need to define a
> >>> proper alias.
> >>
> >> Alright, although I added the alias on all targets to avoid adding another
> >> hook to specify whether memset is implemented by IFUNC (to enable the
> >> symbol redirection instead of use the default symbol name).  I am not sure
> >> if making arch-specific will simplify the required code here.
> >
> > But your patch adds alias to all targets.
>
> Yes, because otherwise we will need something like HAVE_MEMSET_IFUNC to add:
>
> #if defined SHARED && HAVE_MEMSET_IFUNC
> asm ("memset = __memset_generic");
> #endif
>
> I don't have a strong preference, although it is another this will require
> to have kernel-features.h (which we usually add the HAVE_*) on elf objects.

There are

elf/Makefile:CFLAGS-dl-tunables.c += -fno-tree-loop-distribute-patterns
elf/Makefile:CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns
elf/Makefile:CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
include/libc-symbols.h:    __attribute__ ((__optimize__
("-fno-tree-loop-distribute-patterns")))
string/test-string.h:    __attribute__ ((__optimize__
("-fno-tree-loop-distribute-patterns")))

There should be no check on SHARED.   It can be something like

#define SYMBOL_STR1(s) #s
#define SYMBOL_STR(s) SYMBOL_STR1(s)

#ifdef DEFAULT_MEMSET
asm ("memset = " SYMBOL_STR (DEFAULT_MEMSET));
#endif
  
Noah Goldstein Aug. 10, 2022, 3:37 a.m. UTC | #10
On Tue, Aug 9, 2022 at 4:00 AM Adhemerval Zanella via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> Besides the option being gcc specific, this approach is still fragile
> and not future proof since we do not if this will be the only
> optimization option gcc will add that transform loops to memset
> (or any libcall).
>
> The patch add a new internal __memset_generic internal symbol on all
> ports and an extra symbol redirection to header to include on TU that
> required it.  The new header with the asm redirection is set per TU
> because the issue is very specific for glibc on binary loading, the
> default hidden_def symbol is used on default code.
>
> Checked on x86_64-linux-gnu and aarch64-linux-gnu.
> ---
>  benchtests/bench-bzero-large.c                |  1 +
>  benchtests/bench-bzero-walk.c                 |  1 +
>  benchtests/bench-bzero.c                      |  1 +
>  benchtests/bench-memset-large.c               |  1 +
>  benchtests/bench-memset-walk.c                |  1 +
>  benchtests/bench-memset.c                     |  1 +
>  elf/Makefile                                  |  5 ----
>  elf/dl-support.c                              |  1 +
>  elf/dl-symbol-hacks.h                         | 29 +++++++++++++++++++
>  string/memset.c                               |  1 +
>  sysdeps/aarch64/multiarch/rtld-memset.S       |  1 +
>  sysdeps/alpha/memset.S                        |  1 +
>  sysdeps/arm/memset.S                          |  1 +
>  sysdeps/csky/abiv2/memset.S                   |  1 +
>  sysdeps/i386/i586/memset.S                    |  1 +
>  sysdeps/i386/i686/memset.S                    |  1 +
>  sysdeps/i386/i686/multiarch/memset-ia32.S     |  5 ++++
>  sysdeps/i386/memset.S                         |  1 +
>  sysdeps/ia64/memset.S                         |  1 +
>  sysdeps/mips/memset.S                         |  1 +
>  sysdeps/powerpc/powerpc32/memset.S            |  1 +
>  sysdeps/powerpc/powerpc32/power4/memset.S     |  1 +
>  .../powerpc32/power4/multiarch/memset-ppc32.S |  1 +
>  sysdeps/powerpc/powerpc32/power6/memset.S     |  1 +
>  sysdeps/powerpc/powerpc32/power7/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/le/power10/memset.S |  1 +
>  .../powerpc64/multiarch/memset-power4.S       |  1 +
>  .../powerpc64/multiarch/memset-ppc64.S        |  1 +
>  sysdeps/powerpc/powerpc64/power4/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power6/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power7/memset.S     |  1 +
>  sysdeps/powerpc/powerpc64/power8/memset.S     |  1 +
>  sysdeps/s390/memset-z900.S                    |  1 +
>  sysdeps/sh/memset.S                           |  1 +
>  sysdeps/sparc/sparc32/memset.S                |  1 +
>  .../sparc32/sparcv9/multiarch/memset-ultra1.S |  1 +
>  sysdeps/sparc/sparc64/memset.S                |  1 +
>  .../sparc/sparc64/multiarch/memset-ultra1.S   |  1 +
>  sysdeps/x86_64/memset.S                       |  2 ++
>  .../multiarch/memset-avx2-unaligned-erms.S    |  3 ++
>  .../multiarch/memset-evex-unaligned-erms.S    |  3 ++
>  .../multiarch/memset-sse2-unaligned-erms.S    |  3 ++
>  42 files changed, 80 insertions(+), 5 deletions(-)
>  create mode 100644 elf/dl-symbol-hacks.h
>
> diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
> index cfc7b81cdf..f016165d45 100644
> --- a/benchtests/bench-bzero-large.c
> +++ b/benchtests/bench-bzero-large.c
> @@ -137,6 +137,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-bzero-walk.c b/benchtests/bench-bzero-walk.c
> index 851a72b137..25a1a38fa3 100644
> --- a/benchtests/bench-bzero-walk.c
> +++ b/benchtests/bench-bzero-walk.c
> @@ -136,6 +136,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-bzero.c b/benchtests/bench-bzero.c
> index 500b7eba96..90b807c98a 100644
> --- a/benchtests/bench-bzero.c
> +++ b/benchtests/bench-bzero.c
> @@ -155,6 +155,7 @@ test_main (void)
>  # define libc_hidden_builtin_def(X)
>  # define libc_hidden_def(X)
>  # define libc_hidden_weak(X)
> +# define strong_alias(X,Y)
>  # define weak_alias(X,Y)
>  # undef MEMSET
>  # define MEMSET generic_memset
> diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
> index 0ea1e3811b..6605e2d414 100644
> --- a/benchtests/bench-memset-large.c
> +++ b/benchtests/bench-memset-large.c
> @@ -118,6 +118,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #undef MEMSET
>  #define MEMSET generic_memset
> diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
> index 466ee1b8d6..f7fcdf90ba 100644
> --- a/benchtests/bench-memset-walk.c
> +++ b/benchtests/bench-memset-walk.c
> @@ -116,6 +116,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #undef MEMSET
>  #define MEMSET generic_memset
> diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
> index 6c95a1e965..c801c86b73 100644
> --- a/benchtests/bench-memset.c
> +++ b/benchtests/bench-memset.c
> @@ -147,6 +147,7 @@ test_main (void)
>  #define libc_hidden_builtin_def(X)
>  #define libc_hidden_def(X)
>  #define libc_hidden_weak(X)
> +#define strong_alias(X,Y)
>  #define weak_alias(X,Y)
>  #ifndef WIDE
>  # undef MEMSET
> diff --git a/elf/Makefile b/elf/Makefile
> index 3386f0ce77..4b992bd348 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -162,11 +162,6 @@ ifeq (yes,$(have-loop-to-function))
>  CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns
>  endif
>
> -ifeq (yes,$(have-loop-to-function))
> -# Likewise, during static library startup, memset is not yet available.
> -CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
> -endif
> -
>  # Compile rtld itself without stack protection.
>  # Also compile all routines in the static library that are elided from
>  # the shared libc because they are in libc.a in the same way.
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index 4af0b5b2ce..6d1996ae6e 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -44,6 +44,7 @@
>  #include <dl-auxv.h>
>  #include <dl-find_object.h>
>  #include <array_length.h>
> +#include <dl-symbol-hacks.h>
>
>  extern char *__progname;
>  char **_dl_argv = &__progname; /* This is checked for some error messages.  */
> diff --git a/elf/dl-symbol-hacks.h b/elf/dl-symbol-hacks.h
> new file mode 100644
> index 0000000000..b8caccb570
> --- /dev/null
> +++ b/elf/dl-symbol-hacks.h
> @@ -0,0 +1,29 @@
> +/* Symbol rediretion for loader/static initialization code.
> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _DL_SYMBOL_HACKS_H
> +#define _DL_SYMBOL_HACKS_H
> +
> +/* Some compiler optimizations may transform loops into memset/memmove
> +   calls and without proper redirection it might call PLT throught
> +   ifunc without relocations being processed.  */
> +#ifndef SHARED
> +asm ("memset = __memset_generic");
> +#endif
> +
> +#endif
> diff --git a/string/memset.c b/string/memset.c
> index 1303dd7ad3..b98bad7095 100644
> --- a/string/memset.c
> +++ b/string/memset.c
> @@ -88,3 +88,4 @@ MEMSET (void *dstpp, int c, size_t len)
>    return dstpp;
>  }
>  libc_hidden_builtin_def (MEMSET)
> +strong_alias (MEMSET, __memset_generic)
> diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
> index 7968d25e48..70591781d9 100644
> --- a/sysdeps/aarch64/multiarch/rtld-memset.S
> +++ b/sysdeps/aarch64/multiarch/rtld-memset.S
> @@ -22,4 +22,5 @@
>  #if IS_IN (rtld)
>  # define MEMSET memset
>  # include <sysdeps/aarch64/memset.S>
> +strong_alias (memset, __memset_generic)
>  #endif
> diff --git a/sysdeps/alpha/memset.S b/sysdeps/alpha/memset.S
> index 9249663d37..c5adae754b 100644
> --- a/sysdeps/alpha/memset.S
> +++ b/sysdeps/alpha/memset.S
> @@ -124,3 +124,4 @@ $done:      ret
>
>         cfi_endproc
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/arm/memset.S b/sysdeps/arm/memset.S
> index 9c05669864..9b9a3cb264 100644
> --- a/sysdeps/arm/memset.S
> +++ b/sysdeps/arm/memset.S
> @@ -66,3 +66,4 @@ ENTRY(memset)
>         DO_RET(lr)
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/csky/abiv2/memset.S b/sysdeps/csky/abiv2/memset.S
> index 41df8e2bb8..7e23edc787 100644
> --- a/sysdeps/csky/abiv2/memset.S
> +++ b/sysdeps/csky/abiv2/memset.S
> @@ -96,3 +96,4 @@ END (memset)
>
>  libc_hidden_builtin_def (memset)
>  .weak memset
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S
> index 672af41398..9d8dd40ad5 100644
> --- a/sysdeps/i386/i586/memset.S
> +++ b/sysdeps/i386/i586/memset.S
> @@ -101,3 +101,4 @@ L(2):       shrl    $2, %ecx        /* convert byte count to longword count */
>         ret
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
> index 3cb86c016d..3462cb99ab 100644
> --- a/sysdeps/i386/i686/memset.S
> +++ b/sysdeps/i386/i686/memset.S
> @@ -77,3 +77,4 @@ ENTRY (memset)
>         ret
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/i386/i686/multiarch/memset-ia32.S b/sysdeps/i386/i686/multiarch/memset-ia32.S
> index 8f0ae56cec..9a1b9c2b05 100644
> --- a/sysdeps/i386/i686/multiarch/memset-ia32.S
> +++ b/sysdeps/i386/i686/multiarch/memset-ia32.S
> @@ -31,3 +31,8 @@
>  #endif
>
>  #include <sysdeps/i386/i686/memset.S>
> +#if IS_IN(rtld)
> +strong_alias (memset, __memset_generic)
> +#else
> +strong_alias (__memset_ia32, __memset_generic)
> +#endif
> diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S
> index db2753eb2f..c105090f03 100644
> --- a/sysdeps/i386/memset.S
> +++ b/sysdeps/i386/memset.S
> @@ -54,3 +54,4 @@ ENTRY (memset)
>  END (memset)
>
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/ia64/memset.S b/sysdeps/ia64/memset.S
> index d52f23dd93..70ad1140b8 100644
> --- a/sysdeps/ia64/memset.S
> +++ b/sysdeps/ia64/memset.S
> @@ -395,3 +395,4 @@ store_words:
>  ;; }
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
> index c5ffab1d8c..cb5c9a140f 100644
> --- a/sysdeps/mips/memset.S
> +++ b/sysdeps/mips/memset.S
> @@ -424,3 +424,4 @@ END(MEMSET_NAME)
>  libc_hidden_builtin_def (MEMSET_NAME)
>  # endif
>  #endif
> +strong_alias(MEMSET_NAME, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
> index c125934f55..63ad3d2d2b 100644
> --- a/sysdeps/powerpc/powerpc32/memset.S
> +++ b/sysdeps/powerpc/powerpc32/memset.S
> @@ -302,3 +302,4 @@ L(handletail32):
>
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S
> index 40b140c841..98811c1a66 100644
> --- a/sysdeps/powerpc/powerpc32/power4/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power4/memset.S
> @@ -224,3 +224,4 @@ L(medium_28t):
>         blr
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> index 0937ba2215..4c5724bc60 100644
> --- a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> +++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
> @@ -39,3 +39,4 @@
>  #endif
>
>  #include <sysdeps/powerpc/powerpc32/power4/memset.S>
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S
> index d86701ff7c..5f4c8518bd 100644
> --- a/sysdeps/powerpc/powerpc32/power6/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power6/memset.S
> @@ -537,3 +537,4 @@ L(medium_28t):
>         blr
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc32/power7/memset.S b/sysdeps/powerpc/powerpc32/power7/memset.S
> index 368e8b3939..68a6ec18d8 100644
> --- a/sysdeps/powerpc/powerpc32/power7/memset.S
> +++ b/sysdeps/powerpc/powerpc32/power7/memset.S
> @@ -428,3 +428,4 @@ L(small):
>
>  END (memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S
> index 0f43b002bf..551e221fd2 100644
> --- a/sysdeps/powerpc/powerpc64/le/power10/memset.S
> +++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S
> @@ -242,3 +242,4 @@ L(bcdz_tail):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> index 4ee567c6f9..451de98f08 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
> @@ -22,3 +22,4 @@
>  #define libc_hidden_builtin_def(name)
>
>  #include <sysdeps/powerpc/powerpc64/power4/memset.S>
> +strong_alias (__memset_power4, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> index 30b25ef15f..91b9775edd 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> +++ b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
> @@ -26,3 +26,4 @@
>  #endif
>
>  #include <sysdeps/powerpc/powerpc64/memset.S>
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power4/memset.S b/sysdeps/powerpc/powerpc64/power4/memset.S
> index 0f14a5198a..91557f0442 100644
> --- a/sysdeps/powerpc/powerpc64/power4/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power4/memset.S
> @@ -237,3 +237,4 @@ L(medium_28t):
>         blr
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S
> index 140a756348..29ec7f200f 100644
> --- a/sysdeps/powerpc/powerpc64/power6/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power6/memset.S
> @@ -381,3 +381,4 @@ L(medium_28t):
>         blr
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
> index 358199a805..1afeaf754d 100644
> --- a/sysdeps/powerpc/powerpc64/power7/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power7/memset.S
> @@ -384,3 +384,4 @@ L(small):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
> index 70cace14ef..73676cdb88 100644
> --- a/sysdeps/powerpc/powerpc64/power8/memset.S
> +++ b/sysdeps/powerpc/powerpc64/power8/memset.S
> @@ -504,3 +504,4 @@ L(LE7_tail5):
>
>  END_GEN_TB (MEMSET,TB_TOCLESS)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
> index 7adb466bb1..beecfa2815 100644
> --- a/sysdeps/s390/memset-z900.S
> +++ b/sysdeps/s390/memset-z900.S
> @@ -185,3 +185,4 @@ strong_alias (MEMSET_DEFAULT, memset)
>     Compare to libc_hidden_builtin_def (memset) in string/memset.c.  */
>  strong_alias (MEMSET_DEFAULT, __GI_memset)
>  #endif
> +strong_alias (MEMSET_DEFAULT, __memset_generic)
> diff --git a/sysdeps/sh/memset.S b/sysdeps/sh/memset.S
> index 7852b472eb..fc36bc836a 100644
> --- a/sysdeps/sh/memset.S
> +++ b/sysdeps/sh/memset.S
> @@ -84,3 +84,4 @@ L_byte_exit:
>         mov     r7,r0
>  END(memset)
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
> diff --git a/sysdeps/sparc/sparc32/memset.S b/sysdeps/sparc/sparc32/memset.S
> index b1b67cb2d1..5026275239 100644
> --- a/sysdeps/sparc/sparc32/memset.S
> +++ b/sysdeps/sparc/sparc32/memset.S
> @@ -142,4 +142,5 @@ ENTRY(memset)
>         b               4b
>          sub            %o0, %o2, %o0
>  END(memset)
> +strong_alias (memset, __memset_generic)
>  libc_hidden_builtin_def (memset)
> diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> index 2dda6f1ed6..e5082fa161 100644
> --- a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> +++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
> @@ -27,3 +27,4 @@
>  # define memset  __memset_ultra1
>  # include <sysdeps/sparc/sparc32/sparcv9/memset.S>
>  #endif
> +strong_alias (__memset_ultra1, __memset_generic)
> diff --git a/sysdeps/sparc/sparc64/memset.S b/sysdeps/sparc/sparc64/memset.S
> index 33ecbc93fe..6a68ab52cb 100644
> --- a/sysdeps/sparc/sparc64/memset.S
> +++ b/sysdeps/sparc/sparc64/memset.S
> @@ -303,4 +303,5 @@ ENTRY(memset)
>  0:     retl
>          mov            %o5, %o0
>  END(memset)
> +strong_alias (memset, __memset_generic)
>  libc_hidden_builtin_def (memset)
> diff --git a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> index 3c3add791e..478a106c83 100644
> --- a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> +++ b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
> @@ -27,3 +27,4 @@
>  # define memset  __memset_ultra1
>  # include <sysdeps/sparc/sparc64/memset.S>
>  #endif
> +strong_alias (__memset_ultra1, __memset_generic)
> diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
> index f4e1bab601..52fb00b17b 100644
> --- a/sysdeps/x86_64/memset.S
> +++ b/sysdeps/x86_64/memset.S
> @@ -25,6 +25,7 @@
>  #define WMEMSET_SYMBOL(p,s)    __wmemset
>  #define WMEMSET_CHK_SYMBOL(p,s) p
>
> +#define NO_MULTIARCH
>  #define DEFAULT_IMPL_V1        "multiarch/memset-sse2-unaligned-erms.S"
>  #define DEFAULT_IMPL_V3        "multiarch/memset-avx2-unaligned-erms.S"
>  #define DEFAULT_IMPL_V4        "multiarch/memset-evex-unaligned-erms.S"
> @@ -32,6 +33,7 @@
>  #include "isa-default-impl.h"
>
>  libc_hidden_builtin_def (memset)
> +strong_alias (memset, __memset_generic)
>
>  #if IS_IN (libc)
>  libc_hidden_def (__wmemset)
> diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> index a9054a9122..c2da716d7c 100644
> --- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
> @@ -38,4 +38,7 @@
>
>  # define USE_XMM_LESS_VEC
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 3
> +strong_alias (__memset_avx2_unaligned, __memset_generic)
> +# endif
>  #endif
> diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> index ac4b2d2d50..034d19b35e 100644
> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
> @@ -44,4 +44,7 @@
>
>  # define USE_LESS_VEC_MASK_STORE       1
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
> +strong_alias (__memset_evex_unaligned, __memset_generic)
> +# endif

Instead of adding aliases in each file can't you just add one alias
at the end of multiarch/rtld-memset.S?

Might be cleaner. Generally prefer keeping all the alias stuff
out of the implementation files if possible.
>  #endif
> diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> index 44f9b8888b..143055adff 100644
> --- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
> @@ -63,5 +63,8 @@
>  # endif
>
>  # include "memset-vec-unaligned-erms.S"
> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL <= 2
> +strong_alias (__memset_sse2_unaligned, __memset_generic)
> +# endif
>
>  #endif
> --
> 2.34.1
>
  
Adhemerval Zanella Aug. 10, 2022, 1:04 p.m. UTC | #11
On 10/08/22 00:37, Noah Goldstein wrote:
>>  #endif
>> diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>> index ac4b2d2d50..034d19b35e 100644
>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>> @@ -44,4 +44,7 @@
>>
>>  # define USE_LESS_VEC_MASK_STORE       1
>>  # include "memset-vec-unaligned-erms.S"
>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>> +# endif
> 
> Instead of adding aliases in each file can't you just add one alias
> at the end of multiarch/rtld-memset.S?
> 
> Might be cleaner. Generally prefer keeping all the alias stuff
> out of the implementation files if possible.

The problem is not really the loader, since we do not support calling ifunc
from it (that's why multiple ports have rtld-* overrides); the issue is the
initialization for static binaries, where either function calls are not possible
(dl-tunables.c) or ifunc calls are not possible (dl-support.c).

In any case, I am experimenting with H.J.'s suggestion to alias to a per-arch
symbol instead of adding a generic one.
  
Adhemerval Zanella Aug. 10, 2022, 1:12 p.m. UTC | #12
On 09/08/22 18:51, H.J. Lu wrote:
> On Tue, Aug 9, 2022 at 12:40 PM Adhemerval Zanella Netto
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>>
>> On 09/08/22 16:34, H.J. Lu wrote:
>>> On Tue, Aug 9, 2022 at 12:32 PM Adhemerval Zanella Netto
>>> <adhemerval.zanella@linaro.org> wrote:
>>>>
>>>>
>>>>
>>>> On 09/08/22 16:06, H.J. Lu wrote:
>>>>> On Tue, Aug 9, 2022 at 11:48 AM Adhemerval Zanella Netto
>>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 09/08/22 15:31, H.J. Lu wrote:
>>>>>>> On Tue, Aug 9, 2022 at 5:11 AM Adhemerval Zanella Netto
>>>>>>> <adhemerval.zanella@linaro.org> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 08/08/22 19:16, H.J. Lu wrote:
>>>>>>>>
>>>>>>>>>> --- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>>>>>> +++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
>>>>>>>>>> @@ -44,4 +44,7 @@
>>>>>>>>>>
>>>>>>>>>>  # define USE_LESS_VEC_MASK_STORE       1
>>>>>>>>>>  # include "memset-vec-unaligned-erms.S"
>>>>>>>>>> +# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
>>>>>>>>>> +strong_alias (__memset_evex_unaligned, __memset_generic)
>>>>>>>>>> +# endif
>>>>>>>>>>  #endif
>>>>>>>>>
>>>>>>>>> This will define 2 __memset_generic for -march=x86-64-v4.
>>>>>>>>
>>>>>>>> Are you sure? Check with a x86_64 build with -march=x86-64-v4 I am seeing:
>>>>>>>>
>>>>>>>> $ readelf -Ws string/memset*.os | grep -w __memset_generic | wc -l
>>>>>>>> 1
>>>>>>>>
>>>>>>>> And it is indeed provided only by string/memset-evex-unaligned-erms.os.
>>>>>>>
>>>>>>> You are right.  Since dl-symbol-hacks.h defines memset to an alias,
>>>>>>> should it be moved to sysdes/generic and each arch can provide a
>>>>>>> suitable alias?
>>>>>>
>>>>>> The dl-symbol-hacks.h is meant to be used solely by loader code (that's
>>>>>> why I have added to elf/), but I don't have a strong preference.
>>>>>
>>>>> The default should be no alias.  Only IFUNC targets need to define a
>>>>> proper alias.
>>>>
>>>> Alright, although I added the alias on all targets to avoid adding another
>>>> hook to specify whether memset is implemented by IFUNC (to enable the
>>>> symbol redirection instead of use the default symbol name).  I am not sure
>>>> if making arch-specific will simplify the required code here.
>>>
>>> But your patch adds alias to all targets.
>>
>> Yes, because otherwise we will need something like HAVE_MEMSET_IFUNC to add:
>>
>> #if defined SHARED && HAVE_MEMSET_IFUNC
>> asm ("memset = __memset_generic");
>> #endif
>>
>> I don't have a strong preference, although it is another this will require
>> to have kernel-features.h (which we usually add the HAVE_*) on elf objects.
> 
> There are
> 
> elf/Makefile:CFLAGS-dl-tunables.c += -fno-tree-loop-distribute-patterns
> elf/Makefile:CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns

Unfortunately we can't really remove these two without refactoring the code to
avoid the memset constructions, because function calls are not really allowed.

For dl-tunables.c I am not sure if we actually need to filter out the invalid
options to create a new tunable, since we already do not set them for glibc
itself.  The program will see the original value, but it should not matter since
glibc would already have handled it anyway, and it might avoid the need to strdup
the GLIBC_TUNABLES.

> elf/Makefile:CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
> include/libc-symbols.h:    __attribute__ ((__optimize__
> ("-fno-tree-loop-distribute-patterns")))

We need this to avoid the memset recursion when gcc sees the memset implementation
as memset itself.

> string/test-string.h:    __attribute__ ((__optimize__
> ("-fno-tree-loop-distribute-patterns")))
> 
> There should be no check on SHARED.   It can be something like
> 
> #define SYMBOL_STR1(s) #s
> #define SYMBOL_STR(s) SYMBOL_STR1(s)
> 
> #ifdef DEFAULT_MEMSET
> asm ("memset = " _SYMBOL_STR (DEFAULT_MEMSET));
> #endif

Yeah, it could work.  I will redo the patch based on this; it should simplify the
changes for other ports.
  

Patch

diff --git a/benchtests/bench-bzero-large.c b/benchtests/bench-bzero-large.c
index cfc7b81cdf..f016165d45 100644
--- a/benchtests/bench-bzero-large.c
+++ b/benchtests/bench-bzero-large.c
@@ -137,6 +137,7 @@  test_main (void)
 # define libc_hidden_builtin_def(X)
 # define libc_hidden_def(X)
 # define libc_hidden_weak(X)
+# define strong_alias(X,Y)
 # define weak_alias(X,Y)
 # undef MEMSET
 # define MEMSET generic_memset
diff --git a/benchtests/bench-bzero-walk.c b/benchtests/bench-bzero-walk.c
index 851a72b137..25a1a38fa3 100644
--- a/benchtests/bench-bzero-walk.c
+++ b/benchtests/bench-bzero-walk.c
@@ -136,6 +136,7 @@  test_main (void)
 # define libc_hidden_builtin_def(X)
 # define libc_hidden_def(X)
 # define libc_hidden_weak(X)
+# define strong_alias(X,Y)
 # define weak_alias(X,Y)
 # undef MEMSET
 # define MEMSET generic_memset
diff --git a/benchtests/bench-bzero.c b/benchtests/bench-bzero.c
index 500b7eba96..90b807c98a 100644
--- a/benchtests/bench-bzero.c
+++ b/benchtests/bench-bzero.c
@@ -155,6 +155,7 @@  test_main (void)
 # define libc_hidden_builtin_def(X)
 # define libc_hidden_def(X)
 # define libc_hidden_weak(X)
+# define strong_alias(X,Y)
 # define weak_alias(X,Y)
 # undef MEMSET
 # define MEMSET generic_memset
diff --git a/benchtests/bench-memset-large.c b/benchtests/bench-memset-large.c
index 0ea1e3811b..6605e2d414 100644
--- a/benchtests/bench-memset-large.c
+++ b/benchtests/bench-memset-large.c
@@ -118,6 +118,7 @@  test_main (void)
 #define libc_hidden_builtin_def(X)
 #define libc_hidden_def(X)
 #define libc_hidden_weak(X)
+#define strong_alias(X,Y)
 #define weak_alias(X,Y)
 #undef MEMSET
 #define MEMSET generic_memset
diff --git a/benchtests/bench-memset-walk.c b/benchtests/bench-memset-walk.c
index 466ee1b8d6..f7fcdf90ba 100644
--- a/benchtests/bench-memset-walk.c
+++ b/benchtests/bench-memset-walk.c
@@ -116,6 +116,7 @@  test_main (void)
 #define libc_hidden_builtin_def(X)
 #define libc_hidden_def(X)
 #define libc_hidden_weak(X)
+#define strong_alias(X,Y)
 #define weak_alias(X,Y)
 #undef MEMSET
 #define MEMSET generic_memset
diff --git a/benchtests/bench-memset.c b/benchtests/bench-memset.c
index 6c95a1e965..c801c86b73 100644
--- a/benchtests/bench-memset.c
+++ b/benchtests/bench-memset.c
@@ -147,6 +147,7 @@  test_main (void)
 #define libc_hidden_builtin_def(X)
 #define libc_hidden_def(X)
 #define libc_hidden_weak(X)
+#define strong_alias(X,Y)
 #define weak_alias(X,Y)
 #ifndef WIDE
 # undef MEMSET
diff --git a/elf/Makefile b/elf/Makefile
index 3386f0ce77..4b992bd348 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -162,11 +162,6 @@  ifeq (yes,$(have-loop-to-function))
 CFLAGS-rtld.c += -fno-tree-loop-distribute-patterns
 endif
 
-ifeq (yes,$(have-loop-to-function))
-# Likewise, during static library startup, memset is not yet available.
-CFLAGS-dl-support.c = -fno-tree-loop-distribute-patterns
-endif
-
 # Compile rtld itself without stack protection.
 # Also compile all routines in the static library that are elided from
 # the shared libc because they are in libc.a in the same way.
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 4af0b5b2ce..6d1996ae6e 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -44,6 +44,7 @@ 
 #include <dl-auxv.h>
 #include <dl-find_object.h>
 #include <array_length.h>
+#include <dl-symbol-hacks.h>
 
 extern char *__progname;
 char **_dl_argv = &__progname;	/* This is checked for some error messages.  */
diff --git a/elf/dl-symbol-hacks.h b/elf/dl-symbol-hacks.h
new file mode 100644
index 0000000000..b8caccb570
--- /dev/null
+++ b/elf/dl-symbol-hacks.h
@@ -0,0 +1,29 @@ 
+/* Symbol redirection for loader/static initialization code.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_SYMBOL_HACKS_H
+#define _DL_SYMBOL_HACKS_H
+
+/* Some compiler optimizations may transform loops into memset/memmove
+   calls, and without proper redirection such a call might go through
+   the PLT and an ifunc before relocations have been processed.  */
+#ifndef SHARED
+asm ("memset = __memset_generic");
+#endif
+
+#endif
diff --git a/string/memset.c b/string/memset.c
index 1303dd7ad3..b98bad7095 100644
--- a/string/memset.c
+++ b/string/memset.c
@@ -88,3 +88,4 @@  MEMSET (void *dstpp, int c, size_t len)
   return dstpp;
 }
 libc_hidden_builtin_def (MEMSET)
+strong_alias (MEMSET, __memset_generic)
diff --git a/sysdeps/aarch64/multiarch/rtld-memset.S b/sysdeps/aarch64/multiarch/rtld-memset.S
index 7968d25e48..70591781d9 100644
--- a/sysdeps/aarch64/multiarch/rtld-memset.S
+++ b/sysdeps/aarch64/multiarch/rtld-memset.S
@@ -22,4 +22,5 @@ 
 #if IS_IN (rtld)
 # define MEMSET memset
 # include <sysdeps/aarch64/memset.S>
+strong_alias (memset, __memset_generic)
 #endif
diff --git a/sysdeps/alpha/memset.S b/sysdeps/alpha/memset.S
index 9249663d37..c5adae754b 100644
--- a/sysdeps/alpha/memset.S
+++ b/sysdeps/alpha/memset.S
@@ -124,3 +124,4 @@  $done:	ret
 
 	cfi_endproc
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/arm/memset.S b/sysdeps/arm/memset.S
index 9c05669864..9b9a3cb264 100644
--- a/sysdeps/arm/memset.S
+++ b/sysdeps/arm/memset.S
@@ -66,3 +66,4 @@  ENTRY(memset)
 	DO_RET(lr)
 END(memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/csky/abiv2/memset.S b/sysdeps/csky/abiv2/memset.S
index 41df8e2bb8..7e23edc787 100644
--- a/sysdeps/csky/abiv2/memset.S
+++ b/sysdeps/csky/abiv2/memset.S
@@ -96,3 +96,4 @@  END (memset)
 
 libc_hidden_builtin_def (memset)
 .weak memset
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/i386/i586/memset.S b/sysdeps/i386/i586/memset.S
index 672af41398..9d8dd40ad5 100644
--- a/sysdeps/i386/i586/memset.S
+++ b/sysdeps/i386/i586/memset.S
@@ -101,3 +101,4 @@  L(2):	shrl	$2, %ecx	/* convert byte count to longword count */
 	ret
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index 3cb86c016d..3462cb99ab 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -77,3 +77,4 @@  ENTRY (memset)
 	ret
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/i386/i686/multiarch/memset-ia32.S b/sysdeps/i386/i686/multiarch/memset-ia32.S
index 8f0ae56cec..9a1b9c2b05 100644
--- a/sysdeps/i386/i686/multiarch/memset-ia32.S
+++ b/sysdeps/i386/i686/multiarch/memset-ia32.S
@@ -31,3 +31,8 @@ 
 #endif
 
 #include <sysdeps/i386/i686/memset.S>
+#if IS_IN(rtld)
+strong_alias (memset, __memset_generic)
+#else
+strong_alias (__memset_ia32, __memset_generic)
+#endif
diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S
index db2753eb2f..c105090f03 100644
--- a/sysdeps/i386/memset.S
+++ b/sysdeps/i386/memset.S
@@ -54,3 +54,4 @@  ENTRY (memset)
 END (memset)
 
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/ia64/memset.S b/sysdeps/ia64/memset.S
index d52f23dd93..70ad1140b8 100644
--- a/sysdeps/ia64/memset.S
+++ b/sysdeps/ia64/memset.S
@@ -395,3 +395,4 @@  store_words:
 ;; }
 END(memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/mips/memset.S b/sysdeps/mips/memset.S
index c5ffab1d8c..cb5c9a140f 100644
--- a/sysdeps/mips/memset.S
+++ b/sysdeps/mips/memset.S
@@ -424,3 +424,4 @@  END(MEMSET_NAME)
 libc_hidden_builtin_def (MEMSET_NAME)
 # endif
 #endif
+strong_alias(MEMSET_NAME, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc32/memset.S b/sysdeps/powerpc/powerpc32/memset.S
index c125934f55..63ad3d2d2b 100644
--- a/sysdeps/powerpc/powerpc32/memset.S
+++ b/sysdeps/powerpc/powerpc32/memset.S
@@ -302,3 +302,4 @@  L(handletail32):
 
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc32/power4/memset.S b/sysdeps/powerpc/powerpc32/power4/memset.S
index 40b140c841..98811c1a66 100644
--- a/sysdeps/powerpc/powerpc32/power4/memset.S
+++ b/sysdeps/powerpc/powerpc32/power4/memset.S
@@ -224,3 +224,4 @@  L(medium_28t):
 	blr
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
index 0937ba2215..4c5724bc60 100644
--- a/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
+++ b/sysdeps/powerpc/powerpc32/power4/multiarch/memset-ppc32.S
@@ -39,3 +39,4 @@ 
 #endif
 
 #include <sysdeps/powerpc/powerpc32/power4/memset.S>
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc32/power6/memset.S b/sysdeps/powerpc/powerpc32/power6/memset.S
index d86701ff7c..5f4c8518bd 100644
--- a/sysdeps/powerpc/powerpc32/power6/memset.S
+++ b/sysdeps/powerpc/powerpc32/power6/memset.S
@@ -537,3 +537,4 @@  L(medium_28t):
 	blr
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc32/power7/memset.S b/sysdeps/powerpc/powerpc32/power7/memset.S
index 368e8b3939..68a6ec18d8 100644
--- a/sysdeps/powerpc/powerpc32/power7/memset.S
+++ b/sysdeps/powerpc/powerpc32/power7/memset.S
@@ -428,3 +428,4 @@  L(small):
 
 END (memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memset.S b/sysdeps/powerpc/powerpc64/le/power10/memset.S
index 0f43b002bf..551e221fd2 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memset.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memset.S
@@ -242,3 +242,4 @@  L(bcdz_tail):
 
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
index 4ee567c6f9..451de98f08 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset-power4.S
@@ -22,3 +22,4 @@ 
 #define libc_hidden_builtin_def(name)
 
 #include <sysdeps/powerpc/powerpc64/power4/memset.S>
+strong_alias (__memset_power4, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
index 30b25ef15f..91b9775edd 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
+++ b/sysdeps/powerpc/powerpc64/multiarch/memset-ppc64.S
@@ -26,3 +26,4 @@ 
 #endif
 
 #include <sysdeps/powerpc/powerpc64/memset.S>
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/power4/memset.S b/sysdeps/powerpc/powerpc64/power4/memset.S
index 0f14a5198a..91557f0442 100644
--- a/sysdeps/powerpc/powerpc64/power4/memset.S
+++ b/sysdeps/powerpc/powerpc64/power4/memset.S
@@ -237,3 +237,4 @@  L(medium_28t):
 	blr
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/power6/memset.S b/sysdeps/powerpc/powerpc64/power6/memset.S
index 140a756348..29ec7f200f 100644
--- a/sysdeps/powerpc/powerpc64/power6/memset.S
+++ b/sysdeps/powerpc/powerpc64/power6/memset.S
@@ -381,3 +381,4 @@  L(medium_28t):
 	blr
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/power7/memset.S b/sysdeps/powerpc/powerpc64/power7/memset.S
index 358199a805..1afeaf754d 100644
--- a/sysdeps/powerpc/powerpc64/power7/memset.S
+++ b/sysdeps/powerpc/powerpc64/power7/memset.S
@@ -384,3 +384,4 @@  L(small):
 
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
index 70cace14ef..73676cdb88 100644
--- a/sysdeps/powerpc/powerpc64/power8/memset.S
+++ b/sysdeps/powerpc/powerpc64/power8/memset.S
@@ -504,3 +504,4 @@  L(LE7_tail5):
 
 END_GEN_TB (MEMSET,TB_TOCLESS)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
index 7adb466bb1..beecfa2815 100644
--- a/sysdeps/s390/memset-z900.S
+++ b/sysdeps/s390/memset-z900.S
@@ -185,3 +185,4 @@  strong_alias (MEMSET_DEFAULT, memset)
    Compare to libc_hidden_builtin_def (memset) in string/memset.c.  */
 strong_alias (MEMSET_DEFAULT, __GI_memset)
 #endif
+strong_alias (MEMSET_DEFAULT, __memset_generic)
diff --git a/sysdeps/sh/memset.S b/sysdeps/sh/memset.S
index 7852b472eb..fc36bc836a 100644
--- a/sysdeps/sh/memset.S
+++ b/sysdeps/sh/memset.S
@@ -84,3 +84,4 @@  L_byte_exit:
 	mov	r7,r0
 END(memset)
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
diff --git a/sysdeps/sparc/sparc32/memset.S b/sysdeps/sparc/sparc32/memset.S
index b1b67cb2d1..5026275239 100644
--- a/sysdeps/sparc/sparc32/memset.S
+++ b/sysdeps/sparc/sparc32/memset.S
@@ -142,4 +142,5 @@  ENTRY(memset)
 	b		4b
 	 sub		%o0, %o2, %o0
 END(memset)
+strong_alias (memset, __memset_generic)
 libc_hidden_builtin_def (memset)
diff --git a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
index 2dda6f1ed6..e5082fa161 100644
--- a/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
+++ b/sysdeps/sparc/sparc32/sparcv9/multiarch/memset-ultra1.S
@@ -27,3 +27,4 @@ 
 # define memset  __memset_ultra1
 # include <sysdeps/sparc/sparc32/sparcv9/memset.S>
 #endif
+strong_alias (__memset_ultra1, __memset_generic)
diff --git a/sysdeps/sparc/sparc64/memset.S b/sysdeps/sparc/sparc64/memset.S
index 33ecbc93fe..6a68ab52cb 100644
--- a/sysdeps/sparc/sparc64/memset.S
+++ b/sysdeps/sparc/sparc64/memset.S
@@ -303,4 +303,5 @@  ENTRY(memset)
 0:	retl
 	 mov		%o5, %o0
 END(memset)
+strong_alias (memset, __memset_generic)
 libc_hidden_builtin_def (memset)
diff --git a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
index 3c3add791e..478a106c83 100644
--- a/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
+++ b/sysdeps/sparc/sparc64/multiarch/memset-ultra1.S
@@ -27,3 +27,4 @@ 
 # define memset  __memset_ultra1
 # include <sysdeps/sparc/sparc64/memset.S>
 #endif
+strong_alias (__memset_ultra1, __memset_generic)
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index f4e1bab601..52fb00b17b 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -25,6 +25,7 @@ 
 #define WMEMSET_SYMBOL(p,s)	__wmemset
 #define WMEMSET_CHK_SYMBOL(p,s) p
 
+#define NO_MULTIARCH
 #define DEFAULT_IMPL_V1	"multiarch/memset-sse2-unaligned-erms.S"
 #define DEFAULT_IMPL_V3	"multiarch/memset-avx2-unaligned-erms.S"
 #define DEFAULT_IMPL_V4	"multiarch/memset-evex-unaligned-erms.S"
@@ -32,6 +33,7 @@ 
 #include "isa-default-impl.h"
 
 libc_hidden_builtin_def (memset)
+strong_alias (memset, __memset_generic)
 
 #if IS_IN (libc)
 libc_hidden_def (__wmemset)
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
index a9054a9122..c2da716d7c 100644
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
@@ -38,4 +38,7 @@ 
 
 # define USE_XMM_LESS_VEC
 # include "memset-vec-unaligned-erms.S"
+# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 3
+strong_alias (__memset_avx2_unaligned, __memset_generic)
+# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
index ac4b2d2d50..034d19b35e 100644
--- a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
@@ -44,4 +44,7 @@ 
 
 # define USE_LESS_VEC_MASK_STORE	1
 # include "memset-vec-unaligned-erms.S"
+# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL >= 4
+strong_alias (__memset_evex_unaligned, __memset_generic)
+# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
index 44f9b8888b..143055adff 100644
--- a/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-sse2-unaligned-erms.S
@@ -63,5 +63,8 @@ 
 # endif
 
 # include "memset-vec-unaligned-erms.S"
+# if !defined NO_MULTIARCH && MINIMUM_X86_ISA_LEVEL <= 2
+strong_alias (__memset_sse2_unaligned, __memset_generic)
+# endif
 
 #endif