RFC: Rewrite x86-64 IFUNC selector in C
Commit Message
On 28/05/2017 13:26, H.J. Lu wrote:
> On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella
> <adhemerval.zanella@linaro.org> wrote:
>>
>>
>> On 25/05/2017 18:38, H.J. Lu wrote:
>>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote:
>>>> Ok, I'll get started then.
>>>>
>>>> Are there any general comments about the attached conversion for
>>>> memcpy? Just so I don't repeat the same wrong thing many times.
>>>
>>> You missed:
>>>
>>> /* Define multiple versions only for the definition in lib and for
>>> DSO. In static binaries we need memcpy before the initialization
>>> happened. */
>>> #if defined SHARED && IS_IN (libc)
>>>
>>> +typedef void * (*memcpy_fn)(void *, const void *, size_t);
>>> +
>>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void
>>> *src, size_t n);
>>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void
>>> *src, size_t n);
>>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n);
>>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void
>>> *src, size_t n);
>>>
>>> Please use something similar to multiarch/strstr.c:
>>>
>>> /* Redefine strstr so that the compiler won't complain about the type
>>> mismatch with the IFUNC selector in strong_alias, below. */
>>> #undef strstr
>>> #define strstr __redirect_strstr
>>> #include <string.h>
>>> #undef strstr
>>> ...
>>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
>>>
>>> +/* Defined in cacheinfo.c */
>>> +extern long int __x86_shared_cache_size attribute_hidden;
>>> +extern long int __x86_shared_cache_size_half attribute_hidden;
>>> +extern long int __x86_data_cache_size attribute_hidden;
>>> +extern long int __x86_data_cache_size_half attribute_hidden;
>>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden;
>>
>> It seems it will be used not only for memcpy, so I would suggest to add
>> on a common header on multiarch.
>>
>>>
>>> Remove them.
>>> static void * select_memcpy_impl(void) {
>>> + const struct cpu_features* cpu_features_struct_p = __get_cpu_features ();
>>> +
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) {
>>> + return __memcpy_erms;
>>> + }
>>> +
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) {
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER))
>>> + return __memcpy_avx512_unaligned_erms;
>>> + return __memcpy_avx512_unaligned;
>>> + }
>>> +
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) {
>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>> + return __memcpy_avx_unaligned_erms;
>>> +
>>> + }
>>> + return __memcpy_avx_unaligned;
>>> + }
>>> + else {
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) {
>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>> + return __memcpy_sse2_unaligned_erms;
>>> +
>>> + }
>>> + return __memcpy_sse2_unaligned;
>>> + }
>>> + else {
>>> + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) {
>>> + return __memcpy_sse2_unaligned;
>>> +
>>> + }
>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) {
>>> + return __memcpy_ssse3_back;
>>> +
>>> + }
>>> + return __memcpy_ssse3;
>>> + }
>>> + }
>>> +}
>>>
>>> Please
>>>
>>> 1. Fix formatting.
>>> 2. Remove unnecessary {}.
>>> 3. Don't use "else".
>>>
>>> +void *__new_memcpy(void *dest, const void *src, size_t n)
>>> + __attribute__ ((ifunc ("select_memcpy_impl")));
>>>
>>> Use "typeof" here.
>>
>> We have the libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support
>> from compiler. I think you can use:
>>
>> # include <string.h>
>>
>> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden;
>>
>> static void *memcpy_selector (void)
>> {
>> // fill me.
>> }
>>
>> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector);
>> libc_hidden_def (memcpy)
>
> Here is my take. It only covers memcpy and mempcpy. Please
> extend it to memmove as well as *_chk functions.
>
I think we can simplify it further and use the existing ifunc macros in
libc-symbols.h. Also, for memmove I think we can organize the code better (at
least for ifunc) and build an extra object with a more meaningful name. I used
your logic for the ifunc selection and extended it to memmove as well.
Comments
I was working on some of the other functions and noticed the following
error when I ran `make check` with the original patch (I didn't try
Adhemerval's):
test-canon2.c:(.text+0x33): undefined reference to `mempcpy'
On Mon, May 29, 2017 at 1:34 PM, Adhemerval Zanella
<adhemerval.zanella@linaro.org> wrote:
>
>
> On 28/05/2017 13:26, H.J. Lu wrote:
>> On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella
>> <adhemerval.zanella@linaro.org> wrote:
>>>
>>>
>>> On 25/05/2017 18:38, H.J. Lu wrote:
>>>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen <eriche@google.com> wrote:
>>>>> Ok, I'll get started then.
>>>>>
>>>>> Are there any general comments about the attached conversion for
>>>>> memcpy? Just so I don't repeat the same wrong thing many times.
>>>>
>>>> You missed:
>>>>
>>>> /* Define multiple versions only for the definition in lib and for
>>>> DSO. In static binaries we need memcpy before the initialization
>>>> happened. */
>>>> #if defined SHARED && IS_IN (libc)
>>>>
>>>> +typedef void * (*memcpy_fn)(void *, const void *, size_t);
>>>> +
>>>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void
>>>> *src, size_t n);
>>>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void
>>>> *src, size_t n);
>>>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n);
>>>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void
>>>> *src, size_t n);
>>>>
>>>> Please use something similar to multiarch/strstr.c:
>>>>
>>>> /* Redefine strstr so that the compiler won't complain about the type
>>>> mismatch with the IFUNC selector in strong_alias, below. */
>>>> #undef strstr
>>>> #define strstr __redirect_strstr
>>>> #include <string.h>
>>>> #undef strstr
>>>> ...
>>>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
>>>>
>>>> +/* Defined in cacheinfo.c */
>>>> +extern long int __x86_shared_cache_size attribute_hidden;
>>>> +extern long int __x86_shared_cache_size_half attribute_hidden;
>>>> +extern long int __x86_data_cache_size attribute_hidden;
>>>> +extern long int __x86_data_cache_size_half attribute_hidden;
>>>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden;
>>>
>>> It seems it will be used not only for memcpy, so I would suggest to add
>>> on a common header on multiarch.
>>>
>>>>
>>>> Remove them.
>>>> static void * select_memcpy_impl(void) {
>>>> + const struct cpu_features* cpu_features_struct_p = __get_cpu_features ();
>>>> +
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) {
>>>> + return __memcpy_erms;
>>>> + }
>>>> +
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) {
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER))
>>>> + return __memcpy_avx512_unaligned_erms;
>>>> + return __memcpy_avx512_unaligned;
>>>> + }
>>>> +
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) {
>>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>>> + return __memcpy_avx_unaligned_erms;
>>>> +
>>>> + }
>>>> + return __memcpy_avx_unaligned;
>>>> + }
>>>> + else {
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) {
>>>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) {
>>>> + return __memcpy_sse2_unaligned_erms;
>>>> +
>>>> + }
>>>> + return __memcpy_sse2_unaligned;
>>>> + }
>>>> + else {
>>>> + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) {
>>>> + return __memcpy_sse2_unaligned;
>>>> +
>>>> + }
>>>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) {
>>>> + return __memcpy_ssse3_back;
>>>> +
>>>> + }
>>>> + return __memcpy_ssse3;
>>>> + }
>>>> + }
>>>> +}
>>>>
>>>> Please
>>>>
>>>> 1. Fix formatting.
>>>> 2. Remove unnecessary {}.
>>>> 3. Don't use "else".
>>>>
>>>> +void *__new_memcpy(void *dest, const void *src, size_t n)
>>>> + __attribute__ ((ifunc ("select_memcpy_impl")));
>>>>
>>>> Use "typeof" here.
>>>
>>> We have the libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support
>>> from compiler. I think you can use:
>>>
>>> # include <string.h>
>>>
>>> // extern __typeof (memcpy) __memcpy_<each supported one> attribute_hidden;
>>>
>>> static void *memcpy_selector (void)
>>> {
>>> // fill me.
>>> }
>>>
>>> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector);
>>> libc_hidden_def (memcpy)
>>
>> Here is my take. It only covers memcpy and mempcpy. Please
>> extend it to memmove as well as *_chk functions.
>>
>
> I think we can simplify it further and use the existing ifunc macros in
> libc-symbols.h. Also, for memmove I think we can organize the code better (at
> least for ifunc) and build an extra object with a more meaningful name. I used
> your logic for the ifunc selection and extended it to memmove as well.
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 3736f54..b6179aa 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -7,6 +7,7 @@ ifeq ($(subdir),string)
> sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
> strcmp-sse2-unaligned strncmp-ssse3 \
> memcmp-sse4 memcpy-ssse3 \
> + mem-impls \
> memmove-ssse3 \
> memcpy-ssse3-back \
> memmove-ssse3-back \
> diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/mem-impls.S
> similarity index 52%
> rename from sysdeps/x86_64/multiarch/memmove.S
> rename to sysdeps/x86_64/multiarch/mem-impls.S
> index 8c534e8..5e74fa0 100644
> --- a/sysdeps/x86_64/multiarch/memmove.S
> +++ b/sysdeps/x86_64/multiarch/mem-impls.S
> @@ -1,6 +1,5 @@
> -/* Multiple versions of memmove
> - All versions must be listed in ifunc-impl-list.c.
> - Copyright (C) 2016-2017 Free Software Foundation, Inc.
> +/* Multiple versions of memmove, memcpy, and mempcpy.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> This file is part of the GNU C Library.
>
> The GNU C Library is free software; you can redistribute it and/or
> @@ -17,57 +16,6 @@
> License along with the GNU C Library; if not, see
> <http://www.gnu.org/licenses/>. */
>
> -#include <sysdep.h>
> -#include <init-arch.h>
> -
> -/* Define multiple versions only for the definition in lib and for
> - DSO. */
> -#if IS_IN (libc)
> - .text
> -ENTRY(__libc_memmove)
> - .type __libc_memmove, @gnu_indirect_function
> - LOAD_RTLD_GLOBAL_RO_RDX
> - lea __memmove_erms(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_ERMS)
> - jnz 2f
> - HAS_ARCH_FEATURE (Prefer_No_AVX512)
> - jnz 1f
> - HAS_ARCH_FEATURE (AVX512F_Usable)
> - jz 1f
> - lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
> - jnz 2f
> - lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP
> - HAS_CPU_FEATURE (ERMS)
> - jnz 2f
> - lea __memmove_avx512_unaligned(%rip), %RAX_LP
> - ret
> -1: lea __memmove_avx_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
> - jz L(Fast_Unaligned_Load)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __memmove_avx_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(Fast_Unaligned_Load):
> - lea __memmove_sse2_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
> - jz L(SSSE3)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(SSSE3):
> - HAS_CPU_FEATURE (SSSE3)
> - jz 2f
> - lea __memmove_ssse3_back(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Copy_Backward)
> - jnz 2f
> - lea __memmove_ssse3(%rip), %RAX_LP
> -2: ret
> -END(__libc_memmove)
> -#endif
> -
> #if IS_IN (libc)
> # define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
>
> @@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy)
> The speedup we get from using SSE2 instructions is likely eaten away
> by the indirect call in the PLT. */
> # define libc_hidden_builtin_def
> +# else
> +strong_alias (__memmove_sse2_unaligned, memmove)
> # endif
> -strong_alias (__libc_memmove, memmove)
> #endif
>
> #if !defined SHARED || !IS_IN (libc)
> weak_alias (__mempcpy, mempcpy)
> #endif
>
> -#include "../memmove.S"
> +#include <sysdeps/x86_64/memmove.S>
>
> #if defined SHARED && IS_IN (libc)
> # include <shlib-compat.h>
> diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
> deleted file mode 100644
> index af27703..0000000
> --- a/sysdeps/x86_64/multiarch/memcpy.S
> +++ /dev/null
> @@ -1,75 +0,0 @@
> -/* Multiple versions of memcpy
> - All versions must be listed in ifunc-impl-list.c.
> - Copyright (C) 2010-2017 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include <init-arch.h>
> -
> -/* Define multiple versions only for the definition in lib and for
> - DSO. In static binaries we need memcpy before the initialization
> - happened. */
> -#if defined SHARED && IS_IN (libc)
> - .text
> -ENTRY(__new_memcpy)
> - .type __new_memcpy, @gnu_indirect_function
> - LOAD_RTLD_GLOBAL_RO_RDX
> - lea __memcpy_erms(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_ERMS)
> - jnz 2f
> - HAS_ARCH_FEATURE (Prefer_No_AVX512)
> - jnz 1f
> - HAS_ARCH_FEATURE (AVX512F_Usable)
> - jz 1f
> - lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
> - jnz 2f
> - lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP
> - HAS_CPU_FEATURE (ERMS)
> - jnz 2f
> - lea __memcpy_avx512_unaligned(%rip), %RAX_LP
> - ret
> -1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
> - jz L(Fast_Unaligned_Load)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(Fast_Unaligned_Load):
> - lea __memcpy_sse2_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
> - jz L(SSSE3)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(SSSE3):
> - HAS_CPU_FEATURE (SSSE3)
> - jz 2f
> - lea __memcpy_ssse3_back(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Copy_Backward)
> - jnz 2f
> - lea __memcpy_ssse3(%rip), %RAX_LP
> -2: ret
> -END(__new_memcpy)
> -
> -# undef memcpy
> -# include <shlib-compat.h>
> -versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
> new file mode 100644
> index 0000000..ad1b31f
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memcpy.c
> @@ -0,0 +1,35 @@
> +/* Multiple version of memcpy.
> + All versions must be listed in ifunc-impl-list.c.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#if defined SHARED && IS_IN (libc)
> +
> +# define memcpy __redirect_memcpy
> +# include <string.h>
> +# undef memcpy
> +
> +# define SYMBOL_NAME memcpy
> +# include "memifunc.h"
> +
> +extern __typeof (__redirect_memcpy) __new_memcpy;
> +
> +libc_ifunc (__new_memcpy, memcpy_ifunc_selector ());
> +
> +# include <shlib-compat.h>
> +versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/memifunc.h b/sysdeps/x86_64/multiarch/memifunc.h
> new file mode 100644
> index 0000000..894b4a0
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memifunc.h
> @@ -0,0 +1,95 @@
> +/* Common definition for memcpy, mempcpy, and memmove implementation.
> + All versions must be listed in ifunc-impl-list.c.
> + Copyright (C) 2016-2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. */
> +
> +#include <cpu-features.h>
> +#include <ldsodefs.h>
> +
> +#define PASTER1(x,y) x ## _ ## y
> +#define EVALUATOR1(x,y) PASTER1(x,y)
> +#define PASTER2(x,y) __ ## x ## _ ## y
> +#define EVALUATOR2(x,y) PASTER2(x,y)
> +
> +/* Basically set '__redirect_<symbol>' to use as type definition,
> + '__<symbol>_<variant>' as the optimized implementation and
> + '<symbol>_ifunc_selector' as the IFUNC selector. */
> +#define REDIRECT_NAME EVALUATOR1(__redirect, SYMBOL_NAME)
> +#define OPTIMIZE(name) EVALUATOR2(SYMBOL_NAME, name)
> +#define IFUNC_NAME EVALUATOR1(SYMBOL_NAME, ifunc_selector)
> +
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned)
> + attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms)
> + attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms)
> + attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned)
> + attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms)
> + attribute_hidden;
> +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper)
> + attribute_hidden;
> +
> +static inline void *
> +IFUNC_NAME (void)
> +{
> + const struct cpu_features* cpu_features = __get_cpu_features ();
> +
> + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
> + return OPTIMIZE(erms);
> +
> + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
> + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
> + {
> + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
> + return OPTIMIZE(avx512_no_vzeroupper);
> +
> + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
> + return OPTIMIZE(avx512_unaligned_erms);
> +
> + return OPTIMIZE(avx512_unaligned);
> + }
> +
> + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
> + {
> + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
> + return OPTIMIZE(avx_unaligned_erms);
> +
> + return OPTIMIZE(avx_unaligned);
> + }
> +
> + if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
> + || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
> + {
> + if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
> + return OPTIMIZE(sse2_unaligned_erms);
> +
> + return OPTIMIZE(sse2_unaligned);
> + }
> +
> + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
> + return OPTIMIZE(ssse3_back);
> +
> + return OPTIMIZE(ssse3);
> +}
> diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
> new file mode 100644
> index 0000000..76372fc
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmove.c
> @@ -0,0 +1,33 @@
> +/* Multiple version of memmmove.
> + All versions must be listed in ifunc-impl-list.c.
> + Copyright (C) 2016-2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#if defined SHARED && IS_IN (libc)
> +
> +# define memmove __redirect_memmove
> +# include <string.h>
> +# undef memmove
> +
> +# define SYMBOL_NAME memmove
> +# include "memifunc.h"
> +
> +extern __typeof (__redirect_memmove) __libc_memmove;
> +
> +libc_ifunc (__libc_memmove, memmove_ifunc_selector ());
> +strong_alias (__libc_memmove, memmove);
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
> deleted file mode 100644
> index b8b2b28..0000000
> --- a/sysdeps/x86_64/multiarch/mempcpy.S
> +++ /dev/null
> @@ -1,73 +0,0 @@
> -/* Multiple versions of mempcpy
> - All versions must be listed in ifunc-impl-list.c.
> - Copyright (C) 2010-2017 Free Software Foundation, Inc.
> - Contributed by Intel Corporation.
> - This file is part of the GNU C Library.
> -
> - The GNU C Library is free software; you can redistribute it and/or
> - modify it under the terms of the GNU Lesser General Public
> - License as published by the Free Software Foundation; either
> - version 2.1 of the License, or (at your option) any later version.
> -
> - The GNU C Library is distributed in the hope that it will be useful,
> - but WITHOUT ANY WARRANTY; without even the implied warranty of
> - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> - Lesser General Public License for more details.
> -
> - You should have received a copy of the GNU Lesser General Public
> - License along with the GNU C Library; if not, see
> - <http://www.gnu.org/licenses/>. */
> -
> -#include <sysdep.h>
> -#include <init-arch.h>
> -
> -/* Define multiple versions only for the definition in lib and for
> - DSO. In static binaries we need mempcpy before the initialization
> - happened. */
> -#if defined SHARED && IS_IN (libc)
> - .text
> -ENTRY(__mempcpy)
> - .type __mempcpy, @gnu_indirect_function
> - LOAD_RTLD_GLOBAL_RO_RDX
> - lea __mempcpy_erms(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_ERMS)
> - jnz 2f
> - HAS_ARCH_FEATURE (Prefer_No_AVX512)
> - jnz 1f
> - HAS_ARCH_FEATURE (AVX512F_Usable)
> - jz 1f
> - lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
> - jnz 2f
> - lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP
> - HAS_CPU_FEATURE (ERMS)
> - jnz 2f
> - lea __mempcpy_avx512_unaligned(%rip), %RAX_LP
> - ret
> -1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
> - jz L(Fast_Unaligned_Load)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(Fast_Unaligned_Load):
> - lea __mempcpy_sse2_unaligned(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
> - jz L(SSSE3)
> - HAS_CPU_FEATURE (ERMS)
> - jz 2f
> - lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP
> - ret
> -L(SSSE3):
> - HAS_CPU_FEATURE (SSSE3)
> - jz 2f
> - lea __mempcpy_ssse3_back(%rip), %RAX_LP
> - HAS_ARCH_FEATURE (Fast_Copy_Backward)
> - jnz 2f
> - lea __mempcpy_ssse3(%rip), %RAX_LP
> -2: ret
> -END(__mempcpy)
> -
> -weak_alias (__mempcpy, mempcpy)
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
> new file mode 100644
> index 0000000..e59bde2
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/mempcpy.c
> @@ -0,0 +1,34 @@
> +/* Multiple version of mempcpy.
> + All versions must be listed in ifunc-impl-list.c.
> + Copyright (C) 2017 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <http://www.gnu.org/licenses/>. */
> +
> +#if defined SHARED && IS_IN (libc)
> +
> +# define mempcpy __redirect_mempcpy
> +# define __mempcpy __redirect___mempcpy
> +# include <string.h>
> +# undef mempcpy
> +# undef __mempcpy
> +
> +# define SYMBOL_NAME mempcpy
> +# include "memifunc.h"
> +
> +libc_ifunc_redirected (__redirect_mempcpy, __mempcpy,
> + mempcpy_ifunc_selector ());
> +weak_alias (__mempcpy, mempcpy)
> +#endif
> --
> 2.7.4
On Mon, May 29, 2017 at 5:11 PM, Erich Elsen <eriche@google.com> wrote:
> I was working on some of the other functions and noticed the following
> error when I ran `make check` with the original patch (I didn't try
> Adhemerval's):
>
> test-canon2.c:(.text+0x33): undefined reference to `mempcpy'
>
You must have missed something. Please take a look at hjl/ifunc/c
branch which converted memcpy/mempcpy/memmove to C.
H.J.
@@ -7,6 +7,7 @@ ifeq ($(subdir),string)
sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strcmp-sse2-unaligned strncmp-ssse3 \
memcmp-sse4 memcpy-ssse3 \
+ mem-impls \
memmove-ssse3 \
memcpy-ssse3-back \
memmove-ssse3-back \
similarity index 52%
rename from sysdeps/x86_64/multiarch/memmove.S
rename to sysdeps/x86_64/multiarch/mem-impls.S
@@ -1,6 +1,5 @@
-/* Multiple versions of memmove
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* Multiple versions of memmove, memcpy, and mempcpy.
+ Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,57 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. */
-#if IS_IN (libc)
- .text
-ENTRY(__libc_memmove)
- .type __libc_memmove, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __memmove_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __memmove_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __memmove_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memmove_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __memmove_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __memmove_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __memmove_ssse3(%rip), %RAX_LP
-2: ret
-END(__libc_memmove)
-#endif
-
#if IS_IN (libc)
# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
@@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy)
The speedup we get from using SSE2 instructions is likely eaten away
by the indirect call in the PLT. */
# define libc_hidden_builtin_def
+# else
+strong_alias (__memmove_sse2_unaligned, memmove)
# endif
-strong_alias (__libc_memmove, memmove)
#endif
#if !defined SHARED || !IS_IN (libc)
weak_alias (__mempcpy, mempcpy)
#endif
-#include "../memmove.S"
+#include <sysdeps/x86_64/memmove.S>
#if defined SHARED && IS_IN (libc)
# include <shlib-compat.h>
deleted file mode 100644
@@ -1,75 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need memcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__new_memcpy)
- .type __new_memcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __memcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __memcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __memcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __memcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __memcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__new_memcpy)
-
-# undef memcpy
-# include <shlib-compat.h>
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
new file mode 100644
@@ -0,0 +1,35 @@
+/* Multiple versions of memcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined SHARED && IS_IN (libc)
+/* Shared libc only: static binaries need memcpy before initialization.  */
+# define memcpy __redirect_memcpy
+# include <string.h>
+# undef memcpy
+/* <string.h> declarations of memcpy now use the name __redirect_memcpy.  */
+# define SYMBOL_NAME memcpy
+# include "memifunc.h"
+/* Give the IFUNC symbol the type recorded under __redirect_memcpy.  */
+extern __typeof (__redirect_memcpy) __new_memcpy;
+
+libc_ifunc (__new_memcpy, memcpy_ifunc_selector ());
+/* Export __new_memcpy as memcpy with symbol version GLIBC_2_14.  */
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif
new file mode 100644
@@ -0,0 +1,95 @@
+/* Common definitions for the memcpy, mempcpy, and memmove implementations.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. */
+
+#include <cpu-features.h>
+#include <ldsodefs.h>
+/* Two macro levels so that SYMBOL_NAME is expanded before '##' pastes.  */
+#define PASTER1(x,y) x ## _ ## y
+#define EVALUATOR1(x,y) PASTER1(x,y)
+#define PASTER2(x,y) __ ## x ## _ ## y
+#define EVALUATOR2(x,y) PASTER2(x,y)
+
+/* REDIRECT_NAME is '__redirect_<symbol>', used only for its type;
+ OPTIMIZE (v) is '__<symbol>_<v>', an optimized implementation; and
+ IFUNC_NAME is '<symbol>_ifunc_selector', the IFUNC selector. */
+#define REDIRECT_NAME EVALUATOR1(__redirect, SYMBOL_NAME)
+#define OPTIMIZE(name) EVALUATOR2(SYMBOL_NAME, name)
+#define IFUNC_NAME EVALUATOR1(SYMBOL_NAME, ifunc_selector)
+/* Hidden declarations of every variant the selector below may return.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper)
+ attribute_hidden;
+/* Select a SYMBOL_NAME variant from the __get_cpu_features feature bits.  */
+static inline void *
+IFUNC_NAME (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+ /* Checked first, so Prefer_ERMS overrides every test below.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+ return OPTIMIZE(erms);
+ /* AVX512 variants need AVX512F_Usable without Prefer_No_AVX512.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE(avx512_no_vzeroupper);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE(avx512_unaligned_erms);
+
+ return OPTIMIZE(avx512_unaligned);
+ }
+ /* Next choice: AVX variants, when AVX_Fast_Unaligned_Load is set.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE(avx_unaligned_erms);
+
+ return OPTIMIZE(avx_unaligned);
+ }
+ /* SSE2 variants when SSSE3 is missing or Fast_Unaligned_Copy is set.  */
+ if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+ || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE(sse2_unaligned_erms);
+
+ return OPTIMIZE(sse2_unaligned);
+ }
+ /* SSSE3 is present here; Fast_Copy_Backward picks the _back variant.  */
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+ return OPTIMIZE(ssse3_back);
+
+ return OPTIMIZE(ssse3);
+}
new file mode 100644
@@ -0,0 +1,33 @@
+/* Multiple versions of memmove.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined SHARED && IS_IN (libc)
+/* The IFUNC below is built only under SHARED && IS_IN (libc).  */
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
+/* <string.h> declarations of memmove now use the name __redirect_memmove.  */
+# define SYMBOL_NAME memmove
+# include "memifunc.h"
+/* Give the IFUNC symbol the type recorded under __redirect_memmove.  */
+extern __typeof (__redirect_memmove) __libc_memmove;
+
+libc_ifunc (__libc_memmove, memmove_ifunc_selector ());
+strong_alias (__libc_memmove, memmove);
+#endif
deleted file mode 100644
@@ -1,73 +0,0 @@
-/* Multiple versions of mempcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need mempcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __mempcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __mempcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __mempcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __mempcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __mempcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__mempcpy)
-
-weak_alias (__mempcpy, mempcpy)
-#endif
new file mode 100644
@@ -0,0 +1,34 @@
+/* Multiple versions of mempcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if defined SHARED && IS_IN (libc)
+/* Shared libc only: static binaries need mempcpy before initialization.  */
+# define mempcpy __redirect_mempcpy
+# define __mempcpy __redirect___mempcpy
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+/* <string.h> declarations of both names now use the __redirect_ names.  */
+# define SYMBOL_NAME mempcpy
+# include "memifunc.h"
+/* __mempcpy gets the type of __redirect_mempcpy; mempcpy aliases it.  */
+libc_ifunc_redirected (__redirect_mempcpy, __mempcpy,
+ mempcpy_ifunc_selector ());
+weak_alias (__mempcpy, mempcpy)
+#endif