From patchwork Mon May 29 20:34:27 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella Netto X-Patchwork-Id: 20632 Received: (qmail 125790 invoked by alias); 29 May 2017 20:34:36 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 125770 invoked by uid 89); 29 May 2017 20:34:35 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-22.2 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_STOCKGEN, RCVD_IN_DNSWL_NONE, RCVD_IN_SORBS_SPAM, SPF_PASS, TBC autolearn=ham version=3.3.2 spammy=organize X-HELO: mail-qk0-f177.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:subject:to:cc:references:from:message-id:date :user-agent:mime-version:in-reply-to:content-language :content-transfer-encoding; bh=gdOa9EB7zaFfZJT9m1TA4EntUvF7ao1Nnlj2hUMJ0yk=; b=T1X+aEzSOPxrjbBM56rPH6RI3GMa9ib7NqRvsEbRb3EdBlPln+45OlsOcqS+sMs/vE XtFo7HkIknMx+PdRxMge6TatYIQerIExAmE5h7dMfOV102MdMTXPl283027bkhms5srm TS7gBq4gScZROeoE2xDy9Jiyske1+6LJP/i4O+1fM1nR+hgAmC8k212c/PEkZpWcLVhG lUkDnmbaZDkBXzvFM14bfQRh3WDUfCcqc7NLyrnblKQZ8BoPJqwV0g9vH2Z9VxIOJZpm +ctkirsK7maGogcxrtP+wzX2nbj/kBCEYpQ9RNlU5ndYq+u+0kqQrwDjfwmu7+wwjYu4 H7PQ== X-Gm-Message-State: AODbwcCdoKWp34iLmGJxXdIuWB4v7Xzr7Pf8JAMO7WCe68vXuB9jS4yk RDm4vhO86OZ07LZvy8+vCA== X-Received: by 10.55.35.13 with SMTP id j13mr17062130qkj.22.1496090072734; Mon, 29 May 2017 13:34:32 -0700 (PDT) Subject: Re: RFC: Rewrite x86-64 IFUNC selector in C To: "H.J. 
Lu" Cc: Erich Elsen , Siddhesh Poyarekar , Carlos O'Donell , GNU C Library References: <4a16e1e8-9baf-7b75-41b0-e25a127c649a@linaro.org> <264ee0ca-ee55-297b-ac16-2761c77e0bfc@linaro.org> From: Adhemerval Zanella Message-ID: <10c41e3f-d9ea-184d-4580-1beac97fb2dd@linaro.org> Date: Mon, 29 May 2017 17:34:27 -0300 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.1 MIME-Version: 1.0 In-Reply-To: On 28/05/2017 13:26, H.J. Lu wrote: > On Thu, May 25, 2017 at 2:55 PM, Adhemerval Zanella > wrote: >> >> >> On 25/05/2017 18:38, H.J. Lu wrote: >>> On Thu, May 25, 2017 at 2:25 PM, Erich Elsen wrote: >>>> Ok, I'll get started then. >>>> >>>> Are there any general comments about the attached conversion for >>>> memcpy? Just so I don't repeat the same wrong thing many times. >>> >>> You missed: >>> >>> /* Define multiple versions only for the definition in lib and for >>> DSO. In static binaries we need memcpy before the initialization >>> happened. */ >>> #if defined SHARED && IS_IN (libc) >>> >>> +typedef void * (*memcpy_fn)(void *, const void *, size_t); >>> + >>> +extern void * __memcpy_erms(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_sse2_unaligned(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_sse2_unaligned_erms(void *dest, const void >>> *src, size_t n); >>> +extern void * __memcpy_ssse3(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_ssse3_back(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_avx_unaligned(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_avx_unaligned_erms(void *dest, const void >>> *src, size_t n); >>> +extern void * __memcpy_avx512_unaligned(void *dest, const void *src, size_t n); >>> +extern void * __memcpy_avx512_unaligned_erms(void *dest, const void >>> *src, size_t n); >>> >>> Please use something similar to multiarch/strstr.c: >>> >>> /* Redefine strstr so that the compiler won't complain about the type >>> 
mismatch with the IFUNC selector in strong_alias, below. */ >>> #undef strstr >>> #define strstr __redirect_strstr >>> #include >>> #undef strstr >>> ... >>> extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden; >>> >>> +/* Defined in cacheinfo.c */ >>> +extern long int __x86_shared_cache_size attribute_hidden; >>> +extern long int __x86_shared_cache_size_half attribute_hidden; >>> +extern long int __x86_data_cache_size attribute_hidden; >>> +extern long int __x86_data_cache_size_half attribute_hidden; >>> +extern long int __x86_shared_non_temporal_threshold attribute_hidden; >> >> It seems it will be used not only for memcpy, so I would suggest to add >> on a common header on multiarch. >> >>> >>> Remove them. >>> static void * select_memcpy_impl(void) { >>> + const struct cpu_features* cpu_features_struct_p = __get_cpu_features (); >>> + >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_ERMS)) { >>> + return __memcpy_erms; >>> + } >>> + >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX512F_Usable)) { >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Prefer_No_VZEROUPPER)) >>> + return __memcpy_avx512_unaligned_erms; >>> + return __memcpy_avx512_unaligned; >>> + } >>> + >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, AVX_Fast_Unaligned_Load)) { >>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { >>> + return __memcpy_avx_unaligned_erms; >>> + >>> + } >>> + return __memcpy_avx_unaligned; >>> + } >>> + else { >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Unaligned_Copy)) { >>> + if (CPU_FEATURES_CPU_P(cpu_features_struct_p, ERMS)) { >>> + return __memcpy_sse2_unaligned_erms; >>> + >>> + } >>> + return __memcpy_sse2_unaligned; >>> + } >>> + else { >>> + if (!CPU_FEATURES_CPU_P(cpu_features_struct_p, SSSE3)) { >>> + return __memcpy_sse2_unaligned; >>> + >>> + } >>> + if (CPU_FEATURES_ARCH_P(cpu_features_struct_p, Fast_Copy_Backward)) { >>> + return __memcpy_ssse3_back; >>> + >>> + } >>> + return 
__memcpy_ssse3; >>> + } >>> + } >>> +} >>> >>> Please >>> >>> 1. Fix formatting. >>> 2. Remove unnecessary {}. >>> 3. Don't use "else". >>> >>> +void *__new_memcpy(void *dest, const void *src, size_t n) >>> + __attribute__ ((ifunc ("select_memcpy_impl"))); >>> >>> Use "typeof" here. >> >> We have the libc_ifunc{_redirect} to handle the __attribute__ ((ifunc)) support >> from compiler. I think you can use: >> >> # include >> >> // extern __typeof (memcpy) __memcpy_ attribute_hidden; >> >> static void *memcpy_selector (void) >> { >> // fill me. >> } >> >> libc_ifunc_hidden (memcpy, memcpy, memcpy_selector); >> libc_hidden_def (memcpy) > > Here is my take. It only covers memcpy and mempcpy. Please > extend it to memmove as well as *_chk functions. > I think we can simplify it further and use the existing ifunc macros in libc-symbols.h. Also, for memmove I think we can organize the code better (at least for ifunc) and build an extra object with a more meaningful name. I used your logic for the ifunc selection and extended it to memmove as well. diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile index 3736f54..b6179aa 100644 --- a/sysdeps/x86_64/multiarch/Makefile +++ b/sysdeps/x86_64/multiarch/Makefile @@ -7,6 +7,7 @@ ifeq ($(subdir),string) sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \ strcmp-sse2-unaligned strncmp-ssse3 \ memcmp-sse4 memcpy-ssse3 \ + mem-impls \ memmove-ssse3 \ memcpy-ssse3-back \ memmove-ssse3-back \ diff --git a/sysdeps/x86_64/multiarch/memmove.S b/sysdeps/x86_64/multiarch/mem-impls.S similarity index 52% rename from sysdeps/x86_64/multiarch/memmove.S rename to sysdeps/x86_64/multiarch/mem-impls.S index 8c534e8..5e74fa0 100644 --- a/sysdeps/x86_64/multiarch/memmove.S +++ b/sysdeps/x86_64/multiarch/mem-impls.S @@ -1,6 +1,5 @@ -/* Multiple versions of memmove - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2016-2017 Free Software Foundation, Inc. 
+/* Multiple versions of memmove, memcpy, and mempcpy. + Copyright (C) 2017 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,57 +16,6 @@ License along with the GNU C Library; if not, see . */ -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. */ -#if IS_IN (libc) - .text -ENTRY(__libc_memmove) - .type __libc_memmove, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memmove_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memmove_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __memmove_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memmove_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __memmove_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __memmove_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __memmove_ssse3(%rip), %RAX_LP -2: ret -END(__libc_memmove) -#endif - #if IS_IN (libc) # define MEMMOVE_SYMBOL(p,s) p##_sse2_##s @@ -82,15 +30,16 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy) The speedup we get from using SSE2 instructions is likely eaten away by the indirect call in the PLT. 
*/ # define libc_hidden_builtin_def +# else +strong_alias (__memmove_sse2_unaligned, memmove) # endif -strong_alias (__libc_memmove, memmove) #endif #if !defined SHARED || !IS_IN (libc) weak_alias (__mempcpy, mempcpy) #endif -#include "../memmove.S" +#include #if defined SHARED && IS_IN (libc) # include diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S deleted file mode 100644 index af27703..0000000 --- a/sysdeps/x86_64/multiarch/memcpy.S +++ /dev/null @@ -1,75 +0,0 @@ -/* Multiple versions of memcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need memcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__new_memcpy) - .type __new_memcpy, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __memcpy_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __memcpy_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __memcpy_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __memcpy_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __memcpy_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __memcpy_ssse3(%rip), %RAX_LP -2: ret -END(__new_memcpy) - -# undef memcpy -# include -versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); -#endif diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c new file mode 100644 index 0000000..ad1b31f --- /dev/null +++ b/sysdeps/x86_64/multiarch/memcpy.c @@ -0,0 +1,35 @@ +/* Multiple version of memcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined SHARED && IS_IN (libc) + +# define memcpy __redirect_memcpy +# include +# undef memcpy + +# define SYMBOL_NAME memcpy +# include "memifunc.h" + +extern __typeof (__redirect_memcpy) __new_memcpy; + +libc_ifunc (__new_memcpy, memcpy_ifunc_selector ()); + +# include +versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14); +#endif diff --git a/sysdeps/x86_64/multiarch/memifunc.h b/sysdeps/x86_64/multiarch/memifunc.h new file mode 100644 index 0000000..894b4a0 --- /dev/null +++ b/sysdeps/x86_64/multiarch/memifunc.h @@ -0,0 +1,95 @@ +/* Common definition for memcpy, mempcpy, and memmove implementation. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. 
*/ + +#include +#include + +#define PASTER1(x,y) x ## _ ## y +#define EVALUATOR1(x,y) PASTER1(x,y) +#define PASTER2(x,y) __ ## x ## _ ## y +#define EVALUATOR2(x,y) PASTER2(x,y) + +/* Basically set '__redirect_' to use as type definition, + '___' as the optimized implementation and + '_ifunc_selector' as the IFUNC selector. */ +#define REDIRECT_NAME EVALUATOR1(__redirect, SYMBOL_NAME) +#define OPTIMIZE(name) EVALUATOR2(SYMBOL_NAME, name) +#define IFUNC_NAME EVALUATOR1(SYMBOL_NAME, ifunc_selector) + +extern __typeof (REDIRECT_NAME) OPTIMIZE(erms) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(sse2_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(ssse3_back) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned) attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_unaligned_erms) + attribute_hidden; +extern __typeof (REDIRECT_NAME) OPTIMIZE(avx512_no_vzeroupper) + attribute_hidden; + +static inline void * +IFUNC_NAME (void) +{ + const struct cpu_features* cpu_features = __get_cpu_features (); + + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS)) + return OPTIMIZE(erms); + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable) + && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512)) + { + if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)) + return OPTIMIZE(avx512_no_vzeroupper); + + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE(avx512_unaligned_erms); + + return OPTIMIZE(avx512_unaligned); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE(avx_unaligned_erms); + + 
return OPTIMIZE(avx_unaligned); + } + + if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3) + || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy)) + { + if (CPU_FEATURES_CPU_P (cpu_features, ERMS)) + return OPTIMIZE(sse2_unaligned_erms); + + return OPTIMIZE(sse2_unaligned); + } + + if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward)) + return OPTIMIZE(ssse3_back); + + return OPTIMIZE(ssse3); +} diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c new file mode 100644 index 0000000..76372fc --- /dev/null +++ b/sysdeps/x86_64/multiarch/memmove.c @@ -0,0 +1,33 @@ +/* Multiple versions of memmove. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +#if defined SHARED && IS_IN (libc) + +# define memmove __redirect_memmove +# include +# undef memmove + +# define SYMBOL_NAME memmove +# include "memifunc.h" + +extern __typeof (__redirect_memmove) __libc_memmove; + +libc_ifunc (__libc_memmove, memmove_ifunc_selector ()); +strong_alias (__libc_memmove, memmove); +#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S deleted file mode 100644 index b8b2b28..0000000 --- a/sysdeps/x86_64/multiarch/mempcpy.S +++ /dev/null @@ -1,73 +0,0 @@ -/* Multiple versions of mempcpy - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2010-2017 Free Software Foundation, Inc. - Contributed by Intel Corporation. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -/* Define multiple versions only for the definition in lib and for - DSO. In static binaries we need mempcpy before the initialization - happened. 
*/ -#if defined SHARED && IS_IN (libc) - .text -ENTRY(__mempcpy) - .type __mempcpy, @gnu_indirect_function - LOAD_RTLD_GLOBAL_RO_RDX - lea __mempcpy_erms(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_ERMS) - jnz 2f - HAS_ARCH_FEATURE (Prefer_No_AVX512) - jnz 1f - HAS_ARCH_FEATURE (AVX512F_Usable) - jz 1f - lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP - HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER) - jnz 2f - lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP - HAS_CPU_FEATURE (ERMS) - jnz 2f - lea __mempcpy_avx512_unaligned(%rip), %RAX_LP - ret -1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load) - jz L(Fast_Unaligned_Load) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP - ret -L(Fast_Unaligned_Load): - lea __mempcpy_sse2_unaligned(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Unaligned_Copy) - jz L(SSSE3) - HAS_CPU_FEATURE (ERMS) - jz 2f - lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP - ret -L(SSSE3): - HAS_CPU_FEATURE (SSSE3) - jz 2f - lea __mempcpy_ssse3_back(%rip), %RAX_LP - HAS_ARCH_FEATURE (Fast_Copy_Backward) - jnz 2f - lea __mempcpy_ssse3(%rip), %RAX_LP -2: ret -END(__mempcpy) - -weak_alias (__mempcpy, mempcpy) -#endif diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c new file mode 100644 index 0000000..e59bde2 --- /dev/null +++ b/sysdeps/x86_64/multiarch/mempcpy.c @@ -0,0 +1,34 @@ +/* Multiple version of mempcpy. + All versions must be listed in ifunc-impl-list.c. + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if defined SHARED && IS_IN (libc) + +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +# include +# undef mempcpy +# undef __mempcpy + +# define SYMBOL_NAME mempcpy +# include "memifunc.h" + +libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, + mempcpy_ifunc_selector ()); +weak_alias (__mempcpy, mempcpy) +#endif