From patchwork Tue Jun 12 22:19:25 2018
X-Patchwork-Submitter: "H.J. Lu"
X-Patchwork-Id: 27764
From: "H.J. Lu"
To: libc-alpha@sourceware.org
Subject: [PATCH 06/20] libcpu-rt-c/x86-64: Add memcpy, memmove and mempcpy
Date: Tue, 12 Jun 2018 15:19:25 -0700
Message-Id: <20180612221939.19545-7-hjl.tools@gmail.com>
In-Reply-To: <20180612221939.19545-1-hjl.tools@gmail.com>
References: <20180612221939.19545-1-hjl.tools@gmail.com>

	* sysdeps/x86_64/Makefile (cpu-rt-c-routines): Add memcpy,
	memmove and mempcpy.
	* sysdeps/x86_64/memmove.S: Support libcpu-rt-c.
	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
	* sysdeps/x86_64/multiarch/memcpy.c: Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove.c: Likewise.
	* sysdeps/x86_64/multiarch/mempcpy.c: Likewise.
	* sysdeps/x86_64/multiarch/Makefile (libcpu-rt-c-sysdep_routines):
	Add memcpy-ssse3, memcpy-ssse3-back, memmove-sse2-unaligned-erms,
	memmove-ssse3, memmove-ssse3-back, memmove-avx-unaligned-erms,
	memmove-avx512-unaligned-erms and memmove-avx512-no-vzeroupper.
---
 sysdeps/x86_64/Makefile                            |  2 +-
 sysdeps/x86_64/memmove.S                           | 12 +++++--
 sysdeps/x86_64/multiarch/Makefile                  |  8 ++++-
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S       |  6 ++--
 sysdeps/x86_64/multiarch/memcpy-ssse3.S            |  6 ++--
 sysdeps/x86_64/multiarch/memcpy.c                  | 14 +++---
 .../multiarch/memmove-avx-unaligned-erms.S         |  2 +-
 .../multiarch/memmove-avx512-no-vzeroupper.S       |  8 +++--
 .../multiarch/memmove-avx512-unaligned-erms.S      |  2 +-
 .../multiarch/memmove-sse2-unaligned-erms.S        |  2 +-
 .../multiarch/memmove-vec-unaligned-erms.S         | 33 ++++++++++++-------
 sysdeps/x86_64/multiarch/memmove.c                 | 10 ++++--
 sysdeps/x86_64/multiarch/mempcpy.c                 | 10 ++++--
 13 files changed, 82 insertions(+), 33 deletions(-)
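Every file touched below follows the same pattern: conditionals that used to
test IS_IN (libc) alone now also accept IS_IN (libcpu_rt_c), and the
libcpu-rt-c build exports the plain symbol names while skipping the __chk
entry points and the glibc-internal hidden/versioned aliases.  As a rough
standalone illustration of how this module gating selects code at build time
(this mock is not glibc code; the real IS_IN lives in
include/libc-symbols.h, and the MODULE_* values here are invented):

#include <stdio.h>

/* Invented stand-ins for glibc's module identifiers; glibc defines
   IS_IN as a comparison of IN_MODULE against MODULE_<lib>.  */
#define MODULE_libc	   1
#define MODULE_libcpu_rt_c 2
#ifndef IN_MODULE
# define IN_MODULE MODULE_libcpu_rt_c	/* normally set per target library */
#endif
#define IS_IN(lib) (IN_MODULE == MODULE_##lib)

int
main (void)
{
#if IS_IN (libc) || IS_IN (libcpu_rt_c)
# if IS_IN (libcpu_rt_c)
  /* Test-library build: plain names only, no __mempcpy_chk and no
     hidden or versioned aliases.  */
  puts ("built for libcpu-rt-c");
# else
  puts ("built for libc");
# endif
#endif
  return 0;
}

Compiling the mock with -DIN_MODULE=MODULE_libc flips the selection, which
is in effect what the per-library makefile rules do for each object.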
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 1eb13d01da..978cff6cba 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -147,7 +147,7 @@ endif
 endif
 
 ifeq ($(subdir),cpu-rt-c)
-cpu-rt-c-routines += memchr memcmp
+cpu-rt-c-routines += memchr memcmp memcpy memmove mempcpy
 
 # For the CPU run-time tests.
 vpath %.c $(..)string
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index 9cc92ff9a9..ec2b624966 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -29,7 +29,7 @@
 #define SECTION(p)		p
 
 #ifdef USE_MULTIARCH
-# if !IS_IN (libc)
+# if !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 #  define MEMCPY_SYMBOL(p,s)	memcpy
 # endif
 #else
@@ -39,8 +39,12 @@
 #  define MEMCPY_SYMBOL(p,s)	memcpy
 # endif
 #endif
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-# define MEMPCPY_SYMBOL(p,s)	__mempcpy
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
+# if IS_IN (libcpu_rt_c)
+#  define MEMPCPY_SYMBOL(p,s)	mempcpy
+# else
+#  define MEMPCPY_SYMBOL(p,s)	__mempcpy
+# endif
 #endif
 #ifndef MEMMOVE_SYMBOL
 # define MEMMOVE_CHK_SYMBOL(p,s) p
@@ -55,9 +59,11 @@ libc_hidden_builtin_def (memmove)
 strong_alias (memmove, __memcpy)
 libc_hidden_ver (memmove, memcpy)
 # endif
+# if !IS_IN (libcpu_rt_c)
 libc_hidden_def (__mempcpy)
 weak_alias (__mempcpy, mempcpy)
 libc_hidden_builtin_def (mempcpy)
+# endif
 
 # if defined SHARED && IS_IN (libc)
 # undef memcpy
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9bb6c8c3cd..8e86e44e33 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -69,5 +69,11 @@ endif
 ifeq ($(subdir),cpu-rt-c)
 libcpu-rt-c-sysdep_routines += memchr-sse2 memchr-avx2 \
 			       memcmp-sse2 memcmp-ssse3 memcmp-sse4 \
-			       memcmp-avx2-movbe
+			       memcmp-avx2-movbe memcpy-ssse3 \
+			       memcpy-ssse3-back \
+			       memmove-sse2-unaligned-erms \
+			       memmove-ssse3 memmove-ssse3-back \
+			       memmove-avx-unaligned-erms \
+			       memmove-avx512-unaligned-erms \
+			       memmove-avx512-no-vzeroupper
 endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 7e37035487..9bcd5957cf 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -19,7 +19,7 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 #include "asm-syntax.h"
 
@@ -44,10 +44,12 @@
 	.section .text.ssse3,"ax",@progbits
 
 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
 ENTRY (MEMPCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMPCPY_CHK)
+# endif
 
 ENTRY (MEMPCPY)
 	movq	%rdi, %rax
@@ -56,7 +58,7 @@ ENTRY (MEMPCPY)
 END (MEMPCPY)
 #endif
 
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 5dd209034b..988ce0fc83 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -19,7 +19,7 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 #include "asm-syntax.h"
 
@@ -44,10 +44,12 @@
 	.section .text.ssse3,"ax",@progbits
 
 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
 ENTRY (MEMPCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMPCPY_CHK)
+# endif
 
 ENTRY (MEMPCPY)
 	movq	%rdi, %rax
@@ -56,7 +58,7 @@ ENTRY (MEMPCPY)
 END (MEMPCPY)
 #endif
 
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
index 419f76aefc..ee3bb2706c 100644
--- a/sysdeps/x86_64/multiarch/memcpy.c
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -18,7 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define memcpy __redirect_memcpy
 # include <string.h>
 # undef memcpy
@@ -26,14 +26,20 @@
 # define SYMBOL_NAME memcpy
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __new_memcpy memcpy
+# endif
+
 libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
 		       IFUNC_SELECTOR ());
 
-# ifdef SHARED
+# if !IS_IN (libcpu_rt_c)
+#  ifdef SHARED
 __hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
   __attribute__ ((visibility ("hidden")));
-# endif
+#  endif
 
-# include <shlib-compat.h>
+#  include <shlib-compat.h>
 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
index e195e93f15..e996ace136 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -1,4 +1,4 @@
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define VEC_SIZE	32
 # define VEC(i)		ymm##i
 # define VMOVNT		vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index effc3ac2de..6cd1accfc5 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -18,15 +18,18 @@
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 # include "asm-syntax.h"
 
 	.section .text.avx512,"ax",@progbits
+# if IS_IN (libc)
 ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__mempcpy_chk_avx512_no_vzeroupper)
+strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
+# endif
 
 ENTRY (__mempcpy_avx512_no_vzeroupper)
 	movq	%rdi, %rax
@@ -34,10 +37,12 @@ ENTRY (__mempcpy_avx512_no_vzeroupper)
 	jmp	L(start)
 END (__mempcpy_avx512_no_vzeroupper)
 
+# if IS_IN (libc)
 ENTRY (__memmove_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memmove_chk_avx512_no_vzeroupper)
+# endif
 
 ENTRY (__memmove_avx512_no_vzeroupper)
 	mov	%rdi, %rax
@@ -411,5 +416,4 @@ L(gobble_256bytes_nt_loop_bkw):
 END (__memmove_avx512_no_vzeroupper)
 
 strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
-strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
index aac1515cf6..95381d458e 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -1,4 +1,4 @@
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define VEC_SIZE	64
 # define VEC(i)		zmm##i
 # define VMOVNT		vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 7c6163ddcb..eae941e58e 100644
--- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -16,7 +16,7 @@
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define MEMMOVE_SYMBOL(p,s)	p##_sse2_##s
 #else
 weak_alias (__mempcpy, mempcpy)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index e2ede45e9f..ada93ea6ad 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -131,7 +131,7 @@ L(start):
 	jb	L(less_vec)
 	cmpq	$(VEC_SIZE * 2), %rdx
 	ja	L(more_2x_vec)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(last_2x_vec):
 #endif
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
@@ -140,18 +140,20 @@ L(last_2x_vec):
 	VMOVU	%VEC(0), (%rdi)
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
 	VZEROUPPER
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(nop):
 #endif
 	ret
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
 
 # if VEC_SIZE == 16
+#  if IS_IN (libc)
 ENTRY (__mempcpy_chk_erms)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__mempcpy_chk_erms)
+#  endif
 	/* Only used to measure performance of REP MOVSB.  */
 ENTRY (__mempcpy_erms)
 	movq	%rdi, %rax
@@ -163,10 +165,12 @@ ENTRY (__mempcpy_erms)
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
 
+#  if IS_IN (libc)
 ENTRY (__memmove_chk_erms)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memmove_chk_erms)
+#  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
@@ -195,10 +199,12 @@ L(movsb_backward):
 	ret
 END (__memmove_erms)
 strong_alias (__memmove_erms, __memcpy_erms)
+#  if IS_IN (libc)
 strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#  endif
 # endif
 
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -212,10 +218,12 @@ ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 
 # ifdef SHARED
+#  if IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+#  endif
 # endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -321,7 +329,7 @@ L(between_2_3):
 	movw	%si, (%rdi)
 	ret
 
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(movsb_more_2x_vec):
 	cmpq	$REP_MOVSB_THRESHOLD, %rdx
 	ja	L(movsb)
@@ -392,7 +400,8 @@ L(more_8x_vec):
 	subq	%r8, %rdi
 	/* Adjust length.  */
 	addq	%r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
 	ja	L(large_forward)
@@ -444,7 +453,8 @@ L(more_8x_vec_backward):
 	subq	%r8, %r9
 	/* Adjust length.  */
 	subq	%r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
 	ja	L(large_backward)
@@ -474,7 +484,8 @@ L(loop_4x_vec_backward):
 	VZEROUPPER
 	ret
 
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 L(large_forward):
 	/* Don't use non-temporal store if there is overlap between
 	   destination and source since destination may be in cache
@@ -547,16 +558,16 @@ L(loop_large_backward):
 #endif
 
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # ifdef USE_MULTIARCH
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
-#  ifdef SHARED
+#  if defined SHARED && IS_IN (libc)
 strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
 #  endif
 # endif
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
 strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
 	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
 # endif
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index d512228eae..a4fad5ae60 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -18,7 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define memmove __redirect_memmove
 # include <string.h>
 # undef memmove
@@ -26,12 +26,18 @@
 # define SYMBOL_NAME memmove
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __libc_memmove memmove
+# endif
+
 libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
 		       IFUNC_SELECTOR ());
 
+# if !IS_IN (libcpu_rt_c)
 strong_alias (__libc_memmove, memmove);
-# ifdef SHARED
+#  ifdef SHARED
 __hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
   __attribute__ ((visibility ("hidden")));
+#  endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
index 9fe41dda82..fe3ece0774 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.c
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -18,7 +18,7 @@
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define mempcpy __redirect_mempcpy
 # define __mempcpy __redirect___mempcpy
 # define NO_MEMPCPY_STPCPY_REDIRECT
@@ -30,13 +30,19 @@
 # define SYMBOL_NAME mempcpy
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __mempcpy mempcpy
+# endif
+
 libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
 
+# if !IS_IN (libcpu_rt_c)
 weak_alias (__mempcpy, mempcpy)
-# ifdef SHARED
+#  ifdef SHARED
 __hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
   __attribute__ ((visibility ("hidden")));
 __hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
   __attribute__ ((visibility ("hidden")));
+#  endif
 # endif
 #endif
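
For background on the memcpy.c, memmove.c and mempcpy.c changes above:
libc_ifunc_redirected ultimately emits an ELF IFUNC symbol, so each exported
entry point binds to one implementation when the dynamic linker first
resolves it, in libcpu-rt-c just as in libc.  Below is a rough standalone
sketch of that mechanism using GCC's ifunc attribute on x86-64 ELF; the
*_demo names are hypothetical and the selector is far simpler than the real
one in ifunc-memmove.h:

#include <stddef.h>
#include <stdio.h>

typedef void *(*memcpy_fn) (void *, const void *, size_t);

static void *
memcpy_generic (void *dst, const void *src, size_t n)
{
  unsigned char *d = dst;
  const unsigned char *s = src;
  while (n-- > 0)
    *d++ = *s++;
  return dst;
}

static void *
memcpy_ssse3_demo (void *dst, const void *src, size_t n)
{
  /* Stand-in for a tuned SSSE3 variant; behavior is identical here.  */
  return memcpy_generic (dst, src, n);
}

/* The resolver runs when the symbol is bound, before constructors,
   so initialize GCC's CPU model data explicitly before querying it.  */
static memcpy_fn
resolve_memcpy_demo (void)
{
  __builtin_cpu_init ();
  return __builtin_cpu_supports ("ssse3")
	 ? memcpy_ssse3_demo : memcpy_generic;
}

void *memcpy_demo (void *, const void *, size_t)
     __attribute__ ((ifunc ("resolve_memcpy_demo")));

int
main (void)
{
  char buf[6];
  memcpy_demo (buf, "hello", sizeof buf);
  puts (buf);
  return 0;
}

Built with plain gcc, memcpy_demo resolves to one of the two variants at
load time; the run-time tests presumably rely on the same one-time
selection to exercise each CPU-specific routine in libcpu-rt-c.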