From patchwork Tue Apr 26 12:07:49 2016
X-Patchwork-Submitter: Stefan Liebler
X-Patchwork-Id: 11884
From: Stefan Liebler
To: libc-alpha@sourceware.org
Cc: carlos@redhat.com, Wilco.Dijkstra@arm.com, neleai@seznam.cz, Stefan Liebler
Subject: [PATCH 4/4] S390: Implement mempcpy with help of memcpy. [BZ #19765]
Date: Tue, 26 Apr 2016 14:07:49 +0200
Message-Id: <1461672469-2107-4-git-send-email-stli@linux.vnet.ibm.com>
In-Reply-To: <1461672469-2107-1-git-send-email-stli@linux.vnet.ibm.com>
References: <1461672469-2107-1-git-send-email-stli@linux.vnet.ibm.com>

There are optimized memcpy functions on s390, but no optimized mempcpy.
This patch adds mempcpy entry points to the memcpy.S files, which reuse
the memcpy implementation.  mempcpy itself is now an IFUNC function,
just as memcpy is, and the variants are listed in ifunc-impl-list.c.

The s390 string.h defines _HAVE_STRING_ARCH_mempcpy and provides
__mempcpy/mempcpy definitions.  If n is constant and small enough,
__mempcpy/mempcpy is an inline function which uses memcpy and returns
dest + n.  In this constant case, GCC emits instructions like mvi or
mvc and avoids the function call to memcpy.  If n is not constant,
__mempcpy is called.

ChangeLog:

	[BZ #19765]
	* sysdeps/s390/bits/string.h (mempcpy): Redirect to
	__mempcpy_inline_memcpy or __mempcpy_inline_mempcpy.
	(__mempcpy): Likewise.
	(__mempcpy_inline_mempcpy): New inline function.
	(__mempcpy_inline_memcpy): Likewise.
	(_HAVE_STRING_ARCH_mempcpy): New define.
	* sysdeps/s390/mempcpy.S: New file.
	* sysdeps/s390/multiarch/mempcpy.c: Likewise.
	* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add mempcpy.
	* sysdeps/s390/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add mempcpy variants.
	* sysdeps/s390/s390-32/memcpy.S: Add mempcpy entry point.
	(memcpy): Adjust to be usable from mempcpy entry point.
	(__memcpy_mvcle): Likewise.
	* sysdeps/s390/s390-64/memcpy.S: Likewise.
	* sysdeps/s390/s390-32/multiarch/memcpy-s390.S: Add entry points
	____mempcpy_z196, ____mempcpy_z10 and add __GI_ symbols for mempcpy.
	(__memcpy_z196): Adjust to be usable from mempcpy entry point.
	(__memcpy_z10): Likewise.
	* sysdeps/s390/s390-64/multiarch/memcpy-s390x.S: Likewise.
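
To illustrate the effect of the new bits/string.h definitions on callers,
here is a minimal, hypothetical example (fill_header and copy_n are made-up
names); it assumes a build with _GNU_SOURCE and optimization enabled, since
the macros are only active under __USE_GNU and __OPTIMIZE__.  A rough C
model of the shared memcpy/mempcpy assembler entry point is sketched after
the diff.

  #include <string.h>

  /* n is a compile-time constant <= 65536: mempcpy expands to
     __mempcpy_inline_memcpy, i.e. memcpy (dest, src, 16) + 16, so GCC
     can emit an mvc and no function call is generated.  */
  void *
  fill_header (void *dest, const void *src)
  {
    return mempcpy (dest, src, 16);
  }

  /* n is not constant: mempcpy expands to __mempcpy_inline_mempcpy,
     which calls __mempcpy; with multiarch this resolves via IFUNC to
     e.g. ____mempcpy_z196 on a z196 machine.  */
  void *
  copy_n (void *dest, const void *src, size_t n)
  {
    return mempcpy (dest, src, n);
  }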
--- sysdeps/s390/bits/string.h | 36 +++++++++++++++++++ sysdeps/s390/mempcpy.S | 19 ++++++++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 7 ++++ sysdeps/s390/multiarch/mempcpy.c | 26 ++++++++++++++ sysdeps/s390/s390-32/memcpy.S | 50 ++++++++++++++++----------- sysdeps/s390/s390-32/multiarch/memcpy-s390.S | 31 +++++++++++++++-- sysdeps/s390/s390-64/memcpy.S | 47 ++++++++++++++----------- sysdeps/s390/s390-64/multiarch/memcpy-s390x.S | 29 ++++++++++++++-- 9 files changed, 203 insertions(+), 45 deletions(-) create mode 100644 sysdeps/s390/mempcpy.S create mode 100644 sysdeps/s390/multiarch/mempcpy.c diff --git a/sysdeps/s390/bits/string.h b/sysdeps/s390/bits/string.h index 39e0b7f..eae3c8e 100644 --- a/sysdeps/s390/bits/string.h +++ b/sysdeps/s390/bits/string.h @@ -24,6 +24,42 @@ /* Use the unaligned string inline ABI. */ #define _STRING_INLINE_unaligned 1 +#if defined __USE_GNU && defined __OPTIMIZE__ \ + && defined __extern_always_inline && __GNUC_PREREQ (3,2) +# if !defined _FORCE_INLINES && !defined _HAVE_STRING_ARCH_mempcpy + +/* Don't inline mempcpy into memcpy as s390 has an optimized mempcpy. */ +# define _HAVE_STRING_ARCH_mempcpy 1 + +__extern_always_inline void * +__mempcpy_inline_memcpy (void *__restrict __dest, + const void *__restrict __src, size_t __n) +{ + return (char *) memcpy (__dest, __src, __n) + __n; +} + +__extern_always_inline void * +__mempcpy_inline_mempcpy (void *__restrict __dest, + const void *__restrict __src, size_t __n) +{ + return __mempcpy (__dest, __src, __n); +} + +/* If n is constant and small enough, __mempcpy/mempcpy is an inlined function, + which uses memcpy and returns dest + n. In this constant case, GCC emits + instructions like mvi or mvc and avoids the function call to memcpy. + If n is not constant, then __mempcpy is called. */ +# undef mempcpy +# undef __mempcpy +# define __mempcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (n) && n <= 65536 \ + ? __mempcpy_inline_memcpy (dest, src, n) \ + : __mempcpy_inline_mempcpy (dest, src, n))) +# define mempcpy(dest, src, n) __mempcpy (dest, src, n) + +# endif +#endif + /* We only provide optimizations if the user selects them and if GNU CC is used. */ #if !defined __NO_STRING_INLINES && defined __USE_STRING_INLINES \ diff --git a/sysdeps/s390/mempcpy.S b/sysdeps/s390/mempcpy.S new file mode 100644 index 0000000..c62074b --- /dev/null +++ b/sysdeps/s390/mempcpy.S @@ -0,0 +1,19 @@ +/* CPU specific mempcpy without multiarch - 32/64 bit S/390 version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* mempcpy is implemented in memcpy.S. 
*/ diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index 0805b07..89324ca 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -18,7 +18,8 @@ sysdep_routines += strlen strlen-vx strlen-c \ memchr memchr-vx \ rawmemchr rawmemchr-vx rawmemchr-c \ memccpy memccpy-vx memccpy-c \ - memrchr memrchr-vx memrchr-c + memrchr memrchr-vx memrchr-c \ + mempcpy endif ifeq ($(subdir),wcsmbs) diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 62a4359..89898ec 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -69,6 +69,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, S390_IS_Z10 (stfle_bits), __memcpy_z10) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_default)) + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + S390_IS_Z196 (stfle_bits), ____mempcpy_z196) + IFUNC_IMPL_ADD (array, i, mempcpy, + S390_IS_Z10 (stfle_bits), ____mempcpy_z10) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, ____mempcpy_default)) + #endif /* SHARED */ #ifdef HAVE_S390_VX_ASM_SUPPORT diff --git a/sysdeps/s390/multiarch/mempcpy.c b/sysdeps/s390/multiarch/mempcpy.c new file mode 100644 index 0000000..34d8329 --- /dev/null +++ b/sysdeps/s390/multiarch/mempcpy.c @@ -0,0 +1,26 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + + +#if defined SHARED && IS_IN (libc) +# include +s390_libc_ifunc (__mempcpy) + +__asm__ (".weak mempcpy\n\t" + ".set mempcpy,__mempcpy\n\t"); +#endif diff --git a/sysdeps/s390/s390-32/memcpy.S b/sysdeps/s390/s390-32/memcpy.S index 2ac51ab..6be5104 100644 --- a/sysdeps/s390/s390-32/memcpy.S +++ b/sysdeps/s390/s390-32/memcpy.S @@ -25,12 +25,23 @@ %r3 = address of source memory area %r4 = number of bytes to copy. 
*/ -#ifdef USE_MULTIARCH -ENTRY(__memcpy_default) -#else -ENTRY(memcpy) + .text +ENTRY(__mempcpy) + .machine "g5" + lr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_G5_start +END(__mempcpy) +#ifndef USE_MULTIARCH +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) #endif + +ENTRY(memcpy) .machine "g5" + lr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_G5_start: st %r13,52(%r15) .cfi_offset 13, -44 basr %r13,0 @@ -41,14 +52,13 @@ ENTRY(memcpy) lr %r5,%r4 srl %r5,8 ltr %r5,%r5 - lr %r1,%r2 jne .L_G5_13 ex %r4,.L_G5_17-.L_G5_16(%r13) .L_G5_4: l %r13,52(%r15) br %r14 .L_G5_13: - chi %r5,4096 # Switch to mvcle for copies >1MB + chi %r5,4096 # Switch to mvcle for copies >1MB jh __memcpy_mvcle .L_G5_12: mvc 0(256,%r1),0(%r3) @@ -59,24 +69,24 @@ ENTRY(memcpy) j .L_G5_4 .L_G5_17: mvc 0(1,%r1),0(%r3) -#ifdef USE_MULTIARCH -END(__memcpy_default) -#else END(memcpy) +#ifndef USE_MULTIARCH libc_hidden_builtin_def (memcpy) #endif ENTRY(__memcpy_mvcle) - # Using as standalone function will result in unexpected - # results since the length field is incremented by 1 in order to - # compensate the changes already done in the functions above. - ahi %r4,1 # length + 1 - lr %r5,%r4 # source length - lr %r4,%r3 # source address - lr %r3,%r5 # destination length = source length + # Using as standalone function will result in unexpected + # results since the length field is incremented by 1 in order to + # compensate the changes already done in the functions above. + lr %r0,%r2 # backup return dest [ + n ] + ahi %r4,1 # length + 1 + lr %r5,%r4 # source length + lr %r4,%r3 # source address + lr %r2,%r1 # destination address + lr %r3,%r5 # destination length = source length .L_MVCLE_1: - mvcle %r2,%r4,0 # thats it, MVCLE is your friend - jo .L_MVCLE_1 - lr %r2,%r1 # return destination address - br %r14 + mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L_MVCLE_1 + lr %r2,%r0 # return destination address + br %r14 END(__memcpy_mvcle) diff --git a/sysdeps/s390/s390-32/multiarch/memcpy-s390.S b/sysdeps/s390/s390-32/multiarch/memcpy-s390.S index 92ffaea..297a894 100644 --- a/sysdeps/s390/s390-32/multiarch/memcpy-s390.S +++ b/sysdeps/s390/s390-32/multiarch/memcpy-s390.S @@ -29,14 +29,23 @@ #if defined SHARED && IS_IN (libc) +ENTRY(____mempcpy_z196) + .machine "z196" + .machinemode "zarch_nohighgprs" + lr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_Z196_start +END(____mempcpy_z196) + ENTRY(__memcpy_z196) .machine "z196" .machinemode "zarch_nohighgprs" + lr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_Z196_start: llgfr %r4,%r4 ltgr %r4,%r4 je .L_Z196_4 aghi %r4,-1 - lr %r1,%r2 srlg %r5,%r4,8 ltgr %r5,%r5 jne .L_Z196_5 @@ -60,13 +69,22 @@ ENTRY(__memcpy_z196) mvc 0(1,%r1),0(%r3) END(__memcpy_z196) +ENTRY(____mempcpy_z10) + .machine "z10" + .machinemode "zarch_nohighgprs" + lr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_Z10_start +END(____mempcpy_z10) + ENTRY(__memcpy_z10) .machine "z10" .machinemode "zarch_nohighgprs" + lr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_Z10_start: llgfr %r4,%r4 cgije %r4,0,.L_Z10_4 aghi %r4,-1 - lr %r1,%r2 srlg %r5,%r4,8 cgijlh %r5,0,.L_Z10_13 .L_Z10_3: @@ -88,14 +106,23 @@ ENTRY(__memcpy_z10) mvc 0(1,%r1),0(%r3) END(__memcpy_z10) +# define __mempcpy ____mempcpy_default #endif /* SHARED && IS_IN (libc) */ +#define memcpy __memcpy_default #include "../memcpy.S" +#undef memcpy #if defined SHARED && IS_IN (libc) .globl __GI_memcpy .set __GI_memcpy,__memcpy_default +.globl 
__GI_mempcpy +.set __GI_mempcpy,____mempcpy_default +.globl __GI___mempcpy +.set __GI___mempcpy,____mempcpy_default #else .globl memcpy .set memcpy,__memcpy_default +.weak mempcpy +.set mempcpy,__mempcpy #endif diff --git a/sysdeps/s390/s390-64/memcpy.S b/sysdeps/s390/s390-64/memcpy.S index 9d60a14..e3ace0c 100644 --- a/sysdeps/s390/s390-64/memcpy.S +++ b/sysdeps/s390/s390-64/memcpy.S @@ -27,19 +27,27 @@ .text +ENTRY(__mempcpy) + .machine "z900" + lgr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_Z900_start +END(__mempcpy) +#ifndef USE_MULTIARCH +libc_hidden_def (__mempcpy) +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) +#endif -#ifdef USE_MULTIARCH -ENTRY(__memcpy_default) -#else ENTRY(memcpy) -#endif .machine "z900" + lgr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_Z900_start: ltgr %r4,%r4 je .L_Z900_4 aghi %r4,-1 srlg %r5,%r4,8 ltgr %r5,%r5 - lgr %r1,%r2 jne .L_Z900_13 .L_Z900_3: larl %r5,.L_Z900_15 @@ -57,25 +65,24 @@ ENTRY(memcpy) j .L_Z900_3 .L_Z900_15: mvc 0(1,%r1),0(%r3) - -#ifdef USE_MULTIARCH -END(__memcpy_default) -#else END(memcpy) +#ifndef USE_MULTIARCH libc_hidden_builtin_def (memcpy) #endif ENTRY(__memcpy_mvcle) - # Using as standalone function will result in unexpected - # results since the length field is incremented by 1 in order to - # compensate the changes already done in the functions above. - aghi %r4,1 # length + 1 - lgr %r5,%r4 # source length - lgr %r4,%r3 # source address - lgr %r3,%r5 # destination length = source length + # Using as standalone function will result in unexpected + # results since the length field is incremented by 1 in order to + # compensate the changes already done in the functions above. + lgr %r0,%r2 # backup return dest [ + n ] + aghi %r4,1 # length + 1 + lgr %r5,%r4 # source length + lgr %r4,%r3 # source address + lgr %r2,%r1 # destination address + lgr %r3,%r5 # destination length = source length .L_MVCLE_1: - mvcle %r2,%r4,0 # thats it, MVCLE is your friend - jo .L_MVCLE_1 - lgr %r2,%r1 # return destination address - br %r14 + mvcle %r2,%r4,0 # thats it, MVCLE is your friend + jo .L_MVCLE_1 + lgr %r2,%r0 # return destination address + br %r14 END(__memcpy_mvcle) diff --git a/sysdeps/s390/s390-64/multiarch/memcpy-s390x.S b/sysdeps/s390/s390-64/multiarch/memcpy-s390x.S index 8f54526..0f5a36e 100644 --- a/sysdeps/s390/s390-64/multiarch/memcpy-s390x.S +++ b/sysdeps/s390/s390-64/multiarch/memcpy-s390x.S @@ -29,12 +29,20 @@ #if defined SHARED && IS_IN (libc) +ENTRY(____mempcpy_z196) + .machine "z196" + lgr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_Z196_start +END(____mempcpy_z196) + ENTRY(__memcpy_z196) .machine "z196" + lgr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_Z196_start: ltgr %r4,%r4 je .L_Z196_4 aghi %r4,-1 - lgr %r1,%r2 srlg %r5,%r4,8 ltgr %r5,%r5 jne .L_Z196_5 @@ -58,11 +66,19 @@ ENTRY(__memcpy_z196) mvc 0(1,%r1),0(%r3) END(__memcpy_z196) +ENTRY(____mempcpy_z10) + .machine "z10" + lgr %r1,%r2 # Use as dest + la %r2,0(%r4,%r2) # Return dest + n + j .L_Z10_start +END(____mempcpy_z10) + ENTRY(__memcpy_z10) .machine "z10" + lgr %r1,%r2 # r1: Use as dest ; r2: Return dest +.L_Z10_start: cgije %r4,0,.L_Z10_4 aghi %r4,-1 - lgr %r1,%r2 srlg %r5,%r4,8 cgijlh %r5,0,.L_Z10_13 .L_Z10_3: @@ -84,14 +100,23 @@ ENTRY(__memcpy_z10) mvc 0(1,%r1),0(%r3) END(__memcpy_z10) +# define __mempcpy ____mempcpy_default #endif /* SHARED && IS_IN (libc) */ +#define memcpy __memcpy_default #include "../memcpy.S" +#undef memcpy #if defined SHARED && IS_IN (libc) .globl __GI_memcpy .set 
__GI_memcpy,__memcpy_default +.globl __GI_mempcpy +.set __GI_mempcpy,____mempcpy_default +.globl __GI___mempcpy +.set __GI___mempcpy,____mempcpy_default #else .globl memcpy .set memcpy,__memcpy_default +.weak mempcpy +.set mempcpy,__mempcpy #endif
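
For reference, the register shuffling in the mempcpy entry points above
amounts to the following rough C model (illustrative only; my_memcpy,
my_mempcpy and copy_body are made-up names).  The real code keeps the
return value in %r2, the working destination in %r1, and jumps into the
shared copy code.

  #include <stddef.h>

  /* Stands in for the common mvc/mvcle copy code: it copies n bytes and
     always returns whatever the entry point placed in the return slot.  */
  static void *
  copy_body (void *ret, void *dest, const void *src, size_t n)
  {
    unsigned char *d = dest;
    const unsigned char *s = src;
    while (n-- > 0)
      *d++ = *s++;
    return ret;
  }

  void *
  my_memcpy (void *dest, const void *src, size_t n)
  {
    /* memcpy's entry point: the return value is dest itself.  */
    return copy_body (dest, dest, src, n);
  }

  void *
  my_mempcpy (void *dest, const void *src, size_t n)
  {
    /* mempcpy's entry point: compute dest + n up front (what
       "la %r2,0(%r4,%r2)" does) and then fall through to the same
       copy code as memcpy.  */
    return copy_body ((char *) dest + n, dest, src, n);
  }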