From patchwork Fri Aug 11 07:14:13 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Siddhesh Poyarekar X-Patchwork-Id: 22069 Received: (qmail 48633 invoked by alias); 11 Aug 2017 07:14:44 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 34006 invoked by uid 89); 11 Aug 2017 07:14:28 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-26.1 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE, SPF_NEUTRAL autolearn=ham version=3.3.2 spammy=Hx-languages-length:4962, 2810 X-HELO: homiemail-a119.g.dreamhost.com From: Siddhesh Poyarekar To: libc-alpha@sourceware.org Subject: [PATCH 1/2] aarch64: Add multiarch variants of __memcpy_chk Date: Fri, 11 Aug 2017 12:44:13 +0530 Message-Id: <1502435654-18032-2-git-send-email-siddhesh@sourceware.org> In-Reply-To: <1502435654-18032-1-git-send-email-siddhesh@sourceware.org> References: <1502435654-18032-1-git-send-email-siddhesh@sourceware.org> In its current form, __memcpy_chk always calls the generic version of memcpy, even when a CPU-specific multiarch memcpy has been selected, thereby defeating the point of that optimization. Add multiarch __memcpy_chk variants: short entry snippets that perform the destination-length check and then continue into their respective memcpy implementations. 
* sysdeps/aarch64/memcpy.S * sysdeps/aarch64/multiarch/Makefile * sysdeps/aarch64/multiarch/ifunc-impl-list.c * sysdeps/aarch64/multiarch/memcpy_falkor.S * sysdeps/aarch64/multiarch/memcpy_generic.S * sysdeps/aarch64/multiarch/memcpy_thunderx.S --- sysdeps/aarch64/memcpy.S | 16 +++++++++++++++- sysdeps/aarch64/multiarch/Makefile | 4 ++++ sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 ++++ sysdeps/aarch64/multiarch/memcpy_falkor.S | 13 ++++++++++++- sysdeps/aarch64/multiarch/memcpy_generic.S | 5 +++++ sysdeps/aarch64/multiarch/memcpy_thunderx.S | 13 ++++++++++++- 6 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sysdeps/aarch64/memcpy.S b/sysdeps/aarch64/memcpy.S index 88a3b90..18ef29e 100644 --- a/sysdeps/aarch64/memcpy.S +++ b/sysdeps/aarch64/memcpy.S @@ -27,6 +27,7 @@ #define dstin x0 #define src x1 #define count x2 +#define dstlen x3 #define dst x3 #define srcend x4 #define dstend x5 @@ -66,7 +67,20 @@ # define MEMCPY memcpy #endif -ENTRY_ALIGN (MEMMOVE, 6) +#if defined SHARED && IS_IN (libc) +#ifndef MEMCPY_CHK +# define MEMCPY_CHK __memcpy_chk +#endif + +ENTRY_ALIGN (MEMCPY_CHK, 6) + cmp dstlen, count + b.lo __chk_fail + b MEMCPY + nop +END (MEMCPY_CHK) +#endif + +ENTRY (MEMMOVE) DELOUSE (0) DELOUSE (1) diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile index 164ba1a..8189eb4 100644 --- a/sysdeps/aarch64/multiarch/Makefile +++ b/sysdeps/aarch64/multiarch/Makefile @@ -1,3 +1,7 @@ ifeq ($(subdir),string) sysdep_routines += memcpy_generic memcpy_thunderx memcpy_falkor endif + +ifeq ($(subdir),debug) +sysdep_routines += memcpy_chk-nonshared +endif diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c index 8e873b3..d534818 100644 --- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c +++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c @@ -42,6 +42,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_IMPL_ADD (array, i, memcpy, 1, 
__memcpy_thunderx) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_falkor) IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic)) + IFUNC_IMPL (i, name, __memcpy_chk, + IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_thunderx) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_falkor) + IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1, __memcpy_chk_generic)) IFUNC_IMPL (i, name, memmove, IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_thunderx) IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic)) diff --git a/sysdeps/aarch64/multiarch/memcpy_falkor.S b/sysdeps/aarch64/multiarch/memcpy_falkor.S index dea4f22..181940f 100644 --- a/sysdeps/aarch64/multiarch/memcpy_falkor.S +++ b/sysdeps/aarch64/multiarch/memcpy_falkor.S @@ -26,6 +26,7 @@ #define dstin x0 #define src x1 #define count x2 +#define dstlen x3 #define dst x3 #define srcend x4 #define dstend x5 @@ -60,7 +61,17 @@ The non-temporal stores help optimize cache utilization. */ #if IS_IN (libc) -ENTRY_ALIGN (__memcpy_falkor, 6) + +# ifdef SHARED +ENTRY_ALIGN (__memcpy_chk_falkor, 6) + cmp dstlen, count + b.lo __chk_fail + nop + nop +END (__memcpy_chk_falkor) +# endif + +ENTRY(__memcpy_falkor) cmp count, 32 add srcend, src, count diff --git a/sysdeps/aarch64/multiarch/memcpy_generic.S b/sysdeps/aarch64/multiarch/memcpy_generic.S index edb2e52..43f6a75 100644 --- a/sysdeps/aarch64/multiarch/memcpy_generic.S +++ b/sysdeps/aarch64/multiarch/memcpy_generic.S @@ -28,6 +28,10 @@ # define MEMCPY __memcpy_generic # define MEMMOVE __memmove_generic +# ifdef SHARED +# define MEMCPY_CHK __memcpy_chk_generic +# endif + /* Do not hide the generic versions of memcpy and memmove, we use them internally. */ # undef libc_hidden_builtin_def @@ -37,6 +41,7 @@ /* It doesn't make sense to send libc-internal memcpy calls through a PLT. 
*/ .globl __GI_memcpy; __GI_memcpy = __memcpy_generic .globl __GI_memmove; __GI_memmove = __memmove_generic + .globl __GI___memcpy_chk; __GI___memcpy_chk = __memcpy_chk_generic # endif #endif diff --git a/sysdeps/aarch64/multiarch/memcpy_thunderx.S b/sysdeps/aarch64/multiarch/memcpy_thunderx.S index 5ac9e34..2f48290 100644 --- a/sysdeps/aarch64/multiarch/memcpy_thunderx.S +++ b/sysdeps/aarch64/multiarch/memcpy_thunderx.S @@ -33,6 +33,7 @@ #define dstin x0 #define src x1 #define count x2 +#define dstlen x3 #define dst x3 #define srcend x4 #define dstend x5 @@ -80,7 +81,17 @@ # define MEMMOVE __memmove_thunderx # define USE_THUNDERX -ENTRY_ALIGN (MEMMOVE, 6) +#ifdef SHARED +#define MEMCPY_CHK __memcpy_chk_thunderx +ENTRY_ALIGN (MEMCPY_CHK, 6) + cmp dstlen, count + b.lo __chk_fail + b MEMCPY + nop +END (MEMCPY_CHK) +#endif + +ENTRY (MEMMOVE) DELOUSE (0) DELOUSE (1)