From patchwork Wed Aug 26 13:46:31 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "H.J. Lu" X-Patchwork-Id: 8452 Received: (qmail 36633 invoked by alias); 26 Aug 2015 13:46:43 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 36561 invoked by uid 89); 26 Aug 2015 13:46:38 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=0.1 required=5.0 tests=AWL, BAYES_50, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-pa0-f45.google.com X-Received: by 10.66.219.163 with SMTP id pp3mr4098000pac.8.1440596793416; Wed, 26 Aug 2015 06:46:33 -0700 (PDT) Date: Wed, 26 Aug 2015 06:46:31 -0700 From: "H.J. Lu" To: GNU C Library Subject: [PATCH 3/3] Add i386 memset and memcpy assembly functions Message-ID: <20150826134631.GC19484@gmail.com> MIME-Version: 1.0 Content-Disposition: inline User-Agent: Mutt/1.5.23 (2014-03-12) Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB instructions. They will be used to implement i386 multi-arch memcpy. OK for master? H.J. --- * sysdeps/i386/bcopy.S: New file. * sysdeps/i386/bzero.S: Likewise. * sysdeps/i386/memcpy.S: Likewise. * sysdeps/i386/memmove.S: Likewise. * sysdeps/i386/mempcpy.S: Likewise. * sysdeps/i386/memset.S: Likewise. * sysdeps/i386/bzero.c: Removed. * sysdeps/i386/memset.c: Likewise. * sysdeps/i386/i586/memcpy_chk.S: Likewise. * sysdeps/i386/i586/mempcpy_chk.S: Likewise. * sysdeps/i386/i586/memset_chk.S: Likewise. * sysdeps/i386/i686/memcpy_chk.S: Moved to ... * sysdeps/i386/memcpy_chk.S: Here. * sysdeps/i386/i686/memmove_chk.S: Moved to ... * sysdeps/i386/memmove_chk.S: Here. * sysdeps/i386/i686/mempcpy_chk.S: Moved to ... 
* sysdeps/i386/mempcpy_chk.S: Here. * sysdeps/i386/i686/memset_chk.S: Moved to ... * sysdeps/i386/memset_chk.S: Here. --- sysdeps/i386/bcopy.S | 4 ++ sysdeps/i386/bzero.S | 5 ++ sysdeps/i386/bzero.c | 82 ------------------------------ sysdeps/i386/i586/memcpy_chk.S | 1 - sysdeps/i386/i586/mempcpy_chk.S | 1 - sysdeps/i386/i586/memset_chk.S | 1 - sysdeps/i386/memcpy.S | 95 +++++++++++++++++++++++++++++++++++ sysdeps/i386/{i686 => }/memcpy_chk.S | 8 +-- sysdeps/i386/memmove.S | 4 ++ sysdeps/i386/{i686 => }/memmove_chk.S | 15 +++--- sysdeps/i386/mempcpy.S | 7 +++ sysdeps/i386/{i686 => }/mempcpy_chk.S | 15 +++--- sysdeps/i386/memset.S | 68 +++++++++++++++++++++++++ sysdeps/i386/memset.c | 85 ------------------------------- sysdeps/i386/{i686 => }/memset_chk.S | 15 +++--- 15 files changed, 208 insertions(+), 198 deletions(-) create mode 100644 sysdeps/i386/bcopy.S create mode 100644 sysdeps/i386/bzero.S delete mode 100644 sysdeps/i386/bzero.c delete mode 100644 sysdeps/i386/i586/memcpy_chk.S delete mode 100644 sysdeps/i386/i586/mempcpy_chk.S delete mode 100644 sysdeps/i386/i586/memset_chk.S create mode 100644 sysdeps/i386/memcpy.S rename sysdeps/i386/{i686 => }/memcpy_chk.S (92%) create mode 100644 sysdeps/i386/memmove.S rename sysdeps/i386/{i686 => }/memmove_chk.S (78%) create mode 100644 sysdeps/i386/mempcpy.S rename sysdeps/i386/{i686 => }/mempcpy_chk.S (78%) create mode 100644 sysdeps/i386/memset.S delete mode 100644 sysdeps/i386/memset.c rename sysdeps/i386/{i686 => }/memset_chk.S (79%) diff --git a/sysdeps/i386/bcopy.S b/sysdeps/i386/bcopy.S new file mode 100644 index 0000000..12b8ddb --- /dev/null +++ b/sysdeps/i386/bcopy.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define USE_AS_BCOPY +#define MEMCPY bcopy +#include "memcpy.S" diff --git a/sysdeps/i386/bzero.S b/sysdeps/i386/bzero.S new file mode 100644 index 0000000..c8dd47b --- /dev/null +++ b/sysdeps/i386/bzero.S @@ -0,0 +1,5 @@ +#define USE_AS_BZERO +#define memset __bzero +#include "memset.S" + 
+weak_alias (__bzero, bzero) diff --git a/sysdeps/i386/bzero.c b/sysdeps/i386/bzero.c deleted file mode 100644 index 1a89444..0000000 --- a/sysdeps/i386/bzero.c +++ /dev/null @@ -1,82 +0,0 @@ -/* bzero -- set a block of memory to zero. For Intel 80x86, x>=3. - This file is part of the GNU C Library. - Copyright (C) 1991-2015 Free Software Foundation, Inc. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -#undef bzero -#undef __bzero - -#ifdef __GNUC__ - -void -__bzero (dstpp, len) - void *dstpp; - size_t len; -{ - /* N.B.: This code is almost verbatim from memset.c. */ - int d0; - unsigned long int dstp = (unsigned long int) dstpp; - - /* This explicit register allocation - improves code very much indeed. */ - register op_t x asm ("ax"); - - x = 0; - - /* Clear the direction flag, so filling will move forward. */ - asm volatile ("cld"); - - /* This threshold value is optimal. */ - if (len >= 12) - { - /* Adjust LEN for the bytes handled in the first loop. */ - len -= (-dstp) % OPSIZ; - - /* There are at least some bytes to set. - No need to test for LEN == 0 in this alignment loop. */ - - /* Fill bytes until DSTP is aligned on a longword boundary. 
*/ - asm volatile ("rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" ((-dstp) % OPSIZ), "a" (x) : - "memory"); - - /* Fill longwords. */ - asm volatile ("rep\n" - "stosl" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" (len / OPSIZ), "a" (x) : - "memory"); - len %= OPSIZ; - } - - /* Write the last few bytes. */ - asm volatile ("rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "c" (len), "a" (x) : - "memory"); -} -weak_alias (__bzero, bzero) - -#else -#include -#endif diff --git a/sysdeps/i386/i586/memcpy_chk.S b/sysdeps/i386/i586/memcpy_chk.S deleted file mode 100644 index ab8a95c..0000000 --- a/sysdeps/i386/i586/memcpy_chk.S +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/sysdeps/i386/i586/mempcpy_chk.S b/sysdeps/i386/i586/mempcpy_chk.S deleted file mode 100644 index 9a1de1d..0000000 --- a/sysdeps/i386/i586/mempcpy_chk.S +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/sysdeps/i386/i586/memset_chk.S b/sysdeps/i386/i586/memset_chk.S deleted file mode 100644 index 09f9d42..0000000 --- a/sysdeps/i386/i586/memset_chk.S +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/sysdeps/i386/memcpy.S b/sysdeps/i386/memcpy.S new file mode 100644 index 0000000..5f0196e --- /dev/null +++ b/sysdeps/i386/memcpy.S @@ -0,0 +1,95 @@ +/* memcpy with REP MOVSB + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include "asm-syntax.h" + +#ifndef MEMCPY +# define MEMCPY memcpy +# define MEMCPY_CHK __memcpy_chk +#endif + +#ifdef USE_AS_BCOPY +# define STR2 12 +# define STR1 STR2+4 +# define N STR1+4 +#else +# define STR1 12 +# define STR2 STR1+4 +# define N STR2+4 +#endif + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + + .text +#if defined SHARED && IS_IN (libc) && !defined USE_AS_BCOPY +ENTRY (MEMCPY_CHK) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (MEMCPY_CHK) +#endif +ENTRY (MEMCPY) + PUSH (%esi) + PUSH (%edi) + movl N(%esp), %ecx + movl STR1(%esp), %edi + movl STR2(%esp), %esi + mov %edi, %eax +#ifdef USE_AS_MEMPCPY + add %ecx, %eax +#endif + +#ifdef USE_AS_MEMMOVE + cmp %esi, %edi + ja L(copy_backward) + je L(bwd_write_0bytes) +#endif + + rep movsb + POP (%edi) + POP (%esi) + ret + +#ifdef USE_AS_MEMMOVE +L(copy_backward): + lea -1(%edi,%ecx), %edi + lea -1(%esi,%ecx), %esi + std + rep movsb + cld +L(bwd_write_0bytes): + POP (%edi) + POP (%esi) + ret +#endif + +END (MEMCPY) + +#ifndef USE_AS_BCOPY +libc_hidden_builtin_def (MEMCPY) +#endif diff --git a/sysdeps/i386/i686/memcpy_chk.S b/sysdeps/i386/memcpy_chk.S similarity index 92% rename from sysdeps/i386/i686/memcpy_chk.S rename to sysdeps/i386/memcpy_chk.S index cdf807f..b3b25de 100644 --- a/sysdeps/i386/i686/memcpy_chk.S +++ b/sysdeps/i386/memcpy_chk.S @@ -1,4 +1,4 @@ -/* Checking memcpy for i686. +/* Checking memcpy for i386. Copyright (C) 2004-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,10 +16,10 @@ License along with the GNU C Library; if not, see . 
*/ -#include -#include "asm-syntax.h" +#ifndef SHARED +# include +# include "asm-syntax.h" -#ifndef PIC /* For libc.so this is defined in memcpy.S. For libc.a, this is a separate source to avoid memcpy bringing in __chk_fail and all routines diff --git a/sysdeps/i386/memmove.S b/sysdeps/i386/memmove.S new file mode 100644 index 0000000..60a45d2 --- /dev/null +++ b/sysdeps/i386/memmove.S @@ -0,0 +1,4 @@ +#define USE_AS_MEMMOVE +#define MEMCPY memmove +#define MEMCPY_CHK __memmove_chk +#include "memcpy.S" diff --git a/sysdeps/i386/i686/memmove_chk.S b/sysdeps/i386/memmove_chk.S similarity index 78% rename from sysdeps/i386/i686/memmove_chk.S rename to sysdeps/i386/memmove_chk.S index 64bf9e0..26d2abd 100644 --- a/sysdeps/i386/i686/memmove_chk.S +++ b/sysdeps/i386/memmove_chk.S @@ -1,4 +1,4 @@ -/* Checking memmove for x86-64. +/* Checking memmove for i386. Copyright (C) 2004-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,14 +16,13 @@ License along with the GNU C Library; if not, see . */ -#include -#include "asm-syntax.h" +#ifndef SHARED +# include +# include "asm-syntax.h" -#ifndef PIC - /* For libc.so this is defined in memmove.S. - For libc.a, this is a separate source to avoid - memmove bringing in __chk_fail and all routines - it calls. */ +/* For libc.so this is defined in memmove.S. For libc.a, this is a + separate source to avoid memmove bringing in __chk_fail and all + routines it calls. 
*/ .text ENTRY (__memmove_chk) movl 12(%esp), %eax diff --git a/sysdeps/i386/mempcpy.S b/sysdeps/i386/mempcpy.S new file mode 100644 index 0000000..61addb7 --- /dev/null +++ b/sysdeps/i386/mempcpy.S @@ -0,0 +1,7 @@ +#define USE_AS_MEMPCPY +#define MEMCPY __mempcpy +#define MEMCPY_CHK __mempcpy_chk +#include "memcpy.S" + +weak_alias (__mempcpy, mempcpy) +libc_hidden_builtin_def (mempcpy) diff --git a/sysdeps/i386/i686/mempcpy_chk.S b/sysdeps/i386/mempcpy_chk.S similarity index 78% rename from sysdeps/i386/i686/mempcpy_chk.S rename to sysdeps/i386/mempcpy_chk.S index a61757b..05f86c3 100644 --- a/sysdeps/i386/i686/mempcpy_chk.S +++ b/sysdeps/i386/mempcpy_chk.S @@ -1,4 +1,4 @@ -/* Checking mempcpy for x86-64. +/* Checking mempcpy for i386. Copyright (C) 2004-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,14 +16,13 @@ License along with the GNU C Library; if not, see . */ -#include -#include "asm-syntax.h" +#ifndef SHARED +# include +# include "asm-syntax.h" -#ifndef PIC - /* For libc.so this is defined in mempcpy.S. - For libc.a, this is a separate source to avoid - mempcpy bringing in __chk_fail and all routines - it calls. */ +/* For libc.so this is defined in mempcpy.S. For libc.a, this is a + separate source to avoid mempcpy bringing in __chk_fail and all + routines it calls. */ .text ENTRY (__mempcpy_chk) movl 12(%esp), %eax diff --git a/sysdeps/i386/memset.S b/sysdeps/i386/memset.S new file mode 100644 index 0000000..21b3430 --- /dev/null +++ b/sysdeps/i386/memset.S @@ -0,0 +1,68 @@ +/* memset with REP STOSB + Copyright (C) 2015 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define CFI_PUSH(REG) \ + cfi_adjust_cfa_offset (4); \ + cfi_rel_offset (REG, 0) + +#define CFI_POP(REG) \ + cfi_adjust_cfa_offset (-4); \ + cfi_restore (REG) + +#define PUSH(REG) pushl REG; CFI_PUSH (REG) +#define POP(REG) popl REG; CFI_POP (REG) + +#define STR1 8 +#ifdef USE_AS_BZERO +#define N STR1+4 +#else +#define STR2 STR1+4 +#define N STR2+4 +#endif + + .text +#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO +ENTRY (__memset_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) + jb HIDDEN_JUMPTARGET (__chk_fail) +END (__memset_chk) +#endif +ENTRY (memset) + PUSH (%edi) + movl N(%esp), %ecx + movl STR1(%esp), %edi +#ifdef USE_AS_BZERO + xor %eax, %eax +#else + movzbl STR2(%esp), %eax + mov %edi, %edx +#endif + rep stosb +#ifndef USE_AS_BZERO + mov %edx, %eax +#endif + POP (%edi) + ret +END (memset) + +#ifndef USE_AS_BZERO +libc_hidden_builtin_def (memset) +#endif diff --git a/sysdeps/i386/memset.c b/sysdeps/i386/memset.c deleted file mode 100644 index bf11590..0000000 --- a/sysdeps/i386/memset.c +++ /dev/null @@ -1,85 +0,0 @@ -/* Set a block of memory to some byte value. - For Intel 80x86, x>=3. - Copyright (C) 1991-2015 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Torbjorn Granlund (tege@sics.se). - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. 
- - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -#include -#include - -#ifdef __GNUC__ - -#undef memset - -void * -memset (void *dstpp, int c, size_t len) -{ - int d0; - unsigned long int dstp = (unsigned long int) dstpp; - - /* This explicit register allocation - improves code very much indeed. */ - register op_t x asm("ax"); - - x = (unsigned char) c; - - /* Clear the direction flag, so filling will move forward. */ - asm volatile("cld"); - - /* This threshold value is optimal. */ - if (len >= 12) - { - /* Fill X with four copies of the char we want to fill with. */ - x |= (x << 8); - x |= (x << 16); - - /* Adjust LEN for the bytes handled in the first loop. */ - len -= (-dstp) % OPSIZ; - - /* There are at least some bytes to set. - No need to test for LEN == 0 in this alignment loop. */ - - /* Fill bytes until DSTP is aligned on a longword boundary. */ - asm volatile("rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" ((-dstp) % OPSIZ), "a" (x) : - "memory"); - - /* Fill longwords. */ - asm volatile("rep\n" - "stosl" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" (len / OPSIZ), "a" (x) : - "memory"); - len %= OPSIZ; - } - - /* Write the last few bytes. 
*/ - asm volatile("rep\n" - "stosb" /* %0, %2, %3 */ : - "=D" (dstp), "=c" (d0) : - "0" (dstp), "1" (len), "a" (x) : - "memory"); - - return dstpp; -} -libc_hidden_builtin_def (memset) - -#else -#include -#endif diff --git a/sysdeps/i386/i686/memset_chk.S b/sysdeps/i386/memset_chk.S similarity index 79% rename from sysdeps/i386/i686/memset_chk.S rename to sysdeps/i386/memset_chk.S index da982fd..2312d32 100644 --- a/sysdeps/i386/i686/memset_chk.S +++ b/sysdeps/i386/memset_chk.S @@ -1,4 +1,4 @@ -/* Checking memset for i686. +/* Checking memset for i386. Copyright (C) 2004-2015 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -16,14 +16,13 @@ License along with the GNU C Library; if not, see . */ -#include -#include "asm-syntax.h" - #ifndef SHARED - /* For libc.so this is defined in memset.S. - For libc.a, this is a separate source to avoid - memset bringing in __chk_fail and all routines - it calls. */ +# include +# include "asm-syntax.h" + +/* For libc.so this is defined in memset.S. For libc.a, this is a + separate source to avoid memset bringing in __chk_fail and all + routines it calls. */ .text ENTRY (__memset_chk) movl 12(%esp), %eax