[3/3] Add i386 memset and memcpy assembly functions
Commit Message
Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
instructions. They will be used to implement i386 multi-arch memcpy.
OK for master?
H.J.
--
* sysdeps/i386/bcopy.S: New file.
* sysdeps/i386/bzero.S: Likewise.
* sysdeps/i386/memcpy.S: Likewise.
* sysdeps/i386/memmove.S: Likewise.
* sysdeps/i386/mempcpy.S: Likewise.
* sysdeps/i386/memset.S: Likewise.
* sysdeps/i386/bzero.c: Removed.
* sysdeps/i386/memset.c: Likewise.
* sysdeps/i386/i586/memcpy_chk.S: Likewise.
* sysdeps/i386/i586/mempcpy_chk.S: Likewise.
* sysdeps/i386/i586/memset_chk.S: Likewise.
* sysdeps/i386/i686/memcpy_chk.S: Moved to ...
* sysdeps/i386/memcpy_chk.S: Here.
* sysdeps/i386/i686/memmove_chk.S: Moved to ...
* sysdeps/i386/memmove_chk.S: Here.
* sysdeps/i386/i686/mempcpy_chk.S: Moved to ...
* sysdeps/i386/mempcpy_chk.S: Here.
* sysdeps/i386/i686/memset_chk.S: Moved to ...
* sysdeps/i386/memset_chk.S: Here.
---
sysdeps/i386/bcopy.S | 4 ++
sysdeps/i386/bzero.S | 5 ++
sysdeps/i386/bzero.c | 82 ------------------------------
sysdeps/i386/i586/memcpy_chk.S | 1 -
sysdeps/i386/i586/mempcpy_chk.S | 1 -
sysdeps/i386/i586/memset_chk.S | 1 -
sysdeps/i386/memcpy.S | 95 +++++++++++++++++++++++++++++++++++
sysdeps/i386/{i686 => }/memcpy_chk.S | 8 +--
sysdeps/i386/memmove.S | 4 ++
sysdeps/i386/{i686 => }/memmove_chk.S | 15 +++---
sysdeps/i386/mempcpy.S | 7 +++
sysdeps/i386/{i686 => }/mempcpy_chk.S | 15 +++---
sysdeps/i386/memset.S | 68 +++++++++++++++++++++++++
sysdeps/i386/memset.c | 85 -------------------------------
sysdeps/i386/{i686 => }/memset_chk.S | 15 +++---
15 files changed, 208 insertions(+), 198 deletions(-)
create mode 100644 sysdeps/i386/bcopy.S
create mode 100644 sysdeps/i386/bzero.S
delete mode 100644 sysdeps/i386/bzero.c
delete mode 100644 sysdeps/i386/i586/memcpy_chk.S
delete mode 100644 sysdeps/i386/i586/mempcpy_chk.S
delete mode 100644 sysdeps/i386/i586/memset_chk.S
create mode 100644 sysdeps/i386/memcpy.S
rename sysdeps/i386/{i686 => }/memcpy_chk.S (92%)
create mode 100644 sysdeps/i386/memmove.S
rename sysdeps/i386/{i686 => }/memmove_chk.S (78%)
create mode 100644 sysdeps/i386/mempcpy.S
rename sysdeps/i386/{i686 => }/mempcpy_chk.S (78%)
create mode 100644 sysdeps/i386/memset.S
delete mode 100644 sysdeps/i386/memset.c
rename sysdeps/i386/{i686 => }/memset_chk.S (79%)
Comments
On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
> instructions. They will be used to implement i386 multi-arch memcpy.
>
> OK for master?
>
No, as rep stosb has terrible performance on most machines; on Ivy
Bridge it's around six times slower than rep stosq. I wouldn't be
surprised if, when you test it, it turns out to be at least three times
slower than rep stosl on the affected machines.
The only exception I know of where you should use rep stosb is Haswell.
Perhaps you could adapt this implementation that I used for rep stosq
and change it to rep stosl?
/* memset_rep8 (x86-64, AT&T syntax): fill %rdx bytes starting at %rdi with
   the low byte of %esi and return the original %rdi in %rax.
   Lengths of 8 or more use one unaligned 8-byte store at the start, one
   unaligned 8-byte store at the end, and an 8-byte-aligned rep stosq for
   everything in between.  Lengths 0..7 use overlapping narrow stores.
   NOTE(review): the register roles look like the System V AMD64 argument
   order for memset (dst, c, n) -- confirm against the intended caller.
   NOTE(review): rep stosq only moves forward if the direction flag is
   clear on entry -- presumably guaranteed by the ABI; confirm.  */
.text ;.globl memset_rep8; .type memset_rep8, @function;memset_rep8:; .cfi_startproc
movzbl %sil, %eax	/* rax = fill byte, zero-extended */
lea (%rdi, %rdx), %rcx	/* rcx = end pointer (one past the last byte) */
movabsq $72340172838076673, %rsi	/* 0x0101010101010101 */
imulq %rsi, %rax	/* replicate the fill byte into all 8 bytes of rax */
cmp $7, %rdx
jbe .Lless_16_bytes	/* despite the label name, taken for lengths 0..7 */
movq %rax, (%rdi)	/* head: first 8 bytes, possibly unaligned */
movq %rdi, %rsi	/* keep the original destination for the return value */
leaq 8(%rdi), %rdi
movq %rax, -8(%rcx)	/* tail: last 8 bytes, possibly unaligned */
andq $-8, %rdi	/* rdi = first 8-byte boundary strictly after the start */
subq %rdi, %rcx
shrq $3, %rcx	/* rcx = whole quadwords from rdi to the end; the remainder is covered by the tail store */
rep stosq
movq %rsi, %rax	/* return the original destination */
ret
.p2align 4
.Lless_16_bytes:
movq %rax, %rsi	/* rsi = replicated fill pattern */
movq %rdi, %rax	/* return value = destination */
testb $4, %dl; jne .Lbetween_4_7_bytes	/* length <= 7 here, so bit 2 set means 4..7 */
cmp $1, %dl; jbe .Lbetween_0_1_byte	/* flags from this cmp are reused at .Lbetween_0_1_byte */
/* Length is 2 or 3: two bytes ending at the end pointer plus the first byte.  */
movw %si, -2(%rcx)
movb %sil, (%rdi)
ret
.p2align 3
.Lbetween_4_7_bytes:
/* Two 4-byte stores, one at the start and one ending at the end pointer;
   they overlap when the length is less than 8.  */
movl %esi, (%rdi)
movl %esi, -4(%rcx)
ret
.Lbetween_0_1_byte:
jb .Lzero_byte	/* still the flags of cmp $1, %dl: below means length 0 */
movb %sil, (%rdi)	/* length 1 */
.Lzero_byte:
ret
.cfi_endproc ; .size memset_rep8, .-memset_rep8
On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
>> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
>> instructions. They will be used to implement i386 multi-arch memcpy.
>>
>> OK for master?
>>
> No, as rep stosb has terrible performance on most of machines, on ivy
> bridge its around six times slower than rep stosq. I wouldn't be
>
I added them for i386 memcpy family multiarch functions. We have
memcpy for i586 and i686:
sysdeps/i386/i586/memcpy.S
sysdeps/i386/i686/memcpy.S
But we don't have it for i486. I add them so that I can implement
i386 memset and memcpy family multiarch functions for i486,
i586 and i686 targets. i386 memset and memcpy are used only
when
1. Building glibc for i486 with --disable-multi-arch. Or
2. Processor doesn't support i686 nor SSE2.
I believe these are very rare cases.
On Wed, Aug 26, 2015 at 07:49:51AM -0700, H.J. Lu wrote:
> On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> > On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
> >> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
> >> instructions. They will be used to implement i386 multi-arch memcpy.
> >>
> >> OK for master?
> >>
> > No, as rep stosb has terrible performance on most of machines, on ivy
> > bridge its around six times slower than rep stosq. I wouldn't be
> >
>
> I added them for i386 memcpy family multiarch functions. We have
> memcpy for i586 and i686:
>
> sysdeps/i386/i586/memcpy.S
> sysdeps/i386/i686/memcpy.S
>
> But we don't have it for i486. I add them so that I can implement
> i386 memset and memcpy family multiarch functions for i486,
> i586 and i686 targets. i386 memset and memcpy are used only
> when
>
> 1. Building glibc for i486 with --disable-multi-arch. Or
> 2. Processor doesn't support i686 nor SSE2.
>
> I believe these are a very rare cases.
>
While that is true, the existing implementation looked better. So if you
need to use assembly, could you take these files compiled with gcc -S, or
something like that?
On Wed, Aug 26, 2015 at 8:15 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> On Wed, Aug 26, 2015 at 07:49:51AM -0700, H.J. Lu wrote:
>> On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>> > On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
>> >> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
>> >> instructions. They will be used to implement i386 multi-arch memcpy.
>> >>
>> >> OK for master?
>> >>
>> > No, as rep stosb has terrible performance on most of machines, on ivy
>> > bridge its around six times slower than rep stosq. I wouldn't be
>> >
>>
>> I added them for i386 memcpy family multiarch functions. We have
>> memcpy for i586 and i686:
>>
>> sysdeps/i386/i586/memcpy.S
>> sysdeps/i386/i686/memcpy.S
>>
>> But we don't have it for i486. I add them so that I can implement
>> i386 memset and memcpy family multiarch functions for i486,
>> i586 and i686 targets. i386 memset and memcpy are used only
>> when
>>
>> 1. Building glibc for i486 with --disable-multi-arch. Or
>> 2. Processor doesn't support i686 nor SSE2.
>>
>> I believe these are a very rare cases.
>>
> While true a existing implementation looked better. So if you need use
> assembly could you pick these files compiled with gcc -S or something
> like that?
We don't know if they are better than REP MOVSB/STOSB in cases of
1. Building glibc for i486 with --disable-multi-arch. Or
2. Processor doesn't support i686 nor SSE2.
and on Haswell/Skylake, REP MOVSB/STOSB aren't too bad.
On Wed, Aug 26, 2015 at 08:31:30AM -0700, H.J. Lu wrote:
> On Wed, Aug 26, 2015 at 8:15 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> > On Wed, Aug 26, 2015 at 07:49:51AM -0700, H.J. Lu wrote:
> >> On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> >> > On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
> >> >> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
> >> >> instructions. They will be used to implement i386 multi-arch memcpy.
> >> >>
> >> >> OK for master?
> >> >>
> >> > No, as rep stosb has terrible performance on most of machines, on ivy
> >> > bridge its around six times slower than rep stosq. I wouldn't be
> >> >
> >>
> >> I added them for i386 memcpy family multiarch functions. We have
> >> memcpy for i586 and i686:
> >>
> >> sysdeps/i386/i586/memcpy.S
> >> sysdeps/i386/i686/memcpy.S
> >>
> >> But we don't have it for i486. I add them so that I can implement
> >> i386 memset and memcpy family multiarch functions for i486,
> >> i586 and i686 targets. i386 memset and memcpy are used only
> >> when
> >>
> >> 1. Building glibc for i486 with --disable-multi-arch. Or
> >> 2. Processor doesn't support i686 nor SSE2.
> >>
> >> I believe these are a very rare cases.
> >>
> > While true a existing implementation looked better. So if you need use
> > assembly could you pick these files compiled with gcc -S or something
> > like that?
>
> We don't know if they are better than REP MOVSB/STOSB in cases of
>
> 1. Building glibc for i486 with --disable-multi-arch. Or
> 2. Processor doesn't support i686 nor SSE2.
>
> and on Haswell/Skylake, REP MOVSB/STOSB aren't too bad.
>
I could accept that if we decide that we don't care about performance in
these cases. As for 1, the user already doesn't care, as we need to use very
slow implementations with --disable-multi-arch.
As for 2, it's about whether we care about the performance of old machines or not.
I would be for not optimizing for machines without SSE2, as we don't have
any to test on.
With these arguments the change would be acceptable, but of course I would
prefer one that is better on Sandy Bridge with --disable-multi-arch.
On Thu, Aug 27, 2015 at 4:46 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
> On Wed, Aug 26, 2015 at 08:31:30AM -0700, H.J. Lu wrote:
>> On Wed, Aug 26, 2015 at 8:15 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>> > On Wed, Aug 26, 2015 at 07:49:51AM -0700, H.J. Lu wrote:
>> >> On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>> >> > On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
>> >> >> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
>> >> >> instructions. They will be used to implement i386 multi-arch memcpy.
>> >> >>
>> >> >> OK for master?
>> >> >>
>> >> > No, as rep stosb has terrible performance on most of machines, on ivy
>> >> > bridge its around six times slower than rep stosq. I wouldn't be
>> >> >
>> >>
>> >> I added them for i386 memcpy family multiarch functions. We have
>> >> memcpy for i586 and i686:
>> >>
>> >> sysdeps/i386/i586/memcpy.S
>> >> sysdeps/i386/i686/memcpy.S
>> >>
>> >> But we don't have it for i486. I add them so that I can implement
>> >> i386 memset and memcpy family multiarch functions for i486,
>> >> i586 and i686 targets. i386 memset and memcpy are used only
>> >> when
>> >>
>> >> 1. Building glibc for i486 with --disable-multi-arch. Or
>> >> 2. Processor doesn't support i686 nor SSE2.
>> >>
>> >> I believe these are a very rare cases.
>> >>
>> > While true a existing implementation looked better. So if you need use
>> > assembly could you pick these files compiled with gcc -S or something
>> > like that?
>>
>> We don't know if they are better than REP MOVSB/STOSB in cases of
>>
>> 1. Building glibc for i486 with --disable-multi-arch. Or
>> 2. Processor doesn't support i686 nor SSE2.
>>
>> and on Haswell/Skylake, REP MOVSB/STOSB aren't too bad.
>>
> I could accept that if we decide that we dont care about performance in
> these cases. As 1. user already doesn't care as we need to use very slow
> implementations with --disable-multi-arch.
>
> As for 2 its about if we care about performance of old machines or not.
> I would be for not optimizing for machines without sse2 as we don't have
> these to test that.
>
> With these arguments a change would be acceptable but of course I would
> prefer one that is better on sandy bridge with disable-multiarch.
If people want better performance on Sandy Bridge, they shouldn't
configure glibc as i486 with --disable-multi-arch :-(.
On Thu, Aug 27, 2015 at 5:37 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Aug 27, 2015 at 4:46 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>> On Wed, Aug 26, 2015 at 08:31:30AM -0700, H.J. Lu wrote:
>>> On Wed, Aug 26, 2015 at 8:15 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>>> > On Wed, Aug 26, 2015 at 07:49:51AM -0700, H.J. Lu wrote:
>>> >> On Wed, Aug 26, 2015 at 7:29 AM, Ondřej Bílka <neleai@seznam.cz> wrote:
>>> >> > On Wed, Aug 26, 2015 at 06:46:31AM -0700, H.J. Lu wrote:
>>> >> >> Add i386 memset and memcpy assembly functions with REP MOVSB/STOSB
>>> >> >> instructions. They will be used to implement i386 multi-arch memcpy.
>>> >> >>
>>> >> >> OK for master?
>>> >> >>
>>> >> > No, as rep stosb has terrible performance on most of machines, on ivy
>>> >> > bridge its around six times slower than rep stosq. I wouldn't be
>>> >> >
>>> >>
>>> >> I added them for i386 memcpy family multiarch functions. We have
>>> >> memcpy for i586 and i686:
>>> >>
>>> >> sysdeps/i386/i586/memcpy.S
>>> >> sysdeps/i386/i686/memcpy.S
>>> >>
>>> >> But we don't have it for i486. I add them so that I can implement
>>> >> i386 memset and memcpy family multiarch functions for i486,
>>> >> i586 and i686 targets. i386 memset and memcpy are used only
>>> >> when
>>> >>
>>> >> 1. Building glibc for i486 with --disable-multi-arch. Or
>>> >> 2. Processor doesn't support i686 nor SSE2.
>>> >>
>>> >> I believe these are a very rare cases.
>>> >>
>>> > While true a existing implementation looked better. So if you need use
>>> > assembly could you pick these files compiled with gcc -S or something
>>> > like that?
>>>
>>> We don't know if they are better than REP MOVSB/STOSB in cases of
>>>
>>> 1. Building glibc for i486 with --disable-multi-arch. Or
>>> 2. Processor doesn't support i686 nor SSE2.
>>>
>>> and on Haswell/Skylake, REP MOVSB/STOSB aren't too bad.
>>>
>> I could accept that if we decide that we dont care about performance in
>> these cases. As 1. user already doesn't care as we need to use very slow
>> implementations with --disable-multi-arch.
>>
>> As for 2 its about if we care about performance of old machines or not.
>> I would be for not optimizing for machines without sse2 as we don't have
>> these to test that.
>>
>> With these arguments a change would be acceptable but of course I would
>> prefer one that is better on sandy bridge with disable-multiarch.
>
> If people want better performance on Sandy Bridge, they shouldn't
> configure glibc as i486 with --disable-multi-arch :-(.
>
> --
> H.J.
I am checking it in now.
new file mode 100644
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE /* enable the backward-copy path in memcpy.S for overlapping buffers */
+#define USE_AS_BCOPY /* source pointer is the first stack argument; no _chk entry, no hidden def */
+#define MEMCPY bcopy /* name of the entry point emitted by memcpy.S */
+#include "memcpy.S"
new file mode 100644
@@ -0,0 +1,5 @@
+#define USE_AS_BZERO /* memset.S: no fill-byte argument, fill with zero, no __memset_chk entry */
+#define memset __bzero /* the ENTRY/END (memset) in memset.S then define __bzero */
+#include "memset.S"
+
+weak_alias (__bzero, bzero)
deleted file mode 100644
@@ -1,82 +0,0 @@
-/* bzero -- set a block of memory to zero. For Intel 80x86, x>=3.
- This file is part of the GNU C Library.
- Copyright (C) 1991-2015 Free Software Foundation, Inc.
- Contributed by Torbjorn Granlund (tege@sics.se).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-#include <memcopy.h>
-
-#undef bzero
-#undef __bzero
-
-#ifdef __GNUC__
-
-void
-__bzero (dstpp, len)
- void *dstpp;
- size_t len;
-{
- /* N.B.: This code is almost verbatim from memset.c. */
- int d0;
- unsigned long int dstp = (unsigned long int) dstpp;
-
- /* This explicit register allocation
- improves code very much indeed. */
- register op_t x asm ("ax");
-
- x = 0;
-
- /* Clear the direction flag, so filling will move forward. */
- asm volatile ("cld");
-
- /* This threshold value is optimal. */
- if (len >= 12)
- {
- /* Adjust LEN for the bytes handled in the first loop. */
- len -= (-dstp) % OPSIZ;
-
- /* There are at least some bytes to set.
- No need to test for LEN == 0 in this alignment loop. */
-
- /* Fill bytes until DSTP is aligned on a longword boundary. */
- asm volatile ("rep\n"
- "stosb" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "1" ((-dstp) % OPSIZ), "a" (x) :
- "memory");
-
- /* Fill longwords. */
- asm volatile ("rep\n"
- "stosl" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "1" (len / OPSIZ), "a" (x) :
- "memory");
- len %= OPSIZ;
- }
-
- /* Write the last few bytes. */
- asm volatile ("rep\n"
- "stosb" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "c" (len), "a" (x) :
- "memory");
-}
-weak_alias (__bzero, bzero)
-
-#else
-#include <string/bzero.c>
-#endif
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sysdeps/i386/i686/memcpy_chk.S>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sysdeps/i386/i686/mempcpy_chk.S>
deleted file mode 100644
@@ -1 +0,0 @@
-#include <sysdeps/i386/i686/memset_chk.S>
new file mode 100644
@@ -0,0 +1,95 @@
+/* memcpy with REP MOVSB
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY memcpy
+# define MEMCPY_CHK __memcpy_chk
+#endif
+
+#ifdef USE_AS_BCOPY
+# define STR2 12 /* source is the first argument: 4 (return address) + 8 (saved %esi, %edi) */
+# define STR1 STR2+4 /* destination is the second argument */
+# define N STR1+4 /* byte count is the third argument */
+#else
+# define STR1 12 /* destination is the first argument: 4 (return address) + 8 (saved %esi, %edi) */
+# define STR2 STR1+4 /* source is the second argument */
+# define N STR2+4 /* byte count is the third argument */
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BCOPY
+ENTRY (MEMCPY_CHK)
+ movl 12(%esp), %eax /* third stack argument -- presumably the copy length */
+ cmpl %eax, 16(%esp) /* fourth stack argument -- presumably the destination object size */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* fourth < third (unsigned): go to __chk_fail */
+END (MEMCPY_CHK) /* no ret above: a passing check continues into MEMCPY below */
+#endif
+ENTRY (MEMCPY)
+ PUSH (%esi) /* rep movsb uses %esi and %edi; save the caller's values */
+ PUSH (%edi)
+ movl N(%esp), %ecx /* %ecx = byte count */
+ movl STR1(%esp), %edi /* %edi = destination */
+ movl STR2(%esp), %esi /* %esi = source */
+ mov %edi, %eax /* return value = destination pointer */
+#ifdef USE_AS_MEMPCPY
+ add %ecx, %eax /* mempcpy variant: return destination + count */
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %esi, %edi
+ ja L(copy_backward) /* destination above source (unsigned): copy from the end */
+ je L(bwd_write_0bytes) /* same address: nothing to copy */
+#endif
+
+ rep movsb /* copy %ecx bytes; forward only if DF is clear on entry (assumed) */
+ POP (%edi)
+ POP (%esi)
+ ret
+
+#ifdef USE_AS_MEMMOVE
+L(copy_backward):
+ lea -1(%edi,%ecx), %edi /* %edi = last destination byte */
+ lea -1(%esi,%ecx), %esi /* %esi = last source byte */
+ std /* DF = 1: movsb now decrements %esi and %edi */
+ rep movsb
+ cld /* put DF back to 0 before returning */
+L(bwd_write_0bytes):
+ POP (%edi)
+ POP (%esi)
+ ret
+#endif
+
+END (MEMCPY)
+
+#ifndef USE_AS_BCOPY
+libc_hidden_builtin_def (MEMCPY)
+#endif
similarity index 92%
rename from sysdeps/i386/i686/memcpy_chk.S
rename to sysdeps/i386/memcpy_chk.S
@@ -1,4 +1,4 @@
-/* Checking memcpy for i686.
+/* Checking memcpy for i386.
Copyright (C) 2004-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,10 +16,10 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include "asm-syntax.h"
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
-#ifndef PIC
/* For libc.so this is defined in memcpy.S.
For libc.a, this is a separate source to avoid
memcpy bringing in __chk_fail and all routines
new file mode 100644
@@ -0,0 +1,4 @@
+#define USE_AS_MEMMOVE /* enable the backward-copy path in memcpy.S for overlapping buffers */
+#define MEMCPY memmove /* name of the entry point emitted by memcpy.S */
+#define MEMCPY_CHK __memmove_chk /* name of the checking entry point emitted in shared libc */
+#include "memcpy.S"
similarity index 78%
rename from sysdeps/i386/i686/memmove_chk.S
rename to sysdeps/i386/memmove_chk.S
@@ -1,4 +1,4 @@
-/* Checking memmove for x86-64.
+/* Checking memmove for i386
Copyright (C) 2004-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,14 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include "asm-syntax.h"
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
-#ifndef PIC
- /* For libc.so this is defined in memmove.S.
- For libc.a, this is a separate source to avoid
- memmove bringing in __chk_fail and all routines
- it calls. */
+/* For libc.so this is defined in memmove.S. For libc.a, this is a
+ separate source to avoid memmove bringing in __chk_fail and all
+ routines it calls. */
.text
ENTRY (__memmove_chk)
movl 12(%esp), %eax
new file mode 100644
@@ -0,0 +1,7 @@
+#define USE_AS_MEMPCPY /* memcpy.S then returns destination + count instead of destination */
+#define MEMCPY __mempcpy /* name of the entry point emitted by memcpy.S */
+#define MEMCPY_CHK __mempcpy_chk /* name of the checking entry point emitted in shared libc */
+#include "memcpy.S"
+
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
similarity index 78%
rename from sysdeps/i386/i686/mempcpy_chk.S
rename to sysdeps/i386/mempcpy_chk.S
@@ -1,4 +1,4 @@
-/* Checking mempcpy for x86-64.
+/* Checking mempcpy for i386
Copyright (C) 2004-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,14 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include "asm-syntax.h"
+#ifndef SHARED
+# include <sysdep.h>
+# include "asm-syntax.h"
-#ifndef PIC
- /* For libc.so this is defined in mempcpy.S.
- For libc.a, this is a separate source to avoid
- mempcpy bringing in __chk_fail and all routines
- it calls. */
+/* For libc.so this is defined in mempcpy.S. For libc.a, this is a
+ separate source to avoid mempcpy bringing in __chk_fail and all
+ routines it calls. */
.text
ENTRY (__mempcpy_chk)
movl 12(%esp), %eax
new file mode 100644
@@ -0,0 +1,68 @@
+/* memset with REP STOSB
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#define STR1 8 /* destination is the first argument: 4 (return address) + 4 (saved %edi) */
+#ifdef USE_AS_BZERO
+#define N STR1+4 /* bzero variant: byte count is the second argument */
+#else
+#define STR2 STR1+4 /* fill value is the second argument */
+#define N STR2+4 /* byte count is the third argument */
+#endif
+
+ .text
+#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
+ENTRY (__memset_chk)
+ movl 12(%esp), %eax /* third stack argument -- presumably the fill length */
+ cmpl %eax, 16(%esp) /* fourth stack argument -- presumably the destination object size */
+ jb HIDDEN_JUMPTARGET (__chk_fail) /* fourth < third (unsigned): go to __chk_fail */
+END (__memset_chk) /* no ret above: a passing check continues into memset below */
+#endif
+ENTRY (memset)
+ PUSH (%edi) /* rep stosb uses %edi; save the caller's value */
+ movl N(%esp), %ecx /* %ecx = byte count */
+ movl STR1(%esp), %edi /* %edi = destination */
+#ifdef USE_AS_BZERO
+ xor %eax, %eax /* bzero variant: fill with zero */
+#else
+ movzbl STR2(%esp), %eax /* fill byte = low 8 bits of the second argument */
+ mov %edi, %edx /* rep stosb advances %edi, so keep the start in %edx */
+#endif
+ rep stosb /* store %al %ecx times; forward only if DF is clear on entry (assumed) */
+#ifndef USE_AS_BZERO
+ mov %edx, %eax /* return the original destination */
+#endif
+ POP (%edi)
+ ret
+END (memset)
+
+#ifndef USE_AS_BZERO
+libc_hidden_builtin_def (memset)
+#endif
deleted file mode 100644
@@ -1,85 +0,0 @@
-/* Set a block of memory to some byte value.
- For Intel 80x86, x>=3.
- Copyright (C) 1991-2015 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Contributed by Torbjorn Granlund (tege@sics.se).
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <string.h>
-#include <memcopy.h>
-
-#ifdef __GNUC__
-
-#undef memset
-
-void *
-memset (void *dstpp, int c, size_t len)
-{
- int d0;
- unsigned long int dstp = (unsigned long int) dstpp;
-
- /* This explicit register allocation
- improves code very much indeed. */
- register op_t x asm("ax");
-
- x = (unsigned char) c;
-
- /* Clear the direction flag, so filling will move forward. */
- asm volatile("cld");
-
- /* This threshold value is optimal. */
- if (len >= 12)
- {
- /* Fill X with four copies of the char we want to fill with. */
- x |= (x << 8);
- x |= (x << 16);
-
- /* Adjust LEN for the bytes handled in the first loop. */
- len -= (-dstp) % OPSIZ;
-
- /* There are at least some bytes to set.
- No need to test for LEN == 0 in this alignment loop. */
-
- /* Fill bytes until DSTP is aligned on a longword boundary. */
- asm volatile("rep\n"
- "stosb" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "1" ((-dstp) % OPSIZ), "a" (x) :
- "memory");
-
- /* Fill longwords. */
- asm volatile("rep\n"
- "stosl" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "1" (len / OPSIZ), "a" (x) :
- "memory");
- len %= OPSIZ;
- }
-
- /* Write the last few bytes. */
- asm volatile("rep\n"
- "stosb" /* %0, %2, %3 */ :
- "=D" (dstp), "=c" (d0) :
- "0" (dstp), "1" (len), "a" (x) :
- "memory");
-
- return dstpp;
-}
-libc_hidden_builtin_def (memset)
-
-#else
-#include <string/memset.c>
-#endif
similarity index 79%
rename from sysdeps/i386/i686/memset_chk.S
rename to sysdeps/i386/memset_chk.S
@@ -1,4 +1,4 @@
-/* Checking memset for i686.
+/* Checking memset for i386.
Copyright (C) 2004-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -16,14 +16,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include "asm-syntax.h"
-
#ifndef SHARED
- /* For libc.so this is defined in memset.S.
- For libc.a, this is a separate source to avoid
- memset bringing in __chk_fail and all routines
- it calls. */
+# include <sysdep.h>
+# include "asm-syntax.h"
+
+/* For libc.so this is defined in memset.S. For libc.a, this is a
+ separate source to avoid memset bringing in __chk_fail and all
+ routines it calls. */
.text
ENTRY (__memset_chk)
movl 12(%esp), %eax