[v2,1/2] x86: Move mem{p}{mov|cpy}_{chk_}erms to its own file
Checks
Context               | Check   | Description
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent
Commit Message
The primary memmove_{impl}_unaligned_erms implementations don't
interact with the standalone __memmove_erms function. Keeping them in
the same file both wastes space and unnecessarily bloats a hot code
section.
---
sysdeps/x86_64/multiarch/Makefile | 1 +
sysdeps/x86_64/multiarch/memmove-erms.S | 53 +++++++++++++++++++
.../multiarch/memmove-vec-unaligned-erms.S | 50 -----------------
3 files changed, 54 insertions(+), 50 deletions(-)
create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
Comments
On Wed, Jun 29, 2022 at 3:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
>
> The primary memmove_{impl}_unaligned_erms implementations don't
> interact with this function. Putting them in same file both
> wastes space and unnecessarily bloats a hot code section.
> ---
> sysdeps/x86_64/multiarch/Makefile | 1 +
> sysdeps/x86_64/multiarch/memmove-erms.S | 53 +++++++++++++++++++
> .../multiarch/memmove-vec-unaligned-erms.S | 50 -----------------
> 3 files changed, 54 insertions(+), 50 deletions(-)
> create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
>
> diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> index 666ee4d5d6..62a4d96fb8 100644
> --- a/sysdeps/x86_64/multiarch/Makefile
> +++ b/sysdeps/x86_64/multiarch/Makefile
> @@ -18,6 +18,7 @@ sysdep_routines += \
> memmove-avx-unaligned-erms-rtm \
> memmove-avx512-no-vzeroupper \
> memmove-avx512-unaligned-erms \
> + memmove-erms \
> memmove-evex-unaligned-erms \
> memmove-sse2-unaligned-erms \
> memmove-ssse3 \
> diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
> new file mode 100644
> index 0000000000..d98d21644b
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/memmove-erms.S
> @@ -0,0 +1,53 @@
Need copyright notice.
> +#include <sysdep.h>
> +
> +#if defined USE_MULTIARCH && IS_IN (libc)
> + .text
> +ENTRY (__mempcpy_chk_erms)
> + cmp %RDX_LP, %RCX_LP
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (__mempcpy_chk_erms)
> +
> +/* Only used to measure performance of REP MOVSB. */
> +ENTRY (__mempcpy_erms)
> + mov %RDI_LP, %RAX_LP
> + /* Skip zero length. */
> + test %RDX_LP, %RDX_LP
> + jz 2f
> + add %RDX_LP, %RAX_LP
> + jmp L(start_movsb)
> +END (__mempcpy_erms)
> +
> +ENTRY (__memmove_chk_erms)
> + cmp %RDX_LP, %RCX_LP
> + jb HIDDEN_JUMPTARGET (__chk_fail)
> +END (__memmove_chk_erms)
> +
> +ENTRY (__memmove_erms)
> + movq %rdi, %rax
> + /* Skip zero length. */
> + test %RDX_LP, %RDX_LP
> + jz 2f
> +L(start_movsb):
> + mov %RDX_LP, %RCX_LP
> + cmp %RSI_LP, %RDI_LP
> + jb 1f
> + /* Source == destination is less common. */
> + je 2f
> + lea (%rsi,%rcx), %RDX_LP
> + cmp %RDX_LP, %RDI_LP
> + jb L(movsb_backward)
> +1:
> + rep movsb
> +2:
> + ret
> +L(movsb_backward):
> + leaq -1(%rdi,%rcx), %rdi
> + leaq -1(%rsi,%rcx), %rsi
> + std
> + rep movsb
> + cld
> + ret
> +END (__memmove_erms)
> +strong_alias (__memmove_erms, __memcpy_erms)
> +strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> index d1518b8bab..04747133b7 100644
> --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> @@ -239,56 +239,6 @@ L(start):
> #endif
> #if defined USE_MULTIARCH && IS_IN (libc)
> END (MEMMOVE_SYMBOL (__memmove, unaligned))
> -# if VEC_SIZE == 16
> -ENTRY (__mempcpy_chk_erms)
> - cmp %RDX_LP, %RCX_LP
> - jb HIDDEN_JUMPTARGET (__chk_fail)
> -END (__mempcpy_chk_erms)
> -
> -/* Only used to measure performance of REP MOVSB. */
> -ENTRY (__mempcpy_erms)
> - mov %RDI_LP, %RAX_LP
> - /* Skip zero length. */
> - test %RDX_LP, %RDX_LP
> - jz 2f
> - add %RDX_LP, %RAX_LP
> - jmp L(start_movsb)
> -END (__mempcpy_erms)
> -
> -ENTRY (__memmove_chk_erms)
> - cmp %RDX_LP, %RCX_LP
> - jb HIDDEN_JUMPTARGET (__chk_fail)
> -END (__memmove_chk_erms)
> -
> -ENTRY (__memmove_erms)
> - movq %rdi, %rax
> - /* Skip zero length. */
> - test %RDX_LP, %RDX_LP
> - jz 2f
> -L(start_movsb):
> - mov %RDX_LP, %RCX_LP
> - cmp %RSI_LP, %RDI_LP
> - jb 1f
> - /* Source == destination is less common. */
> - je 2f
> - lea (%rsi,%rcx), %RDX_LP
> - cmp %RDX_LP, %RDI_LP
> - jb L(movsb_backward)
> -1:
> - rep movsb
> -2:
> - ret
> -L(movsb_backward):
> - leaq -1(%rdi,%rcx), %rdi
> - leaq -1(%rsi,%rcx), %rsi
> - std
> - rep movsb
> - cld
> - ret
> -END (__memmove_erms)
> -strong_alias (__memmove_erms, __memcpy_erms)
> -strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> -# endif
>
> # ifdef SHARED
> ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
> --
> 2.34.1
>
On Wed, Jun 29, 2022 at 3:20 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Jun 29, 2022 at 3:13 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> >
> > The primary memmove_{impl}_unaligned_erms implementations don't
> > interact with this function. Putting them in same file both
> > wastes space and unnecessarily bloats a hot code section.
> > ---
> > sysdeps/x86_64/multiarch/Makefile | 1 +
> > sysdeps/x86_64/multiarch/memmove-erms.S | 53 +++++++++++++++++++
> > .../multiarch/memmove-vec-unaligned-erms.S | 50 -----------------
> > 3 files changed, 54 insertions(+), 50 deletions(-)
> > create mode 100644 sysdeps/x86_64/multiarch/memmove-erms.S
> >
> > diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
> > index 666ee4d5d6..62a4d96fb8 100644
> > --- a/sysdeps/x86_64/multiarch/Makefile
> > +++ b/sysdeps/x86_64/multiarch/Makefile
> > @@ -18,6 +18,7 @@ sysdep_routines += \
> > memmove-avx-unaligned-erms-rtm \
> > memmove-avx512-no-vzeroupper \
> > memmove-avx512-unaligned-erms \
> > + memmove-erms \
> > memmove-evex-unaligned-erms \
> > memmove-sse2-unaligned-erms \
> > memmove-ssse3 \
> > diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
> > new file mode 100644
> > index 0000000000..d98d21644b
> > --- /dev/null
> > +++ b/sysdeps/x86_64/multiarch/memmove-erms.S
> > @@ -0,0 +1,53 @@
>
> Need copyright notice.
Fixed in V3.
>
> > +#include <sysdep.h>
> > +
> > +#if defined USE_MULTIARCH && IS_IN (libc)
> > + .text
> > +ENTRY (__mempcpy_chk_erms)
> > + cmp %RDX_LP, %RCX_LP
> > + jb HIDDEN_JUMPTARGET (__chk_fail)
> > +END (__mempcpy_chk_erms)
> > +
> > +/* Only used to measure performance of REP MOVSB. */
> > +ENTRY (__mempcpy_erms)
> > + mov %RDI_LP, %RAX_LP
> > + /* Skip zero length. */
> > + test %RDX_LP, %RDX_LP
> > + jz 2f
> > + add %RDX_LP, %RAX_LP
> > + jmp L(start_movsb)
> > +END (__mempcpy_erms)
> > +
> > +ENTRY (__memmove_chk_erms)
> > + cmp %RDX_LP, %RCX_LP
> > + jb HIDDEN_JUMPTARGET (__chk_fail)
> > +END (__memmove_chk_erms)
> > +
> > +ENTRY (__memmove_erms)
> > + movq %rdi, %rax
> > + /* Skip zero length. */
> > + test %RDX_LP, %RDX_LP
> > + jz 2f
> > +L(start_movsb):
> > + mov %RDX_LP, %RCX_LP
> > + cmp %RSI_LP, %RDI_LP
> > + jb 1f
> > + /* Source == destination is less common. */
> > + je 2f
> > + lea (%rsi,%rcx), %RDX_LP
> > + cmp %RDX_LP, %RDI_LP
> > + jb L(movsb_backward)
> > +1:
> > + rep movsb
> > +2:
> > + ret
> > +L(movsb_backward):
> > + leaq -1(%rdi,%rcx), %rdi
> > + leaq -1(%rsi,%rcx), %rsi
> > + std
> > + rep movsb
> > + cld
> > + ret
> > +END (__memmove_erms)
> > +strong_alias (__memmove_erms, __memcpy_erms)
> > +strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> > +#endif
> > diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> > index d1518b8bab..04747133b7 100644
> > --- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> > +++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
> > @@ -239,56 +239,6 @@ L(start):
> > #endif
> > #if defined USE_MULTIARCH && IS_IN (libc)
> > END (MEMMOVE_SYMBOL (__memmove, unaligned))
> > -# if VEC_SIZE == 16
> > -ENTRY (__mempcpy_chk_erms)
> > - cmp %RDX_LP, %RCX_LP
> > - jb HIDDEN_JUMPTARGET (__chk_fail)
> > -END (__mempcpy_chk_erms)
> > -
> > -/* Only used to measure performance of REP MOVSB. */
> > -ENTRY (__mempcpy_erms)
> > - mov %RDI_LP, %RAX_LP
> > - /* Skip zero length. */
> > - test %RDX_LP, %RDX_LP
> > - jz 2f
> > - add %RDX_LP, %RAX_LP
> > - jmp L(start_movsb)
> > -END (__mempcpy_erms)
> > -
> > -ENTRY (__memmove_chk_erms)
> > - cmp %RDX_LP, %RCX_LP
> > - jb HIDDEN_JUMPTARGET (__chk_fail)
> > -END (__memmove_chk_erms)
> > -
> > -ENTRY (__memmove_erms)
> > - movq %rdi, %rax
> > - /* Skip zero length. */
> > - test %RDX_LP, %RDX_LP
> > - jz 2f
> > -L(start_movsb):
> > - mov %RDX_LP, %RCX_LP
> > - cmp %RSI_LP, %RDI_LP
> > - jb 1f
> > - /* Source == destination is less common. */
> > - je 2f
> > - lea (%rsi,%rcx), %RDX_LP
> > - cmp %RDX_LP, %RDI_LP
> > - jb L(movsb_backward)
> > -1:
> > - rep movsb
> > -2:
> > - ret
> > -L(movsb_backward):
> > - leaq -1(%rdi,%rcx), %rdi
> > - leaq -1(%rsi,%rcx), %rsi
> > - std
> > - rep movsb
> > - cld
> > - ret
> > -END (__memmove_erms)
> > -strong_alias (__memmove_erms, __memcpy_erms)
> > -strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
> > -# endif
> >
> > # ifdef SHARED
> > ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
> > --
> > 2.34.1
> >
>
>
> --
> H.J.
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 666ee4d5d6..62a4d96fb8 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -18,6 +18,7 @@ sysdep_routines += \
memmove-avx-unaligned-erms-rtm \
memmove-avx512-no-vzeroupper \
memmove-avx512-unaligned-erms \
+ memmove-erms \
memmove-evex-unaligned-erms \
memmove-sse2-unaligned-erms \
memmove-ssse3 \
diff --git a/sysdeps/x86_64/multiarch/memmove-erms.S b/sysdeps/x86_64/multiarch/memmove-erms.S
new file mode 100644
index 0000000000..d98d21644b
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-erms.S
@@ -0,0 +1,53 @@
+#include <sysdep.h>
+
+#if defined USE_MULTIARCH && IS_IN (libc)
+ .text
+ENTRY (__mempcpy_chk_erms)
+ cmp %RDX_LP, %RCX_LP
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__mempcpy_chk_erms)
+
+/* Only used to measure performance of REP MOVSB. */
+ENTRY (__mempcpy_erms)
+ mov %RDI_LP, %RAX_LP
+ /* Skip zero length. */
+ test %RDX_LP, %RDX_LP
+ jz 2f
+ add %RDX_LP, %RAX_LP
+ jmp L(start_movsb)
+END (__mempcpy_erms)
+
+ENTRY (__memmove_chk_erms)
+ cmp %RDX_LP, %RCX_LP
+ jb HIDDEN_JUMPTARGET (__chk_fail)
+END (__memmove_chk_erms)
+
+ENTRY (__memmove_erms)
+ movq %rdi, %rax
+ /* Skip zero length. */
+ test %RDX_LP, %RDX_LP
+ jz 2f
+L(start_movsb):
+ mov %RDX_LP, %RCX_LP
+ cmp %RSI_LP, %RDI_LP
+ jb 1f
+ /* Source == destination is less common. */
+ je 2f
+ lea (%rsi,%rcx), %RDX_LP
+ cmp %RDX_LP, %RDI_LP
+ jb L(movsb_backward)
+1:
+ rep movsb
+2:
+ ret
+L(movsb_backward):
+ leaq -1(%rdi,%rcx), %rdi
+ leaq -1(%rsi,%rcx), %rsi
+ std
+ rep movsb
+ cld
+ ret
+END (__memmove_erms)
+strong_alias (__memmove_erms, __memcpy_erms)
+strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index d1518b8bab..04747133b7 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -239,56 +239,6 @@ L(start):
#endif
#if defined USE_MULTIARCH && IS_IN (libc)
END (MEMMOVE_SYMBOL (__memmove, unaligned))
-# if VEC_SIZE == 16
-ENTRY (__mempcpy_chk_erms)
- cmp %RDX_LP, %RCX_LP
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__mempcpy_chk_erms)
-
-/* Only used to measure performance of REP MOVSB. */
-ENTRY (__mempcpy_erms)
- mov %RDI_LP, %RAX_LP
- /* Skip zero length. */
- test %RDX_LP, %RDX_LP
- jz 2f
- add %RDX_LP, %RAX_LP
- jmp L(start_movsb)
-END (__mempcpy_erms)
-
-ENTRY (__memmove_chk_erms)
- cmp %RDX_LP, %RCX_LP
- jb HIDDEN_JUMPTARGET (__chk_fail)
-END (__memmove_chk_erms)
-
-ENTRY (__memmove_erms)
- movq %rdi, %rax
- /* Skip zero length. */
- test %RDX_LP, %RDX_LP
- jz 2f
-L(start_movsb):
- mov %RDX_LP, %RCX_LP
- cmp %RSI_LP, %RDI_LP
- jb 1f
- /* Source == destination is less common. */
- je 2f
- lea (%rsi,%rcx), %RDX_LP
- cmp %RDX_LP, %RDI_LP
- jb L(movsb_backward)
-1:
- rep movsb
-2:
- ret
-L(movsb_backward):
- leaq -1(%rdi,%rcx), %rdi
- leaq -1(%rsi,%rcx), %rsi
- std
- rep movsb
- cld
- ret
-END (__memmove_erms)
-strong_alias (__memmove_erms, __memcpy_erms)
-strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
-# endif
# ifdef SHARED
ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
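For reference, the code being moved is self-contained: __memmove_chk_erms only
performs the fortify length check before falling through, and __memmove_erms
chooses between a forward and a backward REP MOVSB depending on whether the
destination starts inside the source range. A rough C sketch of that control
flow (illustration only, not part of the patch; the *_sketch names are
placeholders, and the real fortify path jumps to __chk_fail rather than
calling abort) is:

#include <stddef.h>
#include <stdlib.h>

/* Rough equivalent of __memmove_erms: the assembly uses REP MOVSB for
   the copy loops and toggles the direction flag (std/cld) for the
   backward case.  */
static void *
memmove_erms_sketch (void *dst, const void *src, size_t len)
{
  unsigned char *d = dst;
  const unsigned char *s = src;

  if (len == 0 || d == s)       /* Skip zero length and dst == src.  */
    return dst;

  if (d < s || d >= s + len)    /* Forward copy cannot clobber the source.  */
    while (len--)               /* 1: rep movsb  */
      *d++ = *s++;
  else                          /* L(movsb_backward): copy from the end.  */
    {
      d += len;
      s += len;
      while (len--)
        *--d = *--s;
    }
  return dst;
}

/* Rough equivalent of __memmove_chk_erms: verify the destination object
   is large enough, then fall through to the unchecked routine.  */
static void *
memmove_chk_erms_sketch (void *dst, const void *src, size_t len, size_t dstlen)
{
  if (dstlen < len)
    abort ();
  return memmove_erms_sketch (dst, src, len);
}

__mempcpy_erms shares the same body via L(start_movsb); it only differs in
returning dst + len instead of dst.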