[06/20] libcpu-rt-c/x86-64: Add memcpy, memmove and mempcpy

Message ID 20180612221939.19545-7-hjl.tools@gmail.com
State New, archived
Headers

Commit Message

H.J. Lu June 12, 2018, 10:19 p.m. UTC
	* sysdeps/x86_64/Makefile (cpu-rt-c-routines): Add memcpy,
	memmove and mempcpy.
	* sysdeps/x86_64/memmove.S: Support libcpu-rt-c.
	* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: Likewise.
	* sysdeps/x86_64/multiarch/memcpy-ssse3.S: Likewise.
	* sysdeps/x86_64/multiarch/memcpy.c: Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S:
	Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S:
	Likewise.
	* sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S:
	Likewise.
	* sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S:
	Likewise.
	* sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: Likewise.
	* sysdeps/x86_64/multiarch/memmove.c: Likewise.
	* sysdeps/x86_64/multiarch/mempcpy.c: Likewise.
	* sysdeps/x86_64/multiarch/Makefile
	(libcpu-rt-c-sysdep_routines): Add memcpy-ssse3, memcpy-ssse3-back,
	memmove-sse2-unaligned-erms, memmove-ssse3, memmove-ssse3-back,
	memmove-avx-unaligned-erms, memmove-avx512-unaligned-erms and
	memmove-avx512-no-vzeroupper.
---
 sysdeps/x86_64/Makefile                       |  2 +-
 sysdeps/x86_64/memmove.S                      | 12 +++++--
 sysdeps/x86_64/multiarch/Makefile             |  8 ++++-
 sysdeps/x86_64/multiarch/memcpy-ssse3-back.S  |  6 ++--
 sysdeps/x86_64/multiarch/memcpy-ssse3.S       |  6 ++--
 sysdeps/x86_64/multiarch/memcpy.c             | 14 +++++---
 .../multiarch/memmove-avx-unaligned-erms.S    |  2 +-
 .../multiarch/memmove-avx512-no-vzeroupper.S  |  8 +++--
 .../multiarch/memmove-avx512-unaligned-erms.S |  2 +-
 .../multiarch/memmove-sse2-unaligned-erms.S   |  2 +-
 .../multiarch/memmove-vec-unaligned-erms.S    | 33 ++++++++++++-------
 sysdeps/x86_64/multiarch/memmove.c            | 10 ++++--
 sysdeps/x86_64/multiarch/mempcpy.c            | 10 ++++--
 13 files changed, 82 insertions(+), 33 deletions(-)
  

Patch

diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 1eb13d01da..978cff6cba 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -147,7 +147,7 @@  endif
 endif
 
 ifeq ($(subdir),cpu-rt-c)
-cpu-rt-c-routines += memchr memcmp
+cpu-rt-c-routines += memchr memcmp memcpy memmove mempcpy
 
 # For the CPU run-time tests.
 vpath %.c $(..)string
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index 9cc92ff9a9..ec2b624966 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -29,7 +29,7 @@ 
 #define SECTION(p)		p
 
 #ifdef USE_MULTIARCH
-# if !IS_IN (libc)
+# if !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 #  define MEMCPY_SYMBOL(p,s)		memcpy
 # endif
 #else
@@ -39,8 +39,12 @@ 
 #  define MEMCPY_SYMBOL(p,s)		memcpy
 # endif
 #endif
-#if !defined USE_MULTIARCH || !IS_IN (libc)
-# define MEMPCPY_SYMBOL(p,s)		__mempcpy
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
+# if IS_IN (libcpu_rt_c)
+#  define MEMPCPY_SYMBOL(p,s)		mempcpy
+# else
+#  define MEMPCPY_SYMBOL(p,s)		__mempcpy
+# endif
 #endif
 #ifndef MEMMOVE_SYMBOL
 # define MEMMOVE_CHK_SYMBOL(p,s)	p
@@ -55,9 +59,11 @@  libc_hidden_builtin_def (memmove)
 strong_alias (memmove, __memcpy)
 libc_hidden_ver (memmove, memcpy)
 # endif
+# if !IS_IN (libcpu_rt_c)
 libc_hidden_def (__mempcpy)
 weak_alias (__mempcpy, mempcpy)
 libc_hidden_builtin_def (mempcpy)
+# endif
 
 # if defined SHARED && IS_IN (libc)
 #  undef memcpy
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9bb6c8c3cd..8e86e44e33 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -69,5 +69,11 @@  endif
 ifeq ($(subdir),cpu-rt-c)
 libcpu-rt-c-sysdep_routines += memchr-sse2 memchr-avx2 \
 			       memcmp-sse2 memcmp-ssse3 memcmp-sse4 \
-			       memcmp-avx2-movbe
+			       memcmp-avx2-movbe memcpy-ssse3 \
+			       memcpy-ssse3-back \
+			       memmove-sse2-unaligned-erms \
+			       memmove-ssse3 memmove-ssse3-back \
+			       memmove-avx-unaligned-erms \
+			       memmove-avx512-unaligned-erms \
+			       memmove-avx512-no-vzeroupper
 endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index 7e37035487..9bcd5957cf 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -19,7 +19,7 @@ 
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 #include "asm-syntax.h"
 
@@ -44,10 +44,12 @@ 
 
 	.section .text.ssse3,"ax",@progbits
 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
 ENTRY (MEMPCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMPCPY_CHK)
+# endif
 
 ENTRY (MEMPCPY)
 	movq	%rdi, %rax
@@ -56,7 +58,7 @@  ENTRY (MEMPCPY)
 END (MEMPCPY)
 #endif
 
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3.S b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
index 5dd209034b..988ce0fc83 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3.S
@@ -19,7 +19,7 @@ 
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 #include "asm-syntax.h"
 
@@ -44,10 +44,12 @@ 
 
 	.section .text.ssse3,"ax",@progbits
 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
+# if IS_IN (libc)
 ENTRY (MEMPCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMPCPY_CHK)
+# endif
 
 ENTRY (MEMPCPY)
 	movq	%rdi, %rax
@@ -56,7 +58,7 @@  ENTRY (MEMPCPY)
 END (MEMPCPY)
 #endif
 
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && IS_IN (libc)
 ENTRY (MEMCPY_CHK)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
diff --git a/sysdeps/x86_64/multiarch/memcpy.c b/sysdeps/x86_64/multiarch/memcpy.c
index 419f76aefc..ee3bb2706c 100644
--- a/sysdeps/x86_64/multiarch/memcpy.c
+++ b/sysdeps/x86_64/multiarch/memcpy.c
@@ -18,7 +18,7 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define memcpy __redirect_memcpy
 # include <string.h>
 # undef memcpy
@@ -26,14 +26,20 @@ 
 # define SYMBOL_NAME memcpy
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __new_memcpy memcpy
+# endif
+
 libc_ifunc_redirected (__redirect_memcpy, __new_memcpy,
 		       IFUNC_SELECTOR ());
 
-# ifdef SHARED
+# if !IS_IN (libcpu_rt_c)
+#  ifdef SHARED
 __hidden_ver1 (__new_memcpy, __GI_memcpy, __redirect_memcpy)
   __attribute__ ((visibility ("hidden")));
-# endif
+#  endif
 
-# include <shlib-compat.h>
+#  include <shlib-compat.h>
 versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+# endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
index e195e93f15..e996ace136 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms.S
@@ -1,4 +1,4 @@ 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define VEC_SIZE	32
 # define VEC(i)		ymm##i
 # define VMOVNT		vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
index effc3ac2de..6cd1accfc5 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-no-vzeroupper.S
@@ -18,15 +18,18 @@ 
 
 #include <sysdep.h>
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 
 # include "asm-syntax.h"
 
 	.section .text.avx512,"ax",@progbits
+# if IS_IN (libc)
 ENTRY (__mempcpy_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__mempcpy_chk_avx512_no_vzeroupper)
+strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
+# endif
 
 ENTRY (__mempcpy_avx512_no_vzeroupper)
 	movq	%rdi, %rax
@@ -34,10 +37,12 @@  ENTRY (__mempcpy_avx512_no_vzeroupper)
 	jmp	L(start)
 END (__mempcpy_avx512_no_vzeroupper)
 
+# if IS_IN (libc)
 ENTRY (__memmove_chk_avx512_no_vzeroupper)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memmove_chk_avx512_no_vzeroupper)
+# endif
 
 ENTRY (__memmove_avx512_no_vzeroupper)
 	mov	%rdi, %rax
@@ -411,5 +416,4 @@  L(gobble_256bytes_nt_loop_bkw):
 END (__memmove_avx512_no_vzeroupper)
 
 strong_alias (__memmove_avx512_no_vzeroupper, __memcpy_avx512_no_vzeroupper)
-strong_alias (__memmove_chk_avx512_no_vzeroupper, __memcpy_chk_avx512_no_vzeroupper)
 #endif
diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
index aac1515cf6..95381d458e 100644
--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
@@ -1,4 +1,4 @@ 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define VEC_SIZE	64
 # define VEC(i)		zmm##i
 # define VMOVNT		vmovntdq
diff --git a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
index 7c6163ddcb..eae941e58e 100644
--- a/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -16,7 +16,7 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define MEMMOVE_SYMBOL(p,s)	p##_sse2_##s
 #else
 weak_alias (__mempcpy, mempcpy)
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
index e2ede45e9f..ada93ea6ad 100644
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
@@ -131,7 +131,7 @@  L(start):
 	jb	L(less_vec)
 	cmpq	$(VEC_SIZE * 2), %rdx
 	ja	L(more_2x_vec)
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(last_2x_vec):
 #endif
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
@@ -140,18 +140,20 @@  L(last_2x_vec):
 	VMOVU	%VEC(0), (%rdi)
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
 	VZEROUPPER
-#if !defined USE_MULTIARCH || !IS_IN (libc)
+#if !defined USE_MULTIARCH || !(IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(nop):
 #endif
 	ret
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
 
 # if VEC_SIZE == 16
+#  if IS_IN (libc)
 ENTRY (__mempcpy_chk_erms)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__mempcpy_chk_erms)
+#  endif
 
 /* Only used to measure performance of REP MOVSB.  */
 ENTRY (__mempcpy_erms)
@@ -163,10 +165,12 @@  ENTRY (__mempcpy_erms)
 	jmp	L(start_movsb)
 END (__mempcpy_erms)
 
+#  if IS_IN (libc)
 ENTRY (__memmove_chk_erms)
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (__memmove_chk_erms)
+#  endif
 
 ENTRY (__memmove_erms)
 	movq	%rdi, %rax
@@ -195,10 +199,12 @@  L(movsb_backward):
 	ret
 END (__memmove_erms)
 strong_alias (__memmove_erms, __memcpy_erms)
+#  if IS_IN (libc)
 strong_alias (__memmove_chk_erms, __memcpy_chk_erms)
+#  endif
 # endif
 
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__mempcpy_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
@@ -212,10 +218,12 @@  ENTRY (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 END (MEMMOVE_SYMBOL (__mempcpy, unaligned_erms))
 
 # ifdef SHARED
+#  if IS_IN (libc)
 ENTRY (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
 	cmpq	%rdx, %rcx
 	jb	HIDDEN_JUMPTARGET (__chk_fail)
 END (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned_erms))
+#  endif
 # endif
 
 ENTRY (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
@@ -321,7 +329,7 @@  L(between_2_3):
 	movw	%si, (%rdi)
 	ret
 
-#if defined USE_MULTIARCH && IS_IN (libc)
+#if defined USE_MULTIARCH && (IS_IN (libc) || IS_IN (libcpu_rt_c))
 L(movsb_more_2x_vec):
 	cmpq	$REP_MOVSB_THRESHOLD, %rdx
 	ja	L(movsb)
@@ -392,7 +400,8 @@  L(more_8x_vec):
 	subq	%r8, %rdi
 	/* Adjust length.  */
 	addq	%r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
 	ja	L(large_forward)
@@ -444,7 +453,8 @@  L(more_8x_vec_backward):
 	subq	%r8, %r9
 	/* Adjust length.  */
 	subq	%r8, %rdx
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 	/* Check non-temporal store threshold.  */
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
 	ja	L(large_backward)
@@ -474,7 +484,8 @@  L(loop_4x_vec_backward):
 	VZEROUPPER
 	ret
 
-#if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+#if ((defined USE_MULTIARCH || VEC_SIZE == 16) \
+     && (IS_IN (libc) || IS_IN (libcpu_rt_c)))
 L(large_forward):
 	/* Don't use non-temporal store if there is overlap between
 	   destination and source since destination may be in cache
@@ -547,16 +558,16 @@  L(loop_large_backward):
 #endif
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
 
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # ifdef USE_MULTIARCH
 strong_alias (MEMMOVE_SYMBOL (__memmove, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy, unaligned_erms))
-#  ifdef SHARED
+#  if defined SHARED && IS_IN (libc)
 strong_alias (MEMMOVE_SYMBOL (__memmove_chk, unaligned_erms),
 	      MEMMOVE_SYMBOL (__memcpy_chk, unaligned_erms))
 #  endif
 # endif
-# ifdef SHARED
+# if defined SHARED && IS_IN (libc)
 strong_alias (MEMMOVE_CHK_SYMBOL (__memmove_chk, unaligned),
 	      MEMMOVE_CHK_SYMBOL (__memcpy_chk, unaligned))
 # endif
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index d512228eae..a4fad5ae60 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -18,7 +18,7 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define memmove __redirect_memmove
 # include <string.h>
 # undef memmove
@@ -26,12 +26,18 @@ 
 # define SYMBOL_NAME memmove
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __libc_memmove memmove
+# endif
+
 libc_ifunc_redirected (__redirect_memmove, __libc_memmove,
 		       IFUNC_SELECTOR ());
 
+# if !IS_IN (libcpu_rt_c)
 strong_alias (__libc_memmove, memmove);
-# ifdef SHARED
+#  ifdef SHARED
 __hidden_ver1 (__libc_memmove, __GI_memmove, __redirect_memmove)
   __attribute__ ((visibility ("hidden")));
+#  endif
 # endif
 #endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy.c b/sysdeps/x86_64/multiarch/mempcpy.c
index 9fe41dda82..fe3ece0774 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.c
+++ b/sysdeps/x86_64/multiarch/mempcpy.c
@@ -18,7 +18,7 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 /* Define multiple versions only for the definition in libc.  */
-#if IS_IN (libc)
+#if IS_IN (libc) || IS_IN (libcpu_rt_c)
 # define mempcpy __redirect_mempcpy
 # define __mempcpy __redirect___mempcpy
 # define NO_MEMPCPY_STPCPY_REDIRECT
@@ -30,13 +30,19 @@ 
 # define SYMBOL_NAME mempcpy
 # include "ifunc-memmove.h"
 
+# if IS_IN (libcpu_rt_c)
+#  define __mempcpy mempcpy
+# endif
+
 libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
 
+# if !IS_IN (libcpu_rt_c)
 weak_alias (__mempcpy, mempcpy)
-# ifdef SHARED
+#  ifdef SHARED
 __hidden_ver1 (__mempcpy, __GI___mempcpy, __redirect___mempcpy)
   __attribute__ ((visibility ("hidden")));
 __hidden_ver1 (mempcpy, __GI_mempcpy, __redirect_mempcpy)
   __attribute__ ((visibility ("hidden")));
+#  endif
 # endif
 #endif