From a89f56e004bd30a38248b44632628db9088a79da Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 30 May 2017 08:49:36 -0700
Subject: [PATCH] x86-64: Add memmove.h
---
sysdeps/x86_64/multiarch/Makefile | 1 +
sysdeps/x86_64/multiarch/memcpy.S | 75 -----------------
sysdeps/x86_64/multiarch/memcpy.c | 37 +++++++++
.../{memmove.S => memmove-sse2-unaligned-erms.S} | 57 +------------
sysdeps/x86_64/multiarch/memmove.c | 34 ++++++++
sysdeps/x86_64/multiarch/memmove.h | 95 ++++++++++++++++++++++
sysdeps/x86_64/multiarch/mempcpy.S | 73 -----------------
sysdeps/x86_64/multiarch/mempcpy.c | 35 ++++++++
8 files changed, 204 insertions(+), 203 deletions(-)
delete mode 100644 sysdeps/x86_64/multiarch/memcpy.S
create mode 100644 sysdeps/x86_64/multiarch/memcpy.c
rename sysdeps/x86_64/multiarch/{memmove.S => memmove-sse2-unaligned-erms.S} (53%)
create mode 100644 sysdeps/x86_64/multiarch/memmove.c
create mode 100644 sysdeps/x86_64/multiarch/memmove.h
delete mode 100644 sysdeps/x86_64/multiarch/mempcpy.S
create mode 100644 sysdeps/x86_64/multiarch/mempcpy.c
@@ -19,6 +19,7 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned \
strcspn-c strpbrk-c strspn-c varshift \
memset-avx512-no-vzeroupper \
+ memmove-sse2-unaligned-erms \
memmove-avx-unaligned-erms \
memmove-avx512-unaligned-erms \
memset-avx2-unaligned-erms \
deleted file mode 100644
@@ -1,75 +0,0 @@
-/* Multiple versions of memcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need memcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__new_memcpy)
- .type __new_memcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __memcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __memcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __memcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __memcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __memcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __memcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __memcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __memcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__new_memcpy)
-
-# undef memcpy
-# include <shlib-compat.h>
-versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
new file mode 100644
@@ -0,0 +1,37 @@
+/* Multiple versions of memcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in lib and for
+ DSO. In static binaries we need memcpy before the initialization
+ happened. */
+#if defined SHARED && IS_IN (libc)
+# define memcpy __redirect_memcpy
+# include <string.h>
+# undef memcpy
+
+# define SYMBOL_NAME memcpy
+# include "memmove.h"
+
+extern __typeof (__redirect_memcpy) __new_memcpy;
+
+libc_ifunc (__new_memcpy, IFUNC_SELECTOR ());
+
+# include <shlib-compat.h>
+versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
+#endif
similarity index 53%
rename from sysdeps/x86_64/multiarch/memmove.S
rename to sysdeps/x86_64/multiarch/memmove-sse2-unaligned-erms.S
@@ -1,6 +1,5 @@
-/* Multiple versions of memmove
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2016-2017 Free Software Foundation, Inc.
+/* memmove with SSE2.
+ Copyright (C) 2017 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -17,57 +16,6 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. */
-#if IS_IN (libc)
- .text
-ENTRY(__libc_memmove)
- .type __libc_memmove, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __memmove_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __memmove_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __memmove_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __memmove_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __memmove_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memmove_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __memmove_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __memmove_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __memmove_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __memmove_ssse3(%rip), %RAX_LP
-2: ret
-END(__libc_memmove)
-#endif
-
#if IS_IN (libc)
# define MEMMOVE_SYMBOL(p,s) p##_sse2_##s
@@ -83,7 +31,6 @@ libc_hidden_ver (__mempcpy_sse2_unaligned, __mempcpy)
by the indirect call in the PLT. */
# define libc_hidden_builtin_def
# endif
-strong_alias (__libc_memmove, memmove)
#endif
#if !defined SHARED || !IS_IN (libc)
new file mode 100644
@@ -0,0 +1,34 @@
+/* Multiple versions of memmmove.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in lib and for
+ DSO. */
+#if IS_IN (libc)
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
+
+# define SYMBOL_NAME memmove
+# include "memmove.h"
+
+extern __typeof (__redirect_memmove) __libc_memmove;
+
+libc_ifunc (__libc_memmove, IFUNC_SELECTOR ());
+strong_alias (__libc_memmove, memmove);
+#endif
new file mode 100644
@@ -0,0 +1,95 @@
+/* Common definition for memcpy, mempcpy, and memmove implementation.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2016-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* memcpy, mempcpy, and memmove share the same logic for ifunc selection. */
+
+#include <cpu-features.h>
+#include <ldsodefs.h>
+
+#define PASTER1(x,y) x##_##y
+#define EVALUATOR1(x,y) PASTER1 (x,y)
+#define PASTER2(x,y) __##x##_##y
+#define EVALUATOR2(x,y) PASTER2 (x,y)
+
+/* Basically set '__redirect_<symbol>' to use as type definition,
+ '__<symbol>_<variant>' as the optimized implementation and
+ '<symbol>_ifunc_selector' as the IFUNC selector. */
+#define REDIRECT_NAME EVALUATOR1 (__redirect, SYMBOL_NAME)
+#define OPTIMIZE(name) EVALUATOR2 (SYMBOL_NAME, name)
+#define IFUNC_SELECTOR EVALUATOR1 (SYMBOL_NAME, ifunc_selector)
+
+extern __typeof (REDIRECT_NAME) OPTIMIZE (erms) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+ attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_no_vzeroupper)
+ attribute_hidden;
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_ERMS))
+ return OPTIMIZE (erms);
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX512F_Usable)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+ if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx512_no_vzeroupper);
+
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx512_unaligned_erms);
+
+ return OPTIMIZE (avx512_unaligned);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (avx_unaligned_erms);
+
+ return OPTIMIZE (avx_unaligned);
+ }
+
+ if (!CPU_FEATURES_CPU_P (cpu_features, SSSE3)
+ || CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Copy))
+ {
+ if (CPU_FEATURES_CPU_P (cpu_features, ERMS))
+ return OPTIMIZE (sse2_unaligned_erms);
+
+ return OPTIMIZE (sse2_unaligned);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Copy_Backward))
+ return OPTIMIZE (ssse3_back);
+
+ return OPTIMIZE (ssse3);
+}
deleted file mode 100644
@@ -1,73 +0,0 @@
-/* Multiple versions of mempcpy
- All versions must be listed in ifunc-impl-list.c.
- Copyright (C) 2010-2017 Free Software Foundation, Inc.
- Contributed by Intel Corporation.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#include <sysdep.h>
-#include <init-arch.h>
-
-/* Define multiple versions only for the definition in lib and for
- DSO. In static binaries we need mempcpy before the initialization
- happened. */
-#if defined SHARED && IS_IN (libc)
- .text
-ENTRY(__mempcpy)
- .type __mempcpy, @gnu_indirect_function
- LOAD_RTLD_GLOBAL_RO_RDX
- lea __mempcpy_erms(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_ERMS)
- jnz 2f
- HAS_ARCH_FEATURE (Prefer_No_AVX512)
- jnz 1f
- HAS_ARCH_FEATURE (AVX512F_Usable)
- jz 1f
- lea __mempcpy_avx512_no_vzeroupper(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
- jnz 2f
- lea __mempcpy_avx512_unaligned_erms(%rip), %RAX_LP
- HAS_CPU_FEATURE (ERMS)
- jnz 2f
- lea __mempcpy_avx512_unaligned(%rip), %RAX_LP
- ret
-1: lea __mempcpy_avx_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
- jz L(Fast_Unaligned_Load)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_avx_unaligned_erms(%rip), %RAX_LP
- ret
-L(Fast_Unaligned_Load):
- lea __mempcpy_sse2_unaligned(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Unaligned_Copy)
- jz L(SSSE3)
- HAS_CPU_FEATURE (ERMS)
- jz 2f
- lea __mempcpy_sse2_unaligned_erms(%rip), %RAX_LP
- ret
-L(SSSE3):
- HAS_CPU_FEATURE (SSSE3)
- jz 2f
- lea __mempcpy_ssse3_back(%rip), %RAX_LP
- HAS_ARCH_FEATURE (Fast_Copy_Backward)
- jnz 2f
- lea __mempcpy_ssse3(%rip), %RAX_LP
-2: ret
-END(__mempcpy)
-
-weak_alias (__mempcpy, mempcpy)
-#endif
new file mode 100644
@@ -0,0 +1,35 @@
+/* Multiple versions of mempcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in lib and for
+ DSO. In static binaries we need mempcpy before the initialization
+ happened. */
+#if defined SHARED && IS_IN (libc)
+# define mempcpy __redirect_mempcpy
+# define __mempcpy __redirect___mempcpy
+# include <string.h>
+# undef mempcpy
+# undef __mempcpy
+
+# define SYMBOL_NAME mempcpy
+# include "memmove.h"
+
+libc_ifunc_redirected (__redirect_mempcpy, __mempcpy, IFUNC_SELECTOR ());
+weak_alias (__mempcpy, mempcpy)
+#endif
--
2.9.4