[PATCH 7/7] Enable __memcpy_chk_sse2_unaligned

Submitter H.J. Lu
Date March 7, 2016, 5:36 p.m.
Message ID <1457372190-12196-8-git-send-email-hjl.tools@gmail.com>
Permalink /patch/11240/
State New

Comments

H.J. Lu - March 7, 2016, 5:36 p.m.
Check Fast_Unaligned_Load for __memcpy_chk_sse2_unaligned. The new
selection order, sketched in C after the list, is:

1. __memcpy_chk_avx_unaligned if AVX_Fast_Unaligned_Load bit is set.
2. __memcpy_chk_sse2_unaligned if Fast_Unaligned_Load bit is set.
3. __memcpy_chk_sse2 if SSSE3 isn't available.
4. __memcpy_chk_ssse3_back if Fast_Copy_Backward bit is set.
5. __memcpy_chk_ssse3
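
For illustration, a minimal C sketch of this dispatch order; the int
parameters are hypothetical stand-ins for the feature bits tested via
HAS_ARCH_FEATURE and HAS_CPU_FEATURE in the assembly, and the extern
declarations exist only to keep the sketch self-contained:

#include <stddef.h>

/* The five existing implementations, declared here only so the
   sketch compiles on its own.  */
extern void *__memcpy_chk_avx_unaligned (void *, const void *,
                                         size_t, size_t);
extern void *__memcpy_chk_sse2_unaligned (void *, const void *,
                                          size_t, size_t);
extern void *__memcpy_chk_sse2 (void *, const void *, size_t, size_t);
extern void *__memcpy_chk_ssse3_back (void *, const void *,
                                      size_t, size_t);
extern void *__memcpy_chk_ssse3 (void *, const void *, size_t, size_t);

typedef void *(*memcpy_chk_fn) (void *, const void *, size_t, size_t);

/* Hypothetical feature flags mirroring the CPU feature bits checked
   by the assembly selector, in the order listed above.  */
static memcpy_chk_fn
select_memcpy_chk (int avx_fast_unaligned_load, int fast_unaligned_load,
                   int ssse3, int fast_copy_backward)
{
  if (avx_fast_unaligned_load)
    return __memcpy_chk_avx_unaligned;    /* 1 */
  if (fast_unaligned_load)
    return __memcpy_chk_sse2_unaligned;   /* 2 */
  if (!ssse3)
    return __memcpy_chk_sse2;             /* 3 */
  if (fast_copy_backward)
    return __memcpy_chk_ssse3_back;       /* 4 */
  return __memcpy_chk_ssse3;              /* 5 */
}

The assembly in the patch below mirrors this if-chain: each candidate
is loaded into RAX_LP first, and the branch to label 2f is taken as
soon as a feature test confirms that candidate.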

	[BZ #19776]
	* sysdeps/x86_64/multiarch/memcpy_chk.S (__memcpy_chk): Check
	Fast_Unaligned_Load to enable __memcpy_chk_sse2_unaligned.
---
 sysdeps/x86_64/multiarch/memcpy_chk.S | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

Patch

diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index 648217e..c009211 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -32,22 +32,25 @@  ENTRY(__memcpy_chk)
 	LOAD_RTLD_GLOBAL_RO_RDX
 #ifdef HAVE_AVX512_ASM_SUPPORT
 	HAS_ARCH_FEATURE (AVX512F_Usable)
-	jz      1f
+	jz	1f
 	HAS_ARCH_FEATURE (Prefer_No_VZEROUPPER)
-	jz      1f
-	leaq    __memcpy_chk_avx512_no_vzeroupper(%rip), %rax
+	jz	1f
+	lea	__memcpy_chk_avx512_no_vzeroupper(%rip), %RAX_LP
 	ret
 #endif
-1:	leaq	__memcpy_chk_sse2(%rip), %rax
+1:	lea	__memcpy_chk_avx_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_chk_sse2_unaligned(%rip), %RAX_LP
+	HAS_ARCH_FEATURE (Fast_Unaligned_Load)
+	jnz	2f
+	lea	__memcpy_chk_sse2(%rip), %RAX_LP
 	HAS_CPU_FEATURE (SSSE3)
 	jz	2f
-	leaq	__memcpy_chk_ssse3(%rip), %rax
+	lea	__memcpy_chk_ssse3_back(%rip), %RAX_LP
 	HAS_ARCH_FEATURE (Fast_Copy_Backward)
-	jz	2f
-	leaq	__memcpy_chk_ssse3_back(%rip), %rax
-	HAS_ARCH_FEATURE (AVX_Fast_Unaligned_Load)
-	jz  2f
-	leaq    __memcpy_chk_avx_unaligned(%rip), %rax
+	jnz	2f
+	lea	__memcpy_chk_ssse3(%rip), %RAX_LP
 2:	ret
 END(__memcpy_chk)
 # else