Fix misdetected Slow_SSE4_2 cpu feature bit (bug 17501)

Message ID mvmzjcpu2lf.fsf@hawking.suse.de
State Committed
Headers

Commit Message

Andreas Schwab Oct. 21, 2014, 10:41 a.m. UTC
  Several places misdetect the Slow_SSE4_2 feature bit because they use
CPUID_OFFSET instead of FEATURE_OFFSET as the offset into the
__cpu_features structure.  This is only a performance bug, since the
actual check for SSE4_2 is correct.

Tested on x86_64-suse-linux and i686-suse-linux, on a system that has
SSE4_2 but not Slow_SSE4_2.

Andreas.

	[BZ #17501]
	* sysdeps/i386/i686/multiarch/strcasecmp.S (__strcasecmp): Fix
	check for Slow_SSE4_2 feature bit.
	* sysdeps/i386/i686/multiarch/strcmp.S (STRCMP): Likewise.
	* sysdeps/i386/i686/multiarch/strncase.S (__strncasecmp): Likewise.
	* sysdeps/x86_64/multiarch/strcmp.S (STRCMP, __strcasecmp,
	__strncasecmp): Likewise.  Fix check for Fast_Unaligned_Load
	feature bit.
---
 sysdeps/i386/i686/multiarch/strcasecmp.S | 4 ++--
 sysdeps/i386/i686/multiarch/strcmp.S     | 4 ++--
 sysdeps/i386/i686/multiarch/strncase.S   | 4 ++--
 sysdeps/x86_64/multiarch/strcmp.S        | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)
  

Comments

Mike Frysinger Oct. 24, 2014, 1:52 a.m. UTC | #1
On 21 Oct 2014 12:41, Andreas Schwab wrote:
> Several places misdetect the Slow_SSE4_2 due to using the wrong offset
> into the __cpu_features structure.  This is only a performance bug,
> since the actual check for SSE4_2 is correct.
> 
> Tested on x86_64-suse-linux and i686-suse-linux, on a system that has
> SSE4_2 but not Slow_SSE4_2.

lgtm
-mike
  

Patch

diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
index 4f2de4f..57ccef5 100644
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -37,7 +37,7 @@  ENTRY(__strcasecmp)
 	leal	__strcasecmp_ssse3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__strcasecmp_sse4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -58,7 +58,7 @@  ENTRY(__strcasecmp)
 	leal	__strcasecmp_ssse3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__strcasecmp_sse4_2, %eax
 2:	ret
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
index 2ad6bf4..23cbd6f 100644
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -68,7 +68,7 @@  ENTRY(STRCMP)
 	leal	__STRCMP_SSSE3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__STRCMP_SSE4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -89,7 +89,7 @@  ENTRY(STRCMP)
 	leal	__STRCMP_SSSE3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__STRCMP_SSE4_2, %eax
 2:	ret
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
index 9b4cfa0..41644a5 100644
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ b/sysdeps/i386/i686/multiarch/strncase.S
@@ -37,7 +37,7 @@  ENTRY(__strncasecmp)
 	leal	__strncasecmp_ssse3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__strncasecmp_sse4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -58,7 +58,7 @@  ENTRY(__strncasecmp)
 	leal	__strncasecmp_ssse3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__strncasecmp_sse4_2, %eax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f3e0ca1..bd71714 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -91,10 +91,10 @@  ENTRY(STRCMP)
 1:
 #ifdef USE_AS_STRCMP
 	leaq	__strcmp_sse2_unaligned(%rip), %rax
-	testl   $bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip)
+	testl   $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
 	jnz     3f
 #else
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	STRCMP_SSE42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -120,7 +120,7 @@  ENTRY(__strcasecmp)
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	3f
 #  endif
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strcasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -146,7 +146,7 @@  ENTRY(__strncasecmp)
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	3f
 #  endif
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strncasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)