From: Andreas Schwab
To: libc-alpha@sourceware.org
Subject: [PATCH] Fix misdetected Slow_SSE4_2 cpu feature bit (bug 17501)
Date: Tue, 21 Oct 2014 12:41:32 +0200

Several places misdetect the Slow_SSE4_2 feature bit due to using the
wrong offset into the __cpu_features structure.  This is only a
performance bug, since the actual check for SSE4_2 is correct.

Tested on x86_64-suse-linux and i686-suse-linux, on a system that has
SSE4_2 but not Slow_SSE4_2.

Andreas.

	[BZ #17501]
	* sysdeps/i386/i686/multiarch/strcasecmp.S (__strcasecmp): Fix
	check for Slow_SSE4_2 feature bit.
	* sysdeps/i386/i686/multiarch/strcmp.S (STRCMP): Likewise.
	* sysdeps/i386/i686/multiarch/strncase.S (__strncasecmp):
	Likewise.
	* sysdeps/x86_64/multiarch/strcmp.S (STRCMP, __strcasecmp,
	__strncasecmp): Likewise.  Fix check for Fast_Unaligned_Load
	feature bit.
---
 sysdeps/i386/i686/multiarch/strcasecmp.S | 4 ++--
 sysdeps/i386/i686/multiarch/strcmp.S     | 4 ++--
 sysdeps/i386/i686/multiarch/strncase.S   | 4 ++--
 sysdeps/x86_64/multiarch/strcmp.S        | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/sysdeps/i386/i686/multiarch/strcasecmp.S b/sysdeps/i386/i686/multiarch/strcasecmp.S
index 4f2de4f..57ccef5 100644
--- a/sysdeps/i386/i686/multiarch/strcasecmp.S
+++ b/sysdeps/i386/i686/multiarch/strcasecmp.S
@@ -37,7 +37,7 @@ ENTRY(__strcasecmp)
 	leal	__strcasecmp_ssse3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__strcasecmp_sse4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -58,7 +58,7 @@ ENTRY(__strcasecmp)
 	leal	__strcasecmp_ssse3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__strcasecmp_sse4_2, %eax
 2:	ret
diff --git a/sysdeps/i386/i686/multiarch/strcmp.S b/sysdeps/i386/i686/multiarch/strcmp.S
index 2ad6bf4..23cbd6f 100644
--- a/sysdeps/i386/i686/multiarch/strcmp.S
+++ b/sysdeps/i386/i686/multiarch/strcmp.S
@@ -68,7 +68,7 @@ ENTRY(STRCMP)
 	leal	__STRCMP_SSSE3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__STRCMP_SSE4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -89,7 +89,7 @@ ENTRY(STRCMP)
 	leal	__STRCMP_SSSE3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__STRCMP_SSE4_2, %eax
 2:	ret
diff --git a/sysdeps/i386/i686/multiarch/strncase.S b/sysdeps/i386/i686/multiarch/strncase.S
index 9b4cfa0..41644a5 100644
--- a/sysdeps/i386/i686/multiarch/strncase.S
+++ b/sysdeps/i386/i686/multiarch/strncase.S
@@ -37,7 +37,7 @@ ENTRY(__strncasecmp)
 	leal	__strncasecmp_ssse3@GOTOFF(%ebx), %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features@GOTOFF(%ebx)
 	jnz	2f
 	leal	__strncasecmp_sse4_2@GOTOFF(%ebx), %eax
 2:	popl	%ebx
@@ -58,7 +58,7 @@ ENTRY(__strncasecmp)
 	leal	__strncasecmp_ssse3, %eax
 	testl	$bit_SSE4_2, CPUID_OFFSET+index_SSE4_2+__cpu_features
 	jz	2f
-	testl	$bit_Slow_SSE4_2, CPUID_OFFSET+index_Slow_SSE4_2+__cpu_features
+	testl	$bit_Slow_SSE4_2, FEATURE_OFFSET+index_Slow_SSE4_2+__cpu_features
 	jnz	2f
 	leal	__strncasecmp_sse4_2, %eax
 2:	ret
diff --git a/sysdeps/x86_64/multiarch/strcmp.S b/sysdeps/x86_64/multiarch/strcmp.S
index f3e0ca1..bd71714 100644
--- a/sysdeps/x86_64/multiarch/strcmp.S
+++ b/sysdeps/x86_64/multiarch/strcmp.S
@@ -91,10 +91,10 @@ ENTRY(STRCMP)
 1:
 #ifdef USE_AS_STRCMP
 	leaq	__strcmp_sse2_unaligned(%rip), %rax
-	testl	$bit_Fast_Unaligned_Load, __cpu_features+CPUID_OFFSET+index_Fast_Unaligned_Load(%rip)
+	testl	$bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
 	jnz	3f
 #else
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	STRCMP_SSE42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -120,7 +120,7 @@ ENTRY(__strcasecmp)
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	3f
 # endif
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strcasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
@@ -146,7 +146,7 @@ ENTRY(__strncasecmp)
 	testl	$bit_AVX_Usable, __cpu_features+FEATURE_OFFSET+index_AVX_Usable(%rip)
 	jnz	3f
 # endif
-	testl	$bit_Slow_SSE4_2, __cpu_features+CPUID_OFFSET+index_Slow_SSE4_2(%rip)
+	testl	$bit_Slow_SSE4_2, __cpu_features+FEATURE_OFFSET+index_Slow_SSE4_2(%rip)
 	jnz	2f
 	leaq	__strncasecmp_sse42(%rip), %rax
 	testl	$bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
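
A note on why the wrong offset assembles cleanly but misdetects the bit:
__cpu_features keeps the raw CPUID register dump and the synthetic feature
words in two separate arrays, reached via CPUID_OFFSET and FEATURE_OFFSET
respectively, and each index_* constant is only a byte offset within the
array it was defined for.  Pairing index_Slow_SSE4_2 (a feature-array index)
with CPUID_OFFSET therefore tests a bit in an unrelated CPUID word.  The C
sketch below shows the effect; the struct layout, field sizes and the bit
value are simplified stand-ins for illustration, not glibc's actual
init-arch.h definitions.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-in for glibc's __cpu_features; the real layout lives
   in sysdeps/x86_64/multiarch/init-arch.h and differs in detail.  */
struct cpu_features_sketch
{
  int kind;
  int max_cpuid;
  unsigned int cpuid[4];    /* raw CPUID register dump (CPUID_OFFSET)   */
  unsigned int family;
  unsigned int model;
  unsigned int feature[1];  /* synthetic feature words (FEATURE_OFFSET) */
};

#define CPUID_OFFSET      offsetof (struct cpu_features_sketch, cpuid)
#define FEATURE_OFFSET    offsetof (struct cpu_features_sketch, feature)
#define index_Slow_SSE4_2 0           /* byte offset inside feature[]   */
#define bit_Slow_SSE4_2   (1 << 4)    /* hypothetical bit position      */

int
main (void)
{
  struct cpu_features_sketch cf = { 0 };

  /* Leave feature[] clear, but set the same bit position in cpuid[0],
     as a real CPUID leaf easily might.  */
  cf.cpuid[0] = bit_Slow_SSE4_2;

  const char *base = (const char *) &cf;
  unsigned int wrong
    = *(const unsigned int *) (base + CPUID_OFFSET + index_Slow_SSE4_2);
  unsigned int right
    = *(const unsigned int *) (base + FEATURE_OFFSET + index_Slow_SSE4_2);

  /* The buggy addressing reads cpuid[0] and reports Slow_SSE4_2 even
     though the feature word never had it set.  */
  printf ("CPUID_OFFSET+index_Slow_SSE4_2:   %s\n",
          (wrong & bit_Slow_SSE4_2) ? "Slow_SSE4_2 set (bogus)" : "clear");
  printf ("FEATURE_OFFSET+index_Slow_SSE4_2: %s\n",
          (right & bit_Slow_SSE4_2) ? "Slow_SSE4_2 set" : "clear");
  return 0;
}

Running the sketch prints "Slow_SSE4_2 set (bogus)" for the CPUID_OFFSET
address and "clear" for the FEATURE_OFFSET one, which is the misdetection
the testl changes above remove; the real SSE4_2 checks keep CPUID_OFFSET
because index_SSE4_2 really is a cpuid-array index.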