From patchwork Sat May 20 14:50:06 2017
X-Patchwork-Submitter: "Lu, Hongjiu"
X-Patchwork-Id: 20517
Date: Sat, 20 May 2017 07:50:06 -0700
From: "H.J. Lu"
To: GNU C Library
Subject: [PATCH] x86_64: Remove 9 REX bytes from memchr.S
Message-ID: <20170520145006.GA19672@lucon.org>

There is no need to use 64-bit registers when only the lower 32 bits
are non-zero.

Tested on x86-64.  OK for master?

H.J.
---
	* sysdeps/x86_64/memchr.S (MEMCHR): Use 32-bit registers for
	the lower 32 bits.
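[Editorial illustration, not part of the patch.] The change is safe for the mask and compare cases because any write to a 32-bit register on x86-64 zero-extends into the full 64-bit register, and dropping the REX.W prefix saves one encoding byte per instruction. A small Python sketch of that register-write semantics (function names and test values are mine, chosen for illustration):

```python
# Model of x86-64 register-write semantics for a small positive
# immediate mask such as $63 or $15: the 32-bit form of "and"
# clears the upper 32 bits by zero-extension, so it leaves the same
# full 64-bit register state as the 64-bit (REX.W-prefixed) form.
MASK64 = (1 << 64) - 1

def and_r64(reg, imm):
    """Models `and $imm, %rcx` -- 64-bit operation (REX.W prefix)."""
    return reg & imm & MASK64

def and_r32(reg, imm):
    """Models `and $imm, %ecx` -- 32-bit operation; the 32-bit
    result is zero-extended into the full 64-bit register."""
    return (reg & 0xFFFFFFFF) & imm & 0xFFFFFFFF

# For every 64-bit starting value, both encodings agree.
for reg in (0, 63, 0xDEADBEEFCAFEF00D, MASK64):
    assert and_r64(reg, 63) == and_r32(reg, 63)
```

The model only covers small positive immediates like the `$63`, `$48`, and `$15` used here; it does not claim anything about instructions whose full 64-bit input can exceed 32 bits.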
---
 sysdeps/x86_64/memchr.S | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 8242f2d..f1dad9e 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -44,10 +44,10 @@ ENTRY(MEMCHR)
 	punpcklbw %xmm1, %xmm1
 #endif
 
-	and	$63, %rcx
+	and	$63, %ecx
 	pshufd	$0, %xmm1, %xmm1
 
-	cmp	$48, %rcx
+	cmp	$48, %ecx
 	ja	L(crosscache)
 
 	movdqu	(%rdi), %xmm0
@@ -59,7 +59,7 @@ ENTRY(MEMCHR)
 	sub	$16, %rdx
 	jbe	L(return_null)
 	add	$16, %rdi
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	add	%rcx, %rdx
 	sub	$64, %rdx
@@ -68,7 +68,7 @@
 
 	.p2align 4
 L(crosscache):
-	and	$15, %rcx
+	and	$15, %ecx
 	and	$-16, %rdi
 	movdqa	(%rdi), %xmm0
 
@@ -162,7 +162,7 @@ L(loop_prolog):
 
 	mov	%rdi, %rcx
 	and	$-64, %rdi
-	and	$63, %rcx
+	and	$63, %ecx
 	add	%rcx, %rdx
 
 	.p2align 4
@@ -214,7 +214,7 @@ L(align64_loop):
 
 	.p2align 4
 L(exit_loop):
-	add	$32, %rdx
+	add	$32, %edx
 	jle	L(exit_loop_32)
 
 	movdqa	(%rdi), %xmm0
@@ -234,7 +234,7 @@ L(exit_loop):
 	pmovmskb %xmm3, %eax
 	test	%eax, %eax
 	jnz	L(matches32_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jle	L(return_null)
 
 	PCMPEQ	48(%rdi), %xmm1
@@ -246,13 +246,13 @@ L(exit_loop):
 
 	.p2align 4
 L(exit_loop_32):
-	add	$32, %rdx
+	add	$32, %edx
 	movdqa	(%rdi), %xmm0
 	PCMPEQ	%xmm1, %xmm0
 	pmovmskb %xmm0, %eax
 	test	%eax, %eax
 	jnz	L(matches_1)
-	sub	$16, %rdx
+	sub	$16, %edx
 	jbe	L(return_null)

 	PCMPEQ	16(%rdi), %xmm1