powerpc64: Remove old strncmp optimization

Message ID 20221129160257.1947346-1-rajis@linux.ibm.com
State Superseded
Headers
Series powerpc64: Remove old strncmp optimization |

Commit Message

Rajalakshmi Srinivasaraghavan Nov. 29, 2022, 4:02 p.m. UTC
  This patch removes the power4 strncmp optimization for powerpc64 and uses
__strncmp_ppc implementation instead.  Currently, both power4 and ppc
IFUNC variants result in similar performance.

Tested on ppc64le with and without --disable-multi-arch flag.
---
 sysdeps/powerpc/powerpc64/multiarch/Makefile  |   3 +-
 .../powerpc64/multiarch/ifunc-impl-list.c     |   2 -
 .../powerpc64/multiarch/strncmp-power4.S      |  23 --
 sysdeps/powerpc/powerpc64/multiarch/strncmp.c |   3 -
 sysdeps/powerpc/powerpc64/power4/strncmp.S    | 225 ------------------
 5 files changed, 1 insertion(+), 255 deletions(-)
 delete mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
 delete mode 100644 sysdeps/powerpc/powerpc64/power4/strncmp.S
  

Comments

Paul E. Murphy Nov. 29, 2022, 10:37 p.m. UTC | #1
On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote:
> This patch removes the power4 strncmp optimization for powerpc64 and uses
> __strncmp_ppc implementation instead.  Currently, both power4 and ppc
> IFUNC variants result in similar performance.
What cpu was used to test performance? I'd be curious if this holds for 
something like the venerable ppc970.

> 
> Tested on ppc64le with and without --disable-multi-arch flag.
> ---
>   sysdeps/powerpc/powerpc64/multiarch/Makefile  |   3 +-
>   .../powerpc64/multiarch/ifunc-impl-list.c     |   2 -
>   .../powerpc64/multiarch/strncmp-power4.S      |  23 --
>   sysdeps/powerpc/powerpc64/multiarch/strncmp.c |   3 -
>   sysdeps/powerpc/powerpc64/power4/strncmp.S    | 225 ------------------
>   5 files changed, 1 insertion(+), 255 deletions(-)
>   delete mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
>   delete mode 100644 sysdeps/powerpc/powerpc64/power4/strncmp.S
> 
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
> index 6f2436b660..ed25e234ba 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
> +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
> @@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
>   		   strnlen-power8 strnlen-power7 strnlen-ppc64 \
>   		   strcasecmp-power7 strcasecmp_l-power7 \
>   		   strncase-power7 strncase_l-power7 \
> -		   strncmp-power8 strncmp-power7 \
> -		   strncmp-power4 strncmp-ppc64 \
> +		   strncmp-power8 strncmp-power7 strncmp-ppc64 \
>   		   strchr-power8 strchr-power7 strchr-ppc64 \
>   		   strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
>   		   strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
> index 5a3c7a5886..238f784146 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
> @@ -171,8 +171,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>   			      __strncmp_power8)
>   	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_ARCH_2_06,
>   			      __strncmp_power7)
> -	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4,
> -			      __strncmp_power4)
>   	      IFUNC_IMPL_ADD (array, i, strncmp, 1,
>   			      __strncmp_ppc))
> 
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
> deleted file mode 100644
> index df6ad6ae9c..0000000000
> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
> +++ /dev/null
> @@ -1,23 +0,0 @@
> -/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#define STRNCMP __strncmp_power4
> -
> -#undef libc_hidden_builtin_def
> -#define libc_hidden_builtin_def(name)
> -
> -#include <sysdeps/powerpc/powerpc64/power4/strncmp.S>
> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
> index da3de73c27..7400ed3b9a 100644
> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
> @@ -26,7 +26,6 @@
>   # include "init-arch.h"
> 
>   extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
> -extern __typeof (strncmp) __strncmp_power4 attribute_hidden;
>   extern __typeof (strncmp) __strncmp_power7 attribute_hidden;
>   extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
>   # ifdef __LITTLE_ENDIAN__
> @@ -46,7 +45,5 @@ libc_ifunc_redirected (__redirect_strncmp, strncmp,
>   		       ? __strncmp_power8
>   		       : (hwcap & PPC_FEATURE_ARCH_2_06)
>   			 ? __strncmp_power7
> -			 : (hwcap & PPC_FEATURE_POWER4)
> -			   ? __strncmp_power4
>   			   : __strncmp_ppc);
Minor nit, does the spacing of __strcncmp_ppc need updating here too?
  
Adhemerval Zanella Netto Nov. 30, 2022, 1:49 p.m. UTC | #2
On 29/11/22 19:37, Paul E Murphy via Libc-alpha wrote:
> 
> 
> On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote:
>> This patch removes the power4 strncmp optimization for powerpc64 and uses
>> __strncmp_ppc implementation instead.  Currently, both power4 and ppc
>> IFUNC variants result in similar performance.
> What cpu was used to test performance? I'd be curious if this holds for something like the venerable ppc970.


Do we really care, specially because I am not sure how it was evaluated on
power4 chip?

Otherwise, LGTM.
  
Rajalakshmi Srinivasaraghavan Nov. 30, 2022, 4:50 p.m. UTC | #3
On 11/29/22 4:37 PM, Paul E Murphy wrote:
>
>
> On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote:
>> This patch removes the power4 strncmp optimization for powerpc64 and 
>> uses
>> __strncmp_ppc implementation instead.  Currently, both power4 and ppc
>> IFUNC variants result in similar performance.
> What cpu was used to test performance? I'd be curious if this holds 
> for something like the venerable ppc970.

Compared __strncmp_power4 and __strncmp_ppc benchtest numbers on POWER8 
and POWER10 systems.

>
>>
>> Tested on ppc64le with and without --disable-multi-arch flag.
>> ---
>>   sysdeps/powerpc/powerpc64/multiarch/Makefile  |   3 +-
>>   .../powerpc64/multiarch/ifunc-impl-list.c     |   2 -
>>   .../powerpc64/multiarch/strncmp-power4.S      |  23 --
>>   sysdeps/powerpc/powerpc64/multiarch/strncmp.c |   3 -
>>   sysdeps/powerpc/powerpc64/power4/strncmp.S    | 225 ------------------
>>   5 files changed, 1 insertion(+), 255 deletions(-)
>>   delete mode 100644 
>> sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
>>   delete mode 100644 sysdeps/powerpc/powerpc64/power4/strncmp.S
>>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile 
>> b/sysdeps/powerpc/powerpc64/multiarch/Makefile
>> index 6f2436b660..ed25e234ba 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
>> @@ -12,8 +12,7 @@ sysdep_routines += memcpy-power8-cached 
>> memcpy-power7 memcpy-a2 memcpy-power6 \
>>              strnlen-power8 strnlen-power7 strnlen-ppc64 \
>>              strcasecmp-power7 strcasecmp_l-power7 \
>>              strncase-power7 strncase_l-power7 \
>> -           strncmp-power8 strncmp-power7 \
>> -           strncmp-power4 strncmp-ppc64 \
>> +           strncmp-power8 strncmp-power7 strncmp-ppc64 \
>>              strchr-power8 strchr-power7 strchr-ppc64 \
>>              strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
>>              strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c 
>> b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
>> index 5a3c7a5886..238f784146 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
>> @@ -171,8 +171,6 @@ __libc_ifunc_impl_list (const char *name, struct 
>> libc_ifunc_impl *array,
>>                     __strncmp_power8)
>>             IFUNC_IMPL_ADD (array, i, strncmp, hwcap & 
>> PPC_FEATURE_ARCH_2_06,
>>                     __strncmp_power7)
>> -          IFUNC_IMPL_ADD (array, i, strncmp, hwcap & 
>> PPC_FEATURE_POWER4,
>> -                  __strncmp_power4)
>>             IFUNC_IMPL_ADD (array, i, strncmp, 1,
>>                     __strncmp_ppc))
>>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S 
>> b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
>> deleted file mode 100644
>> index df6ad6ae9c..0000000000
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
>> +++ /dev/null
>> @@ -1,23 +0,0 @@
>> -/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
>> -   This file is part of the GNU C Library.
>> -
>> -   The GNU C Library is free software; you can redistribute it and/or
>> -   modify it under the terms of the GNU Lesser General Public
>> -   License as published by the Free Software Foundation; either
>> -   version 2.1 of the License, or (at your option) any later version.
>> -
>> -   The GNU C Library is distributed in the hope that it will be useful,
>> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> -   Lesser General Public License for more details.
>> -
>> -   You should have received a copy of the GNU Lesser General Public
>> -   License along with the GNU C Library; if not, see
>> -   <https://www.gnu.org/licenses/>.  */
>> -
>> -#define STRNCMP __strncmp_power4
>> -
>> -#undef libc_hidden_builtin_def
>> -#define libc_hidden_builtin_def(name)
>> -
>> -#include <sysdeps/powerpc/powerpc64/power4/strncmp.S>
>> diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c 
>> b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
>> index da3de73c27..7400ed3b9a 100644
>> --- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
>> +++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
>> @@ -26,7 +26,6 @@
>>   # include "init-arch.h"
>>
>>   extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
>> -extern __typeof (strncmp) __strncmp_power4 attribute_hidden;
>>   extern __typeof (strncmp) __strncmp_power7 attribute_hidden;
>>   extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
>>   # ifdef __LITTLE_ENDIAN__
>> @@ -46,7 +45,5 @@ libc_ifunc_redirected (__redirect_strncmp, strncmp,
>>                  ? __strncmp_power8
>>                  : (hwcap & PPC_FEATURE_ARCH_2_06)
>>                ? __strncmp_power7
>> -             : (hwcap & PPC_FEATURE_POWER4)
>> -               ? __strncmp_power4
>>                  : __strncmp_ppc);
> Minor nit, does the spacing of __strcncmp_ppc need updating here too?

Ack.
  
Paul E. Murphy Dec. 1, 2022, 8:26 p.m. UTC | #4
On 11/30/22 7:49 AM, Adhemerval Zanella Netto wrote:
> 
> 
> On 29/11/22 19:37, Paul E Murphy via Libc-alpha wrote:
>>
>>
>> On 11/29/22 10:02 AM, Rajalakshmi Srinivasaraghavan via Libc-alpha wrote:
>>> This patch removes the power4 strncmp optimization for powerpc64 and uses
>>> __strncmp_ppc implementation instead.  Currently, both power4 and ppc
>>> IFUNC variants result in similar performance.
>> What cpu was used to test performance? I'd be curious if this holds for something like the venerable ppc970.
> 
> 
> Do we really care, specially because I am not sure how it was evaluated on
> power4 chip?

On the merits of performance, I don't think this is sufficient 
justification.  We should be more detailed.

If the justification is tidying up, this patch is OK. That should be 
made explicit in the commit message.

It does make me wonder, when is it OK to drop cpu specific 
optimizations?  I suspect there aren't many running a modern BE ppc 
distro with P7 or older.
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index 6f2436b660..ed25e234ba 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -12,8 +12,7 @@  sysdep_routines += memcpy-power8-cached memcpy-power7 memcpy-a2 memcpy-power6 \
 		   strnlen-power8 strnlen-power7 strnlen-ppc64 \
 		   strcasecmp-power7 strcasecmp_l-power7 \
 		   strncase-power7 strncase_l-power7 \
-		   strncmp-power8 strncmp-power7 \
-		   strncmp-power4 strncmp-ppc64 \
+		   strncmp-power8 strncmp-power7 strncmp-ppc64 \
 		   strchr-power8 strchr-power7 strchr-ppc64 \
 		   strchrnul-power8 strchrnul-power7 strchrnul-ppc64 \
 		   strcpy-power8 strcpy-power7 strcpy-ppc64 stpcpy-power8 \
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index 5a3c7a5886..238f784146 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -171,8 +171,6 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strncmp_power8)
 	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_ARCH_2_06,
 			      __strncmp_power7)
-	      IFUNC_IMPL_ADD (array, i, strncmp, hwcap & PPC_FEATURE_POWER4,
-			      __strncmp_power4)
 	      IFUNC_IMPL_ADD (array, i, strncmp, 1,
 			      __strncmp_ppc))
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S b/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
deleted file mode 100644
index df6ad6ae9c..0000000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp-power4.S
+++ /dev/null
@@ -1,23 +0,0 @@ 
-/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#define STRNCMP __strncmp_power4
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#include <sysdeps/powerpc/powerpc64/power4/strncmp.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
index da3de73c27..7400ed3b9a 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncmp.c
@@ -26,7 +26,6 @@ 
 # include "init-arch.h"
 
 extern __typeof (strncmp) __strncmp_ppc attribute_hidden;
-extern __typeof (strncmp) __strncmp_power4 attribute_hidden;
 extern __typeof (strncmp) __strncmp_power7 attribute_hidden;
 extern __typeof (strncmp) __strncmp_power8 attribute_hidden;
 # ifdef __LITTLE_ENDIAN__
@@ -46,7 +45,5 @@  libc_ifunc_redirected (__redirect_strncmp, strncmp,
 		       ? __strncmp_power8
 		       : (hwcap & PPC_FEATURE_ARCH_2_06)
 			 ? __strncmp_power7
-			 : (hwcap & PPC_FEATURE_POWER4)
-			   ? __strncmp_power4
 			   : __strncmp_ppc);
 #endif
diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S
deleted file mode 100644
index 8c82518504..0000000000
--- a/sysdeps/powerpc/powerpc64/power4/strncmp.S
+++ /dev/null
@@ -1,225 +0,0 @@ 
-/* Optimized strcmp implementation for PowerPC64.
-   Copyright (C) 2003-2022 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#ifndef STRNCMP
-# define STRNCMP strncmp
-#endif
-
-/* See strlen.s for comments on how the end-of-string testing works.  */
-
-/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
-
-ENTRY_TOCLESS (STRNCMP, 4)
-	CALL_MCOUNT 3
-
-#define rTMP2	r0
-#define rRTN	r3
-#define rSTR1	r3	/* first string arg */
-#define rSTR2	r4	/* second string arg */
-#define rN	r5	/* max string length */
-#define rWORD1	r6	/* current word in s1 */
-#define rWORD2	r7	/* current word in s2 */
-#define rWORD3  r10
-#define rWORD4  r11
-#define rFEFE	r8	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
-#define r7F7F	r9	/* constant 0x7f7f7f7f7f7f7f7f */
-#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f7f7f7f7f) */
-#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
-#define rTMP	r12
-
-	dcbt	0,rSTR1
-	or	rTMP, rSTR2, rSTR1
-	lis	r7F7F, 0x7f7f
-	dcbt	0,rSTR2
-	clrldi.	rTMP, rTMP, 61
-	cmpldi	cr1, rN, 0
-	lis	rFEFE, -0x101
-	bne	L(unaligned)
-/* We are doubleword aligned so set up for two loops.  first a double word
-   loop, then fall into the byte loop if any residual.  */
-	srdi.	rTMP, rN, 3
-	clrldi	rN, rN, 61
-	addi	rFEFE, rFEFE, -0x101
-	addi	r7F7F, r7F7F, 0x7f7f
-	cmpldi	cr1, rN, 0
-	beq	L(unaligned)
-
-	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
-	ld	rWORD1, 0(rSTR1)
-	ld	rWORD2, 0(rSTR2)
-	sldi	rTMP, rFEFE, 32
-	insrdi	r7F7F, r7F7F, 32, 0
-	add	rFEFE, rFEFE, rTMP
-	b	L(g1)
-
-L(g0):
-	ldu	rWORD1, 8(rSTR1)
-	bne-	cr1, L(different)
-	ldu	rWORD2, 8(rSTR2)
-L(g1):	add	rTMP, rFEFE, rWORD1
-	nor	rNEG, r7F7F, rWORD1
-	bdz	L(tail)
-	and.	rTMP, rTMP, rNEG
-	cmpd	cr1, rWORD1, rWORD2
-	beq+	L(g0)
-
-/* OK. We've hit the end of the string. We need to be careful that
-   we don't compare two strings as different because of gunk beyond
-   the end of the strings...  */
-
-#ifdef __LITTLE_ENDIAN__
-L(endstring):
-	addi    rTMP2, rTMP, -1
-	beq	cr1, L(equal)
-	andc    rTMP2, rTMP2, rTMP
-	rldimi	rTMP2, rTMP2, 1, 0
-	and	rWORD2, rWORD2, rTMP2	/* Mask off gunk.  */
-	and	rWORD1, rWORD1, rTMP2
-	cmpd	cr1, rWORD1, rWORD2
-	beq	cr1, L(equal)
-	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
-	neg	rNEG, rBITDIF
-	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
-	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
-	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
-	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
-	sld	rWORD2, rWORD2, rNEG
-	xor.	rBITDIF, rWORD1, rWORD2
-	sub	rRTN, rWORD1, rWORD2
-	blt-	L(highbit)
-	sradi	rRTN, rRTN, 63		/* must return an int.  */
-	ori	rRTN, rRTN, 1
-	blr
-L(equal):
-	li	rRTN, 0
-	blr
-
-L(different):
-	ld	rWORD1, -8(rSTR1)
-	xor	rBITDIF, rWORD1, rWORD2	/* rBITDIF has bits that differ.  */
-	neg	rNEG, rBITDIF
-	and	rNEG, rNEG, rBITDIF	/* rNEG has LS bit that differs.  */
-	cntlzd	rNEG, rNEG		/* bitcount of the bit.  */
-	andi.	rNEG, rNEG, 56		/* bitcount to LS byte that differs. */
-	sld	rWORD1, rWORD1, rNEG	/* shift left to clear MS bytes.  */
-	sld	rWORD2, rWORD2, rNEG
-	xor.	rBITDIF, rWORD1, rWORD2
-	sub	rRTN, rWORD1, rWORD2
-	blt-	L(highbit)
-	sradi	rRTN, rRTN, 63
-	ori	rRTN, rRTN, 1
-	blr
-L(highbit):
-	sradi	rRTN, rWORD2, 63
-	ori	rRTN, rRTN, 1
-	blr
-
-#else
-L(endstring):
-	and	rTMP, r7F7F, rWORD1
-	beq	cr1, L(equal)
-	add	rTMP, rTMP, r7F7F
-	xor.	rBITDIF, rWORD1, rWORD2
-	andc	rNEG, rNEG, rTMP
-	blt-	L(highbit)
-	cntlzd	rBITDIF, rBITDIF
-	cntlzd	rNEG, rNEG
-	addi	rNEG, rNEG, 7
-	cmpd	cr1, rNEG, rBITDIF
-	sub	rRTN, rWORD1, rWORD2
-	blt-	cr1, L(equal)
-	sradi	rRTN, rRTN, 63		/* must return an int.  */
-	ori	rRTN, rRTN, 1
-	blr
-L(equal):
-	li	rRTN, 0
-	blr
-
-L(different):
-	ld	rWORD1, -8(rSTR1)
-	xor.	rBITDIF, rWORD1, rWORD2
-	sub	rRTN, rWORD1, rWORD2
-	blt-	L(highbit)
-	sradi	rRTN, rRTN, 63
-	ori	rRTN, rRTN, 1
-	blr
-L(highbit):
-	sradi	rRTN, rWORD2, 63
-	ori	rRTN, rRTN, 1
-	blr
-#endif
-
-/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
-	.align 4
-L(tail):
-	and.	rTMP, rTMP, rNEG
-	cmpd	cr1, rWORD1, rWORD2
-	bne-	L(endstring)
-	addi	rSTR1, rSTR1, 8
-	bne-	cr1, L(different)
-	addi	rSTR2, rSTR2, 8
-	cmpldi	cr1, rN, 0
-L(unaligned):
-	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
-	ble	cr1, L(ux)
-L(uz):
-	lbz	rWORD1, 0(rSTR1)
-	lbz	rWORD2, 0(rSTR2)
-	.align 4
-L(u1):
-	cmpdi	cr1, rWORD1, 0
-	bdz	L(u4)
-	cmpd	rWORD1, rWORD2
-	beq-	cr1, L(u4)
-	bne-	L(u4)
-	lbzu    rWORD3, 1(rSTR1)
-	lbzu	rWORD4, 1(rSTR2)
-	cmpdi	cr1, rWORD3, 0
-	bdz	L(u3)
-	cmpd	rWORD3, rWORD4
-	beq-    cr1, L(u3)
-	bne-    L(u3)
-	lbzu	rWORD1, 1(rSTR1)
-	lbzu	rWORD2, 1(rSTR2)
-	cmpdi	cr1, rWORD1, 0
-	bdz	L(u4)
-	cmpd	rWORD1, rWORD2
-	beq-	cr1, L(u4)
-	bne-	L(u4)
-	lbzu	rWORD3, 1(rSTR1)
-	lbzu	rWORD4, 1(rSTR2)
-	cmpdi	cr1, rWORD3, 0
-	bdz	L(u3)
-	cmpd	rWORD3, rWORD4
-	beq-    cr1, L(u3)
-	bne-    L(u3)
-	lbzu	rWORD1, 1(rSTR1)
-	lbzu	rWORD2, 1(rSTR2)
-	b       L(u1)
-
-L(u3):  sub     rRTN, rWORD3, rWORD4
-	blr
-L(u4):	sub	rRTN, rWORD1, rWORD2
-	blr
-L(ux):
-	li	rRTN, 0
-	blr
-END (STRNCMP)
-libc_hidden_builtin_def (strncmp)