powerpc: Handle worstcase behavior in strstr() for POWER7

Message ID 1439917856-27196-1-git-send-email-raji@linux.vnet.ibm.com
State Committed
Delegated to: Tulio Magno Quites Machado Filho
Headers

Commit Message

Rajalakshmi S Aug. 18, 2015, 5:10 p.m. UTC
  Instead of checking needle length, constant 'n' number of comparisons
is checked to fall back to default implementation.  This patch is tested
on powerpc64 and powerpc64le.

2015-08-18  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
---
 sysdeps/powerpc/powerpc64/power7/strstr.S | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)
  

Comments

Steven Munroe Aug. 18, 2015, 7:36 p.m. UTC | #1
On Tue, 2015-08-18 at 22:40 +0530, Rajalakshmi Srinivasaraghavan wrote:
> Instead of checking needle length, constant 'n' number of comparisons
> is checked to fall back to default implementation.  This patch is tested
> on powerpc64 and powerpc64le.
> 
> 2015-08-18  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
> 
> 	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.
> ---
LGTM
  
Tulio Magno Quites Machado Filho Aug. 25, 2015, 4:45 p.m. UTC | #2
Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com> writes:

> Instead of checking needle length, constant 'n' number of comparisons
> is checked to fall back to default implementation.  This patch is tested
> on powerpc64 and powerpc64le.
>
> 2015-08-18  Rajalakshmi Srinivasaraghavan  <raji@linux.vnet.ibm.com>
>
> 	* sysdeps/powerpc/powerpc64/power7/strstr.S: Handle worst case.

LGTM too.

I'm pushing it.

Thanks!
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/power7/strstr.S b/sysdeps/powerpc/powerpc64/power7/strstr.S
index bfb0c49..fb3c810 100644
--- a/sysdeps/powerpc/powerpc64/power7/strstr.S
+++ b/sysdeps/powerpc/powerpc64/power7/strstr.S
@@ -23,6 +23,8 @@ 
 /* The performance gain is obtained using aligned memory access, load
  * doubleword and usage of cmpb instruction for quicker comparison.  */
 
+#define ITERATIONS	64
+
 #ifndef STRLEN
 /* For builds with no IFUNC support, local calls should be made to internal
    GLIBC symbol (created by libc_hidden_builtin_def).  */
@@ -62,6 +64,8 @@  EALIGN (strstr, 4, 0)
 	cfi_offset(r30, -16)
 	std	r29, -24(r1)		/* Save callers register r29.  */
 	cfi_offset(r29, -24)
+	std	r28, -32(r1)		/* Save callers register r28.  */
+	cfi_offset(r28, -32)
 	std	r0, 16(r1)		/* Store the link register.  */
 	cfi_offset(lr, 16)
 	stdu	r1, -FRAMESIZE(r1)	/* Create the stack frame.  */
@@ -69,7 +73,6 @@  EALIGN (strstr, 4, 0)
 
 	dcbt	0, r3
 	dcbt	0, r4
-
 	cmpdi	cr7, r3, 0
 	beq	cr7, L(retnull)
 	cmpdi	cr7, r4, 0
@@ -84,10 +87,6 @@  EALIGN (strstr, 4, 0)
 	cmpdi	cr7, r3, 0	/* If search str is null.  */
 	beq	cr7, L(ret_r3)
 
-	/* Call __strstr_ppc if needle len > 2048 */
-	cmpdi	cr7, r3, 2048
-	bgt	cr7, L(default)
-
 	mr	r31, r3
 	mr	r4, r3
 	mr	r3, r29
@@ -105,7 +104,8 @@  EALIGN (strstr, 4, 0)
 	/* If first char of search str is not present.  */
 	cmpdi	cr7, r3, 0
 	ble	cr7, L(end)
-
+	/* Reg r28 is used to count the number of iterations. */
+	li	r28, 0
 	rldicl	r8, r3, 0, 52	/* Page cross check.  */
 	cmpldi	cr7, r8, 4096-16
 	bgt	cr7, L(bytebybyte)
@@ -324,6 +324,10 @@  L(return4):
 	.align	4
 L(begin):
 	mr	r3, r8
+	/* When our iterations exceed ITERATIONS,fall back to default. */
+	addi	r28, r28, 1
+	cmpdi	cr7, r28, ITERATIONS
+	beq	cr7, L(default)
 	lbz	r4, 0(r30)
 	bl	STRCHR
 	nop
@@ -423,6 +427,10 @@  L(nextbyte):
 	cmpdi	cr7, r9, -1
 	beq	cr7, L(end)
 	addi	r3, r4, 1
+	/* When our iterations exceed ITERATIONS,fall back to default. */
+	addi	r28, r28, 1
+	cmpdi	cr7, r28, ITERATIONS
+	beq	cr7, L(default)
 	lbz	r4, 0(r30)
 	bl	STRCHR
 	nop
@@ -490,7 +498,6 @@  L(retnull):
 
 	.align	4
 L(default):
-	mr	r3, r29
 	mr	r4, r30
 	bl	__strstr_ppc
 	nop
@@ -500,6 +507,7 @@  L(end):
 	addi	r1, r1, FRAMESIZE	/* Restore stack pointer.  */
 	cfi_adjust_cfa_offset(-FRAMESIZE)
 	ld	r0, 16(r1)	/* Restore the saved link register.  */
+	ld	r28, -32(r1)	/* Restore callers save register r28.  */
 	ld	r29, -24(r1)	/* Restore callers save register r29.  */
 	ld	r30, -16(r1)	/* Restore callers save register r30.  */
 	ld	r31, -8(r1)	/* Restore callers save register r31.  */