[4/4] Remove powerpc64 strspn, strcspn, and strpbrk implementation

Message ID 1459432863-20749-5-git-send-email-adhemerval.zanella@linaro.org
State Committed
Headers

Commit Message

Adhemerval Zanella March 31, 2016, 2:01 p.m. UTC
  This patch removes the powerpc64 optimized strspn, strcspn, and
strpbrk assembly implementations now that the default C ones
implement the same strategy.  On the internal glibc benchtests the
current implementations show similar performance with -O2.

Tested on powerpc64le (POWER8).

	* sysdeps/powerpc/powerpc64/strcspn.S: Remove file.
	* sysdeps/powerpc/powerpc64/strpbrk.S: Remove file.
	* sysdeps/powerpc/powerpc64/strspn.S: Remove file.
---
 ChangeLog                           |   4 +
 sysdeps/powerpc/powerpc64/strcspn.S | 127 -------------------------------
 sysdeps/powerpc/powerpc64/strpbrk.S | 135 ---------------------------------
 sysdeps/powerpc/powerpc64/strspn.S  | 144 ------------------------------------
 4 files changed, 4 insertions(+), 406 deletions(-)
 delete mode 100644 sysdeps/powerpc/powerpc64/strcspn.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strpbrk.S
 delete mode 100644 sysdeps/powerpc/powerpc64/strspn.S
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/strcspn.S b/sysdeps/powerpc/powerpc64/strcspn.S
deleted file mode 100644
index 31e619d..0000000
--- a/sysdeps/powerpc/powerpc64/strcspn.S
+++ /dev/null
@@ -1,127 +0,0 @@ 
-/* Optimized strcspn implementation for PowerPC64.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* size_t [r3] strcspn (const char [r3] *s, const char [r4] *reject)  */
-
-EALIGN (strcspn, 4, 0)
-	CALL_MCOUNT 3
-
-	/* The idea to speed up the algorithm is to create a lookup table
-	   indexed by character value: a non-zero entry means "stop here".
-	   One byte per possible 8-bit character value gives 256 entries.  */
-
-	/* r9 = 256-byte table below r1 (PPC64 ELF ABI stack is 16-byte aligned).  */
-	addi	r9,r1,-256
-	/* Clear the table with 0 values (4 iterations of 64 bytes)  */
-	li	r6, 0
-	li	r8, 4
-	mtctr	r8
-	mr	r10, r9
-	.align	4
-L(zerohash):
-	std	r6, 0(r10)
-	std	r6, 8(r10)
-	std	r6, 16(r10)
-	std	r6, 24(r10)
-	std	r6, 32(r10)
-	std	r6, 40(r10)
-	std	r6, 48(r10)
-	std	r6, 56(r10)
-	addi	r10, r10, 64
-	bdnz	L(zerohash)
-
-	lbz	r10,0(r4)
-	cmpdi	cr7,r10,0	/* reject[0] == '\0' ?  */
-	li	r8,1		/* r8 = marker value stored in the table  */
-	beq	cr7,L(finish_table)  /* If reject[0] == '\0' skip  */
-
-	/* Initialize the table as:
-	   for (i = 0; reject[i]; i++)
-	     table[reject[i]] = 1  */
-	.align	4
-L(init_table):
-	stbx	r8,r9,r10
-	lbzu	r10,1(r4)
-	cmpdi	cr7,r10,0           /* If reject[i] == '\0' finish  */
-	bne	cr7,L(init_table)
-L(finish_table):
-	/* set table[0] = 1 so the terminating '\0' of s also stops the scan  */
-	li	r10,1
-	stb	r10,0(r9)
-	li	r10,0		/* r10 = count of bytes already accepted  */
-	b	L(mainloop)
-
-	/* Unroll the loop 4 times and check using the table as:
-	   i = 0;
-	   while (1)
-	     {
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	       if (table[input[i++]] == 1)
-	         return i - 1;
-	     }  */
-	.align 4
-L(unroll):
-	lbz	r8,1(r3)
-	addi	r10,r10,4	/* r6/r4/r5 still hold the old count + 1/2/3  */
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	beq	cr7,L(end)
-	lbz	r8,2(r3)
-	addi	r3,r3,4
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	beq	cr7,L(end2)
-	lbz	r8,3(r7)	/* r7 = value of r3 before the addi above  */
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	beq	cr7,L(end3)
-L(mainloop):
-	lbz	r8,0(r3)
-	mr	r7,r3		/* keep the group base for the 3(r7) load  */
-	addi	r6,r10,1	/* result if the 2nd byte of the group stops  */
-	addi	r4,r10,2	/* result if the 3rd byte of the group stops  */
-	addi	r5,r10,3	/* result if the 4th byte of the group stops  */
-	lbzx	r8,r9,r8
-	cmpwi	cr7,r8,1
-	bne	cr7,L(unroll)
-	mr	r3,r10		/* 1st byte of the group stops: return count  */
-	blr
-
-	.align 4
-L(end):
-	mr	r3,r6
-	blr
-
-	.align 4
-L(end2):
-	mr	r3,r4
-	blr
-
-	.align 4
-L(end3):
-	mr	r3,r5
-	blr
-END (strcspn)
-libc_hidden_builtin_def (strcspn)
diff --git a/sysdeps/powerpc/powerpc64/strpbrk.S b/sysdeps/powerpc/powerpc64/strpbrk.S
deleted file mode 100644
index 5e9d1a6..0000000
--- a/sysdeps/powerpc/powerpc64/strpbrk.S
+++ /dev/null
@@ -1,135 +0,0 @@ 
-/* Optimized strpbrk implementation for PowerPC64.
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* char [r3] *strpbrk(const char [r3] *s, const char [r4] *accept)  */
-
-EALIGN (strpbrk, 4, 0)
-	CALL_MCOUNT 3
-
-	lbz	r10,0(r4)
-	cmpdi	cr7,r10,0	/* accept[0] == '\0' ?  */
-	beq	cr7,L(nullfound)	/* empty accept set: return NULL  */
-
-	/* The idea to speed up the algorithm is to create a lookup table
-	   indexed by character value: a non-zero entry means "stop here".
-	   One byte per possible 8-bit character value gives 256 entries.  */
-
-	/* r9 = 256-byte table below r1 (PPC64 ELF ABI stack is 16-byte aligned).  */
-	addi	r9,r1,-256
-	/* Clear the table with 0 values (4 iterations of 64 bytes)  */
-	li	r6, 0
-	li	r7, 4
-	mtctr	r7
-	mr	r8, r9
-	.align	4
-L(zerohash):
-	std	r6, 0(r8)
-	std	r6, 8(r8)
-	std	r6, 16(r8)
-	std	r6, 24(r8)
-	std	r6, 32(r8)
-	std	r6, 40(r8)
-	std	r6, 48(r8)
-	std	r6, 56(r8)
-	addi	r8, r8, 64
-	bdnz	L(zerohash)
-
-	/* Initialize the table as:
-	   for (i = 0; accept[i]; i++)
-	     table[accept[i]] = 1  */
-	li	r0,1
-	.align 4
-L(init_table):
-	stbx	r0,r9,r10
-	lbzu	r10,1(r4)
-	cmpdi	cr0,r10,0
-	bne	cr0,L(init_table)
-L(finish_table):
-	/* set table[0] = 1 so the terminating '\0' of s also stops the scan  */
-	li	r4,1
-	stb	r4,0(r9)
-	b	L(mainloop)
-
-	/* Unroll the loop 4 times and check using the table as:
-	   i = 0;
-	   while (1)
-	     {
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	       if (table[input[i++]] == 1)
-	         return (s[i -1] ? s + i - 1: NULL);
-	     }  */
-	.align 4
-L(unroll):
-	lbz	r0,1(r3)
-	lbzx	r8,r9,r0
-	cmpwi	cr6,r8,1
-	beq	cr6,L(checkend2)
-	lbz	r10,2(r3)
-	lbzx	r4,r9,r10
-	cmpwi	cr7,r4,1
-	beq	cr7,L(checkend3)
-	lbz	r12,3(r3)
-	addi	r3,r3,4
-	lbzx	r11,r9,r12
-	cmpwi	cr0,r11,1
-	beq	cr0,L(checkend)
-L(mainloop):
-	lbz	r12,0(r3)
-	addi	r11,r3,1	/* candidate result for the 2nd byte  */
-	addi	r5,r3,2		/* candidate result for the 3rd byte  */
-	addi	r7,r3,3		/* candidate result for the 4th byte  */
-	lbzx	r6,r9,r12
-	cmpwi	cr1,r6,1
-	bne	cr1,L(unroll)
-	cmpdi	cr0,r12,0	/* stopped on the '\0' of s?  */
-	beq	cr0,L(nullfound)
-L(end):
-	blr
-
-	.align 4
-L(checkend):
-	cmpdi	cr1,r12,0
-	mr	r3,r7
-	bne	cr1,L(end)
-L(nullfound):
-	/* return NULL  */
-	li	r3,0
-	blr
-
-	.align 4
-L(checkend2):
-	cmpdi	cr7,r0,0
-	mr	r3,r11
-	beq	cr7,L(nullfound)
-	blr
-
-	.align 4
-L(checkend3):
-	cmpdi	cr6,r10,0
-	mr	r3,r5
-	beq	cr6,L(nullfound)
-	blr
-END (strpbrk)
-libc_hidden_builtin_def (strpbrk)
diff --git a/sysdeps/powerpc/powerpc64/strspn.S b/sysdeps/powerpc/powerpc64/strspn.S
deleted file mode 100644
index cf10da1..0000000
--- a/sysdeps/powerpc/powerpc64/strspn.S
+++ /dev/null
@@ -1,144 +0,0 @@ 
-/* Optimized strspn implementation for PowerPC64.
-
-   Copyright (C) 2014-2016 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* size_t [r3] strspn (const char *string [r3],
-                       const char *needleAccept [r4])  */
-
-/* Performance gains are grabbed through following techniques:
-
-   > hashing of needle.
-   > hashing avoids scanning of duplicate entries in needle
-     across the string.
-   > unrolling when scanning for character in string
-     across hash table.  */
-
-/* Algorithm is as below:
-   1. An empty hash table/dictionary is created with one entry for
-      each of the 256 possible byte values.
-   2. For every character in the accept needle, the corresponding
-      hash entry is set to 1.
-   3. The count of leading characters found in the accept needle
-      is set to 0.
-   4. The string is scanned and, for every character, its
-      corresponding hash entry is checked.
-   5. If the hash entry is 1, count is incremented and scanning
-      continues.
-   6. If the hash entry is 0, or the end of the string is reached,
-      count is returned.  */
-
-#include <sysdep.h>
-
-EALIGN(strspn, 4, 0)
-	CALL_MCOUNT 3
-
-	/* r9 = 256-byte table below r1 (PPC64 ELF ABI stack is 16-byte aligned).  */
-	addi 	r9,r1,-256
-	/* Clear the table with 0 values (4 iterations of 64 bytes)  */
-	li	r6, 0
-	li	r8, 4
-	mtctr	r8
-	mr	r10, r9
-	.align 	4
-L(zerohash):
-	std	r6, 0(r10)
-	std	r6, 8(r10)
-	std	r6, 16(r10)
-	std	r6, 24(r10)
-	std	r6, 32(r10)
-	std	r6, 40(r10)
-	std	r6, 48(r10)
-	std	r6, 56(r10)
-	addi	r10, r10, 64
-	bdnz	L(zerohash)
-
-	lbz	r10,0(r4)
-	li r8, 1		/* r8=1, marker stored in the hash for every
-				   character of the needle  */
-	cmpdi cr7, r10, 0	/* is the accept needle empty?  */
-	beq cr7, L(skipHashing)	/* if needle is empty, skip hashing  */
-
-	.align 4		/* align section to 16 byte boundary  */
-L(hashing):
-	stbx r8, r9, r10	/* set the hash marker for the current
-				   character of the needle  */
-	lbzu r10, 1(r4)		/* advance r4 and load next needle byte  */
-	cmpdi cr7, r10, 0	/* has the needle reached its '\0'?  */
-	bne cr7, L(hashing)	/* no: loop to hash the needle  */
-
-L(skipHashing):
-	li r10, 0		/* load counter = 0  */
-	b L(beginScan)
-
-	.align 4		/* align section to 16 byte boundary  */
-L(scanUnroll):
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* a zero hash entry means string[0] is
-				   not in the accept needle  */
-	beq cr7, L(ret1stIndex)	/* first character outside the needle,
-				   return the count  */
-
-	lbz r8, 1(r3)		/* load string[1] into r8  */
-	addi r10, r10, 4	/* counter += 4 (r6/r5/r4 keep old + 1..3)  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* a zero hash entry means string[1] is
-				   not in the accept needle  */
-	beq cr7, L(ret2ndIndex)	/* first character outside the needle,
-				   return old counter + 1  */
-
-	lbz r8, 2(r3)		/* load string[2] into r8  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	cmpwi cr7, r8, 0	/* a zero hash entry means string[2] is
-				   not in the accept needle  */
-	beq cr7, L(ret3rdIndex)	/* first character outside the needle,
-				   return old counter + 2  */
-
-	lbz r8, 3(r3)		/* load string[3] into r8  */
-	lbzx r8, r9, r8		/* load r8 with hash value at index  */
-	addi r3, r3, 4		/* unroll factor , increment string by 4  */
-	cmpwi cr7, r8, 0	/* a zero hash entry means string[3] is
-				   not in the accept needle  */
-	beq cr7,L(ret4thIndex)	/* first character outside the needle,
-				   return old counter + 3  */
-
-L(beginScan):
-	lbz r8, 0(r3)		/* load string[0] into r8  */
-	addi r6, r10, 1		/* place holder for counter + 1  */
-	addi r5, r10, 2		/* place holder for counter + 2  */
-	addi r4, r10, 3		/* place holder for counter + 3  */
-	cmpdi cr7, r8, 0	/* string[0] == '\0' means the end of the
-				   string was reached  */
-	bne cr7, L(scanUnroll)	/* not the end: check string[0..3]  */
-
-L(ret1stIndex):
-	mr r3, r10		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret2ndIndex):
-	mr r3, r6		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret3rdIndex):
-	mr r3, r5		/* update r3 for return  */
-	blr			/* return  */
-
-L(ret4thIndex):
-	mr r3, r4		/* update r3 for return  */
-	blr			/* done  */
-END(strspn)
-libc_hidden_builtin_def (strspn)