PowerPC: optimized strcspn for POWER7

Message ID 531B8923.3010907@linux.vnet.ibm.com
State Committed
Delegated to: Adhemerval Zanella Netto
Headers

Commit Message

Adhemerval Zanella Netto March 8, 2014, 9:18 p.m. UTC
  I am resending this patch so it can be tracked in patchwork, with
some corrections (string/strpbrk.c cleanup, and no need to align the
stack, since the stack is already aligned to 16 bytes in the PPC64 ABI).

This patch adds an optimized strcspn for POWER7 by using a different
algorithm than the default implementation: it constructs a table based on
the 'reject' argument and uses this table to check for any occurrence
in the input string. The idea is similar to the one x86_64 uses.
For PowerPC some tunings were added, such as unrolling loops and aligning
the table's stack memory to 16 bytes (so the VSX clear can run without
alignment issues).

--

2014-03-08  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

        * string/strcspn.c (strcspn): Using macro to redefine symbol name.
        * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strcspn-power7
	and strcspn-ppc64 objects.
	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add new strcspn optimized symbols.
	* sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S: New file:
	multiarch strcspn for POWER7.
	* sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c: New file:
	multiarch strcspn for PPC64.
	* sysdeps/powerpc/powerpc64/multiarch/strcspn.c: New file: strcspn
	ifunc selector.
	* sysdeps/powerpc/powerpc64/power7/strcspn.S: New file: optimized
	strcspn for POWER7.

---
  

Comments

Steven Munroe March 20, 2014, 4:04 p.m. UTC | #1
On Sat, 2014-03-08 at 18:18 -0300, Adhemerval Zanella wrote:
> I am resending this patch to keep track using the patchwork and with
> some corrections (string/strpbrk.c cleanup and the no need to align
> stack, since stack is already aligned to 16 bytes in PPC64 abi).
> 
> This patch add a optimized strcspn for POWER7 by using a different
> algorithm than default implementation: it constructs a table based on
> the 'accept' argument and use this table to check for any occurance
> on the input string. The idea is similar as x86_64 uses.
> For PowerPC some tunings were added, such as unroll loops and align
> stack memory to table to 16 bytes (so VSX clean can ran without
> alignment issues).
> 
> --
> 
> 2014-03-08  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>
> 
>         * string/strcspn.c (strcspn): Using macro to redefine symbol name.
>         * sysdeps/powerpc/powerpc64/multiarch/Makefile: Add strcspn-power7
> 	and strcspn-ppc64 objects.
> 	* sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
> 	(__libc_ifunc_impl_list): Add new strcspn optimized symbols.
> 	* sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S: New file:
> 	multiarch strcspn for POWER7.
> 	* sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c: New file:
> 	multiarch strcspn for PPC64.
> 	* sysdeps/powerpc/powerpc64/multiarch/strcspn.c: New file: strcspn
> 	ifunc selector.
> 	* sysdeps/powerpc/powerpc64/power7/strcspn.S: New file: optimited
> 	strcspn for POWER7.
> 
> ---

Looks good.
  

Patch

diff --git a/string/strcspn.c b/string/strcspn.c
index 7c39f79..4316205 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -15,27 +15,18 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if defined _LIBC || HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-# ifndef strchr
-#  define strchr index
-# endif
-#endif
+#include <string.h>
 
 #undef strcspn
 
+#ifndef STRCSPN
+# define STRCSPN strcspn
+#endif
+
 /* Return the length of the maximum initial segment of S
    which contains no characters from REJECT.  */
 size_t
-strcspn (s, reject)
-     const char *s;
-     const char *reject;
+STRCSPN (const char *s, const char *reject)
 {
   size_t count = 0;
 
diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile
index a93a737..1cf963b 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc64/multiarch/Makefile
@@ -14,7 +14,8 @@  sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   wcsrchr-ppc64 wcscpy-power7 wcscpy-power6 wcscpy-ppc64 \
 		   wordcopy-power7 wordcopy-power6 wordcopy-ppc64 \
 		   strcpy-power7 strcpy-ppc64 stpcpy-power7 stpcpy-ppc64 \
-		   strrchr-power7 strrchr-ppc64 strpbrk-power7 strpbrk-ppc64
+		   strrchr-power7 strrchr-ppc64 strpbrk-power7 strpbrk-ppc64 \
+		   strcspn-power7 strcspn-ppc64
 
 CFLAGS-strncase-power7.c += -mcpu=power7 -funroll-loops
 CFLAGS-strncase_l-power7.c += -mcpu=power7 -funroll-loops
diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
index cfba0af..5635416 100644
--- a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c
@@ -254,5 +254,13 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strpbrk, 1,
 			     __strpbrk_ppc))
 
+  /* Support sysdeps/powerpc/powerpc64/multiarch/strcspn.c.  */
+  IFUNC_IMPL (i, name, strcspn,
+	      IFUNC_IMPL_ADD (array, i, strcspn,
+			      hwcap & PPC_FEATURE_HAS_VSX,
+			      __strcspn_power7)
+	      IFUNC_IMPL_ADD (array, i, strcspn, 1,
+			     __strcspn_ppc))
+
   return i;
 }
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
new file mode 100644
index 0000000..02ffcc8
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-power7.S
@@ -0,0 +1,40 @@ 
+/* Optimized strcspn implementation for POWER7.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#undef EALIGN	/* Redefined below to ignore NAME and emit __strcspn_power7.  */
+#define EALIGN(name, alignt, words)				\
+  .section ".text";						\
+  ENTRY_2(__strcspn_power7)					\
+  .align ALIGNARG(alignt);					\
+  EALIGN_W_##words;						\
+  BODY_LABEL(__strcspn_power7):					\
+  cfi_startproc;						\
+  LOCALENTRY(__strcspn_power7)
+
+#undef END	/* Likewise: close the function as __strcspn_power7.  */
+#define END(name)						\
+  cfi_endproc;							\
+  TRACEBACK(__strcspn_power7)					\
+  END_2(__strcspn_power7)
+
+#undef libc_hidden_builtin_def	/* Redefined as empty for the included file.  */
+#define libc_hidden_builtin_def(name)
+
+#include <sysdeps/powerpc/powerpc64/power7/strcspn.S>	/* The implementation, assembled with the macros above.  */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
new file mode 100644
index 0000000..5f8b610
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn-ppc64.c
@@ -0,0 +1,30 @@ 
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#define STRCSPN __strcspn_ppc	/* Name string/strcspn.c uses for the function it defines.  */
+#ifdef SHARED
+
+# undef libc_hidden_builtin_def	/* NOTE(review): presumably ties __GI_strcspn to __strcspn_ppc - confirm against __hidden_ver1.  */
+# define libc_hidden_builtin_def(name) \
+  __hidden_ver1 (__strcspn_ppc, __GI_strcspn, __strcspn_ppc);
+#endif	/* SHARED  */
+
+extern __typeof (strcspn) __strcspn_ppc attribute_hidden;
+
+#include <string/strcspn.c>	/* Generic C implementation, compiled here as STRCSPN.  */
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcspn.c b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
new file mode 100644
index 0000000..3609d93
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strcspn.c
@@ -0,0 +1,31 @@ 
+/* Multiple versions of strcspn. PowerPC64 version.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef NOT_IN_libc
+# include <string.h>
+# include <shlib-compat.h>
+# include "init-arch.h"
+
+extern __typeof (strcspn) __strcspn_ppc attribute_hidden;	/* From strcspn-ppc64.c (generic C code).  */
+extern __typeof (strcspn) __strcspn_power7 attribute_hidden;	/* From strcspn-power7.S (POWER7 assembly).  */
+
+libc_ifunc (strcspn,	/* Pick the implementation from the hwcap bits.  */
+	    (hwcap & PPC_FEATURE_HAS_VSX)	/* VSX bit set: use the POWER7 version.  */
+	    ? __strcspn_power7
+	    : __strcspn_ppc);	/* Otherwise use the generic version.  */
+#endif	/* !NOT_IN_libc  */
diff --git a/sysdeps/powerpc/powerpc64/power7/strcspn.S b/sysdeps/powerpc/powerpc64/power7/strcspn.S
new file mode 100644
index 0000000..3f6aa0a
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/strcspn.S
@@ -0,0 +1,139 @@ 
+/* Optimized strcspn implementation for PowerPC64.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* size_t [r3] strcspn (const char [r3] *s, const char [r4] *reject)  */
+
+	.machine power7
+EALIGN (strcspn, 4, 0)
+	CALL_MCOUNT 3
+
+	/* To speed up the search, build a lookup table indexed by byte
+	   value that marks the bytes at which scanning must stop.  For ASCII
+	   or ISO-8859-X character sets it has 256 positions.  */
+	lbz	r10,0(r4)	/* r10 = reject[0]  */
+
+	/* First the table must be cleared.  To avoid unaligned accesses
+	   when using the VSX stores, the table address is aligned to 16
+	   bytes.  */
+	xxlxor	v0,v0,v0	/* v0 = 0, the value written by the stores below  */
+
+	/* r9 = table, 256 bytes below r1.  PPC64 ELF ABI stack is 16-byte aligned.  */
+	addi 	r9,r1,-256
+
+	li	r8,48		/* r8, r5, r6 = store offsets 48, 16, 32  */
+	li	r5,16
+	li	r6,32
+	cmpdi	cr7,r10,0	/* reject[0] == '\0' ?  */
+	addi	r12,r9,64	/* r12 = table + 64  */
+	/* Clear the table with 0 values: 16 stores at offsets 0, 16, ..., 240.  */
+	stxvw4x	v0,r0,r9
+	addi	r11,r9,128	/* r11 = table + 128  */
+	addi	r7,r9,192	/* r7 = table + 192  */
+	stxvw4x v0,r9,r5
+	stxvw4x v0,r9,r6
+	stxvw4x v0,r9,r8
+	stxvw4x v0,r0,r12
+	stxvw4x v0,r12,r5
+	stxvw4x v0,r12,r6
+	stxvw4x v0,r12,r8
+	stxvw4x v0,r0,r11
+	stxvw4x v0,r11,r5
+	stxvw4x v0,r11,r6
+	stxvw4x v0,r11,r8
+	stxvw4x v0,r0,r7
+	stxvw4x v0,r7,r5
+	stxvw4x v0,r7,r6
+	stxvw4x v0,r7,r8
+	li	r8,1		/* r8 = 1, the marker stored into the table  */
+	beq     cr7,L(finish_table)  /* If reject[0] == '\0' skip  */
+
+	/* Initialize the table as:
+	   for (i = 0; reject[i]; i++)
+	     table[reject[i]] = 1  */
+	.p2align 4,,15
+L(init_table):
+	stbx	r8,r9,r10	/* table[r10] = 1  */
+	lbzu	r10,1(r4)	/* r10 = next byte of reject (r4 advances by 1)  */
+	cmpdi	cr7,r10,0           /* If the next reject byte == '\0' finish  */
+	bne	cr7,L(init_table)
+L(finish_table):
+	/* set table[0] = 1 so the NUL ending the input also stops the scan  */
+	li 	r10,1
+	stb	r10,0(r9)
+	li	r10,0		/* r10 = index of the byte L(mainloop) checks first  */
+	b	L(mainloop)
+
+	/* Unroll the loop 4 times and check using the table as:
+	   i = 0;
+	   while (1)
+	     {
+	       if (table[input[i++]] == 1)
+	         return i - 1;
+	       if (table[input[i++]] == 1)
+	         return i - 1;
+	       if (table[input[i++]] == 1)
+	         return i - 1;
+	       if (table[input[i++]] == 1)
+	         return i - 1;
+	     }  */
+	.p2align 4,,15
+L(unroll):
+	lbz	r8,1(r3)	/* second byte of the current group of 4  */
+	addi	r10,r10,4	/* index for the next group; r6/r4/r5 still hold this group's  */
+	lbzx	r8,r9,r8
+	cmpwi	r7,r8,1		/* NOTE(review): r7 presumably means CR field 7 (cr7) here - confirm  */
+	beq	cr7,L(end)
+	lbz	r8,2(r3)	/* third byte  */
+	addi	r3,r3,4		/* advance the input pointer; r7 still points at this group  */
+	lbzx	r8,r9,r8
+	cmpwi	cr7,r8,1
+	beq	cr7,L(end2)
+	lbz	r8,3(r7)	/* fourth byte, read through the saved pointer  */
+	lbzx	r8,r9,r8
+	cmpwi	cr7,r8,1
+	beq	cr7,L(end3)
+L(mainloop):
+	lbz	r8,0(r3)	/* first byte of the group  */
+	mr	r7,r3		/* r7 = pointer to the start of this group  */
+	addi	r6,r10,1	/* r6, r4, r5 = return values for the 2nd, 3rd, 4th byte  */
+	addi	r4,r10,2
+	addi	r5,r10,3
+	lbzx	r8,r9,8		/* NOTE(review): bare 8 presumably names r8 - confirm  */
+	cmpwi	cr7,r8,1
+	bne	cr7,L(unroll)
+	mr	r3,r10		/* first byte is marked in the table: return its index  */
+	blr
+
+	.p2align 4,,15
+L(end):
+	mr	r3,r6		/* stop at the second byte: return index + 1  */
+	blr
+
+	.p2align 4,,15
+L(end2):
+	mr	r3,r4		/* stop at the third byte: return index + 2  */
+	blr
+
+	.p2align 4,,15
+L(end3):
+	mr	r3,r5		/* stop at the fourth byte: return index + 3  */
+	blr
+END (strcspn)
+libc_hidden_builtin_def (strcspn)