Improve generic strpbrk performance

Message ID AM3PR08MB00880A06F9FB2A2C1ADDDABD83F60@AM3PR08MB0088.eurprd08.prod.outlook.com
State Superseded
Headers

Commit Message

Wilco Dijkstra Jan. 8, 2016, 6:44 p.m. UTC
  Improve strpbrk performance using a much faster algorithm. It is kept simple
so it works well on most targets. It is generally at least 5 times faster than
the existing implementation on bench-strpbrk on a few AArch64 implementations,
and for some tests 40-100 times as fast. In fact the string/bits/string2.h
inlines make no longer sense, as GCC already returns NULL if accept is an empty
string, calls strchr if accept has size 1, while __strpbrk_c2 and __strpbrk_c3
are slower than the strpbrk main loop for large strings (though accept length
2-4 could be special cased in the future to gain even more performance).

Built and tested on AArch64. OK for GLIBC 2.23?

ChangeLog:
2016-01-08  Wilco Dijkstra  <wdijkstr@arm.com>

	* string/strpbrk.c (strpbrk): Rewrite function.
	* string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
	(__strpbrk_c2) Remove inline function.
	(__strpbrk_c3) Likewise.
  

Patch

diff --git a/string/bits/string2.h b/string/bits/string2.h
index d0926f1e7898a13852081f45d54bff5145751695..3c7b7bbb4d1bccec8f149268e9f30978feb25354 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -269,53 +269,12 @@  __strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
 
 
 /* Find the first occurrence in S of any character in ACCEPT.  */
-#if !defined _HAVE_STRING_ARCH_strpbrk
-# ifndef _HAVE_STRING_ARCH_strpbrk
-#  if __GNUC_PREREQ (3, 2)
-#   define strpbrk(s, accept) \
-  __extension__								      \
-  ({ char __a0, __a1, __a2;						      \
-     (__builtin_constant_p (accept) && __string2_1bptr_p (accept)	      \
-      ? ((__builtin_constant_p (s) && __string2_1bptr_p (s))		      \
-	 ? __builtin_strpbrk (s, accept)				      \
-	 : ((__a0 = ((const char  *) (accept))[0], __a0 == '\0')	      \
-	    ? ((void) (s), (char *) NULL)				      \
-	    : ((__a1 = ((const char *) (accept))[1], __a1 == '\0')	      \
-	       ? __builtin_strchr (s, __a0)				      \
-	       : ((__a2 = ((const char *) (accept))[2], __a2 == '\0')	      \
-		  ? __strpbrk_c2 (s, __a0, __a1)			      \
-		  : (((const char *) (accept))[3] == '\0'		      \
-		     ? __strpbrk_c3 (s, __a0, __a1, __a2)		      \
-		     : __builtin_strpbrk (s, accept))))))		      \
-      : __builtin_strpbrk (s, accept)); })
-#  endif
+#ifndef _HAVE_STRING_ARCH_strpbrk
+# if __GNUC_PREREQ (3, 2)
+#  define strpbrk(s, accept) __builtin_strpbrk (s, accept)
 # endif
-
-__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
-				    int __accept2);
-__STRING_INLINE char *
-__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
-{
-  /* Please note that __accept1 and __accept2 never can be '\0'.  */
-  while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
-    ++__s;
-  return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
-
-__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1,
-				    int __accept2, int __accept3);
-__STRING_INLINE char *
-__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
-  /* Please note that __accept1 to __accept3 never can be '\0'.  */
-  while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
-	 && *__s != __accept3)
-    ++__s;
-  return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
 #endif
 
-
 #if !defined _HAVE_STRING_ARCH_strtok_r
 # ifndef _HAVE_STRING_ARCH_strtok_r
 #  define __strtok_r(s, sep, nextp) \
diff --git a/string/strpbrk.c b/string/strpbrk.c
index 4f1d9b72bb80f5690e6b10bacc496ddf26fdb056..1db271308846f2c9727803e2db096119b3bf89c3 100644
--- a/string/strpbrk.c
+++ b/string/strpbrk.c
@@ -25,17 +25,47 @@ 
 
 /* Find the first occurrence in S of any character in ACCEPT.  */
 char *
-STRPBRK (const char *s, const char *accept)
+STRPBRK (const char *str, const char *accept)
 {
-  while (*s != '\0')
+  unsigned char table[256];
+  unsigned char *p, *s;
+  unsigned int c0, c1, c2, c3;
+
+  if (accept[0] == '\0')
+    return NULL;
+  if (accept[1] == '\0')
+    return strchr (str, accept[0]);
+
+  /* Use multiple small memsets to enable inlining on most targets.  */
+  p = memset (table, 0, 64);
+  memset (p + 64, 0, 64);
+  memset (p + 128, 0, 64);
+  memset (p + 192, 0, 64);
+
+  s = (unsigned char*) accept;
+  do
+    p[c0 = *s++] = 1;
+  while (c0);
+
+  s = (unsigned char*) str;
+  if (p[s[0]]) return s[0] == '\0' ? NULL : (char *)s;
+  if (p[s[1]]) return s[1] == '\0' ? NULL : (char *)s + 1;
+  if (p[s[2]]) return s[2] == '\0' ? NULL : (char *)s + 2;
+  if (p[s[3]]) return s[3] == '\0' ? NULL : (char *)s + 3;
+
+  s = (unsigned char *) ((size_t)s & ~3);
+
+  do
     {
-      const char *a = accept;
-      while (*a != '\0')
-	if (*a++ == *s)
-	  return (char *) s;
-      ++s;
+      s += 4;
+      c0 = p[s[0]];
+      c1 = p[s[1]];
+      c2 = p[s[2]];
+      c3 = p[s[3]];
     }
+  while ((c0 | c1 | c2 | c3) == 0);
 
-  return NULL;
+  s += (c0 | c1) != 0 ? 1 - c0 : 3 - c2;
+  return *s == '\0' ? NULL : (char *)s;
 }
 libc_hidden_builtin_def (strpbrk)