Improve strpbrk performance using a much faster algorithm. It is kept simple
so it works well on most targets. It is generally at least 5 times faster than
the existing implementation on bench-strpbrk on a few AArch64 implementations,
and for some tests 40-100 times as fast. In fact the string/bits/string2.h
inlines make no longer sense, as GCC already returns NULL if accept is an empty
string, calls strchr if accept has size 1, while __strpbrk_c2 and __strpbrk_c3
are slower than the strpbrk main loop for large strings (though accept length
2-4 could be special cased in the future to gain even more performance).
Built and tested on AArch64. OK for GLIBC 2.23?
ChangeLog:
2016-01-08 Wilco Dijkstra <wdijkstr@arm.com>
* string/strpbrk.c (strpbrk): Rewrite function.
* string/bits/string2.h (strpbrk): Use __builtin_strpbrk.
(__strpbrk_c2) Remove inline function.
(__strpbrk_c3) Likewise.
@@ -269,53 +269,12 @@ __strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
/* Find the first occurrence in S of any character in ACCEPT. */
-#if !defined _HAVE_STRING_ARCH_strpbrk
-# ifndef _HAVE_STRING_ARCH_strpbrk
-# if __GNUC_PREREQ (3, 2)
-# define strpbrk(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strpbrk (s, accept) \
- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (char *) NULL) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __builtin_strchr (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strpbrk_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strpbrk_c3 (s, __a0, __a1, __a2) \
- : __builtin_strpbrk (s, accept)))))) \
- : __builtin_strpbrk (s, accept)); })
-# endif
+#ifndef _HAVE_STRING_ARCH_strpbrk
+# if __GNUC_PREREQ (3, 2)
+# define strpbrk(s, accept) __builtin_strpbrk (s, accept)
# endif
-
-__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
- int __accept2);
-__STRING_INLINE char *
-__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
-{
- /* Please note that __accept1 and __accept2 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
-
-__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1,
- int __accept2, int __accept3);
-__STRING_INLINE char *
-__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
- /* Please note that __accept1 to __accept3 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
- && *__s != __accept3)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
#endif
-
#if !defined _HAVE_STRING_ARCH_strtok_r
# ifndef _HAVE_STRING_ARCH_strtok_r
# define __strtok_r(s, sep, nextp) \
@@ -25,17 +25,47 @@
/* Find the first occurrence in S of any character in ACCEPT. */
char *
-STRPBRK (const char *s, const char *accept)
+STRPBRK (const char *str, const char *accept)
{
- while (*s != '\0')
+ unsigned char table[256];
+ unsigned char *p, *s;
+ unsigned int c0, c1, c2, c3;
+
+ if (accept[0] == '\0')
+ return NULL;
+ if (accept[1] == '\0')
+ return strchr (str, accept[0]);
+
+ /* Use multiple small memsets to enable inlining on most targets. */
+ p = memset (table, 0, 64);
+ memset (p + 64, 0, 64);
+ memset (p + 128, 0, 64);
+ memset (p + 192, 0, 64);
+
+ s = (unsigned char*) accept;
+ do
+ p[c0 = *s++] = 1;
+ while (c0);
+
+ s = (unsigned char*) str;
+ if (p[s[0]]) return s[0] == '\0' ? NULL : (char *)s;
+ if (p[s[1]]) return s[1] == '\0' ? NULL : (char *)s + 1;
+ if (p[s[2]]) return s[2] == '\0' ? NULL : (char *)s + 2;
+ if (p[s[3]]) return s[3] == '\0' ? NULL : (char *)s + 3;
+
+ s = (unsigned char *) ((size_t)s & ~3);
+
+ do
{
- const char *a = accept;
- while (*a != '\0')
- if (*a++ == *s)
- return (char *) s;
- ++s;
+ s += 4;
+ c0 = p[s[0]];
+ c1 = p[s[1]];
+ c2 = p[s[2]];
+ c3 = p[s[3]];
}
+ while ((c0 | c1 | c2 | c3) == 0);
- return NULL;
+ s += (c0 | c1) != 0 ? 1 - c0 : 3 - c2;
+ return *s == '\0' ? NULL : (char *)s;
}
libc_hidden_builtin_def (strpbrk)