From patchwork Thu Mar 31 14:01:00 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 11579 Received: (qmail 117453 invoked by alias); 31 Mar 2016 14:01:31 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 117349 invoked by uid 89); 31 Mar 2016 14:01:30 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy=REJECT X-HELO: mail-yw0-f182.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=0MRkMl3zTuzKguUwEcn8JxlbYLjqlhdtBZ2UPGRj3oU=; b=BZiT5FuXZlCAn5plRPHIcqxcnsUv3GzRH7rC1oGT7ZxXG/CODLQZHUlHodBXcy0rDe xMph+7ciN8OB6TzaOxhXttJsgcxVhyYGJYpwKSyaXaI7Svxb8v/UeNljn/rrtSBYfFqa /qPn0wp8bVlOtNNcq/Fq5QWlANuRYPvKmk1LssFSaINzCzfh62t7ZJ8llERZSid1d3Gv 23D4j/NXW4lMCpK4tHMwdQb7tGvkmW77ISrdB1Ot5W7s6nF/4AYE6Sv0+tHLRNY4lruT Go5vvdVOQtt5xMwdr3bW3IyKZGMZWzJ2hvTDRFHuQij159IY0zw5PyBzUeDvg+9Ki0KR FdEA== X-Gm-Message-State: AD7BkJL/BMbF3xe2u5wUdYPqlA+VU8yO1+QRgYc+9IobxrwrxVfHWxv7u3Kqwu9cwtihPkvy X-Received: by 10.37.59.140 with SMTP id i134mr4538182yba.35.1459432871566; Thu, 31 Mar 2016 07:01:11 -0700 (PDT) From: Adhemerval Zanella To: libc-alpha@sourceware.org Cc: Wilco Dijkstra Subject: [PATCH 1/4] Improve generic strcspn performance Date: Thu, 31 Mar 2016 11:01:00 -0300 Message-Id: <1459432863-20749-2-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1459432863-20749-1-git-send-email-adhemerval.zanella@linaro.org> References: <1459432863-20749-1-git-send-email-adhemerval.zanella@linaro.org> From: Wilco Dijkstra Improve strcspn performance using a much faster algorithm. It is kept simple so it works well on most targets. It is generally at least 10 times faster than the existing implementation on bench-strcspn on a few AArch64 implementations, and for some tests 100 times as fast (repeatedly calling strchr on a small string is extremely slow...). In fact the string/bits/string2.h inlines make no longer sense, as GCC already uses strlen if reject is an empty string, strchrnul is 5 times as fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than the strcspn main loop for large strings (though reject length 2-4 could be special cased in the future to gain even more performance). Tested on x86_64, i686, and aarch64. * string/Version (libc): Add GLIBC_2.24. * string/strcspn.c (strcspn): Rewrite function. * string/bits/string2.h (strcspn): Use __builtin_strcspn. (__strcspn_c1): Remove inline function. (__strcspn_c2): Likewise. (__strcspn_c3): Likewise. * string/string-inline.c [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add compatibility symbol. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2): Likewise. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3): Likewise. --- ChangeLog | 17 ++++++++++ string/Versions | 2 ++ string/bits/string2.h | 73 ++----------------------------------------- string/strcspn.c | 44 +++++++++++++++++++++----- string/string-inlines.c | 40 ++++++++++++++++++++++++ sysdeps/i386/string-inlines.c | 19 +---------- 6 files changed, 99 insertions(+), 96 deletions(-) diff --git a/string/Versions b/string/Versions index 59bf35a..475c1fd 100644 --- a/string/Versions +++ b/string/Versions @@ -80,4 +80,6 @@ libc { GLIBC_2.6 { strerror_l; } + GLIBC_2.24 { + } } diff --git a/string/bits/string2.h b/string/bits/string2.h index 8200ef1..a8df0db 100644 --- a/string/bits/string2.h +++ b/string/bits/string2.h @@ -905,77 +905,10 @@ __stpcpy_small (char *__dest, /* Return the length of the initial segment of S which consists entirely of characters not in REJECT. */ -#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES -# ifndef _HAVE_STRING_ARCH_strcspn -# if __GNUC_PREREQ (3, 2) -# define strcspn(s, reject) \ - __extension__ \ - ({ char __r0, __r1, __r2; \ - (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ - ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ - ? __builtin_strcspn (s, reject) \ - : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ - ? strlen (s) \ - : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ - ? __strcspn_c1 (s, __r0) \ - : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ - ? __strcspn_c2 (s, __r0, __r1) \ - : (((const char *) (reject))[3] == '\0' \ - ? __strcspn_c3 (s, __r0, __r1, __r2) \ - : __builtin_strcspn (s, reject)))))) \ - : __builtin_strcspn (s, reject)); }) -# else -# define strcspn(s, reject) \ - __extension__ \ - ({ char __r0, __r1, __r2; \ - (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ - ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ - ? strlen (s) \ - : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ - ? __strcspn_c1 (s, __r0) \ - : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ - ? __strcspn_c2 (s, __r0, __r1) \ - : (((const char *) (reject))[3] == '\0' \ - ? __strcspn_c3 (s, __r0, __r1, __r2) \ - : strcspn (s, reject))))) \ - : strcspn (s, reject)); }) -# endif +#ifndef _HAVE_STRING_ARCH_strcspn +# if __GNUC_PREREQ (3, 2) +# define strcspn(s, reject) __builtin_strcspn (s, reject) # endif - -__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); -__STRING_INLINE size_t -__strcspn_c1 (const char *__s, int __reject) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1, - int __reject2); -__STRING_INLINE size_t -__strcspn_c2 (const char *__s, int __reject1, int __reject2) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 - && __s[__result] != __reject2) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1, - int __reject2, int __reject3); -__STRING_INLINE size_t -__strcspn_c3 (const char *__s, int __reject1, int __reject2, - int __reject3) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 - && __s[__result] != __reject2 && __s[__result] != __reject3) - ++__result; - return __result; -} #endif diff --git a/string/strcspn.c b/string/strcspn.c index 8888919..89ba4ca 100644 --- a/string/strcspn.c +++ b/string/strcspn.c @@ -26,16 +26,44 @@ /* Return the length of the maximum initial segment of S which contains no characters from REJECT. */ size_t -STRCSPN (const char *s, const char *reject) +STRCSPN (const char *str, const char *reject) { - size_t count = 0; + if (reject[0] == '\0' || reject[1] == '\0') + return __strchrnul (str, reject [0]) - str; - while (*s != '\0') - if (strchr (reject, *s++) == NULL) - ++count; - else - return count; + /* Use multiple small memsets to enable inlining on most targets. */ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); - return count; + unsigned char *s = (unsigned char*) reject; + unsigned char tmp; + do + p[tmp = *s++] = 1; + while (tmp); + + s = (unsigned char*) str; + if (p[s[0]]) return 0; + if (p[s[1]]) return 1; + if (p[s[2]]) return 2; + if (p[s[3]]) return 3; + + s = (unsigned char *) ((size_t)s & ~3); + + unsigned int c0, c1, c2, c3; + do + { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } + while ((c0 | c1 | c2 | c3) == 0); + + size_t count = s - (unsigned char *) str; + return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3; } libc_hidden_builtin_def (strcspn) diff --git a/string/string-inlines.c b/string/string-inlines.c index 16db3ea..83bdd6c 100644 --- a/string/string-inlines.c +++ b/string/string-inlines.c @@ -32,3 +32,43 @@ #undef __NO_INLINE__ #include #include + +#include "shlib-compat.h" + +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24) +/* The inline functions are not used from GLIBC 2.24 and forward, however + they are required to provide the symbols through string-inlines.c + (if inlining is not possible for compatibility reasons). */ +size_t +__old_strcspn_c1 (const char *__s, int __reject) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c1, __strcspn_c1, GLIBC_2_1_1); + +size_t +__old_strcspn_c2 (const char *__s, int __reject1, int __reject2) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c2, __strcspn_c2, GLIBC_2_1_1); + +size_t +__old_strcspn_c3 (const char *__s, int __reject1, int __reject2, + int __reject3) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2 && __s[__result] != __reject3) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1); +#endif diff --git a/sysdeps/i386/string-inlines.c b/sysdeps/i386/string-inlines.c index c7de270..64d80e8 100644 --- a/sysdeps/i386/string-inlines.c +++ b/sysdeps/i386/string-inlines.c @@ -15,27 +15,10 @@ License along with the GNU C Library; if not, see . */ -/* and declare some extern inline - functions. These functions are declared additionally here if - inlining is not possible. */ - -#undef __USE_STRING_INLINES -#define __USE_STRING_INLINES -#define _FORCE_INLINES -#define __STRING_INLINE /* empty */ -#define __NO_INLINE__ - /* This is to avoid PLT entries for the x86 version. */ #define __memcpy_g __memcpy_g_internal #define __strchr_g __strchr_g_internal - -#include -#undef index -#undef rindex - -#undef __NO_INLINE__ -#include -#include +#include void * (__memcpy_c) (void *d, const void *s, size_t n)