From patchwork Fri Jan 8 18:44:30 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Wilco Dijkstra X-Patchwork-Id: 10301 Received: (qmail 35251 invoked by alias); 8 Jan 2016 18:44:41 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 35239 invoked by uid 89); 8 Jan 2016 18:44:40 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 spammy=2517, H*MI:outlook, H*MI:prod X-HELO: eu-smtp-delivery-143.mimecast.com From: Wilco Dijkstra To: 'GNU C Library' CC: nd Subject: [PATCH] Improve generic strpbrk performance Date: Fri, 8 Jan 2016 18:44:30 +0000 Message-ID: x-microsoft-exchange-diagnostics: 1; AM3PR08MB0087; 5:7Nuai+Q2Bt7NjLm24PGuYFlDknQA0ysSx2YYyiVQSOP9Uf+6xNBXD3RJ0bL9eTTAa2Ws0wOItQRDK67Rtjecow+QFxXcueGw6IbgaZzv37YKvOMoOWDLSY5R5mAKu1vI1fX0Ii/jJvDJWnWftEGS0A==; 24:83Gb/wN2L8VsxuKAJKGNjaXeXwQV82tFBDODnQysgNXVld4XZENlTZYjYEXCyeACRrs7ON9cS83v18eiIjdKoxBW0SgeQtC97oA4ZBODXfw=; 20:61h1ruYVodVg5cedgTeWvaDMPy8du7spuCFi6KKgKsA2jNfEYaCFYmrKobnOmg03TysRMyHCX6FzhPjJgpq6HQE4gnem4E7L59weoUDjINPeWWoBT2XxE4v3shJX2z3uZKZ2UrageNZzFWm4zGC+G8J41GGI7V4Qrvti95loGvc= x-microsoft-antispam: UriScan:;BCL:0;PCL:0;RULEID:;SRVR:AM3PR08MB0087; x-ms-office365-filtering-correlation-id: 5d74acd6-be0f-408d-537a-08d3185bbed1 nodisclaimer: True x-microsoft-antispam-prvs: x-exchange-antispam-report-test: UriScan:(180628864354917); x-exchange-antispam-report-cfa-test: BCL:0; PCL:0; RULEID:(601004)(2401047)(5005006)(520078)(8121501046)(3002001)(10201501046); SRVR:AM3PR08MB0087; BCL:0; PCL:0; RULEID:; SRVR:AM3PR08MB0087; x-forefront-prvs: 0815F8251E x-forefront-antispam-report: SFV:NSPM; SFS:(10009020)(6009001)(189002)(377424004)(54534003)(199003)(5250100002)(40100003)(86362001)(5004730100002)(2900100001)(5002640100001)(97736004)(5003600100002)(189998001)(110136002)(19580395003)(5001960100002)(11100500001)(450100001)(54356999)(50986999)(19580405001)(5008740100001)(4326007)(74316001)(66066001)(87936001)(106356001)(229853001)(106116001)(33656002)(2906002)(81156007)(6116002)(3846002)(76576001)(102836003)(105586002)(92566002)(586003)(101416001)(1220700001)(1096002)(41533002)(40753002)(133343001); DIR:OUT; SFP:1101; SCL:1; SRVR:AM3PR08MB0087; H:AM3PR08MB0088.eurprd08.prod.outlook.com; FPR:; SPF:None; PTR:InfoNoRecords; MX:1; A:1; LANG:en; spamdiagnosticoutput: 1:23 spamdiagnosticmetadata: NSPM MIME-Version: 1.0 X-OriginatorOrg: arm.com X-MS-Exchange-CrossTenant-originalarrivaltime: 08 Jan 2016 18:44:30.9976 (UTC) X-MS-Exchange-CrossTenant-fromentityheader: Hosted X-MS-Exchange-CrossTenant-id: f34e5979-57d9-4aaa-ad4d-b122a662184d X-MS-Exchange-Transport-CrossTenantHeadersStamped: AM3PR08MB0087 X-MC-Unique: kv4Yd_xvRWuG0-n7x9U0JQ-1 Improve strpbrk performance using a much faster algorithm. It is kept simple so it works well on most targets. It is generally at least 5 times faster than the existing implementation on bench-strpbrk on a few AArch64 implementations, and for some tests 40-100 times as fast. In fact the string/bits/string2.h inlines make no longer sense, as GCC already returns NULL if accept is an empty string, calls strchr if accept has size 1, while __strpbrk_c2 and __strpbrk_c3 are slower than the strpbrk main loop for large strings (though accept length 2-4 could be special cased in the future to gain even more performance). Built and tested on AArch64. OK for GLIBC 2.23? ChangeLog: 2016-01-08 Wilco Dijkstra * string/strpbrk.c (strpbrk): Rewrite function. * string/bits/string2.h (strpbrk): Use __builtin_strpbrk. (__strpbrk_c2) Remove inline function. (__strpbrk_c3) Likewise. diff --git a/string/bits/string2.h b/string/bits/string2.h index d0926f1e7898a13852081f45d54bff5145751695..3c7b7bbb4d1bccec8f149268e9f30978feb25354 100644 --- a/string/bits/string2.h +++ b/string/bits/string2.h @@ -269,53 +269,12 @@ __strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3) /* Find the first occurrence in S of any character in ACCEPT. */ -#if !defined _HAVE_STRING_ARCH_strpbrk -# ifndef _HAVE_STRING_ARCH_strpbrk -# if __GNUC_PREREQ (3, 2) -# define strpbrk(s, accept) \ - __extension__ \ - ({ char __a0, __a1, __a2; \ - (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ - ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ - ? __builtin_strpbrk (s, accept) \ - : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ - ? ((void) (s), (char *) NULL) \ - : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ - ? __builtin_strchr (s, __a0) \ - : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ - ? __strpbrk_c2 (s, __a0, __a1) \ - : (((const char *) (accept))[3] == '\0' \ - ? __strpbrk_c3 (s, __a0, __a1, __a2) \ - : __builtin_strpbrk (s, accept)))))) \ - : __builtin_strpbrk (s, accept)); }) -# endif +#ifndef _HAVE_STRING_ARCH_strpbrk +# if __GNUC_PREREQ (3, 2) +# define strpbrk(s, accept) __builtin_strpbrk (s, accept) # endif - -__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1, - int __accept2); -__STRING_INLINE char * -__strpbrk_c2 (const char *__s, int __accept1, int __accept2) -{ - /* Please note that __accept1 and __accept2 never can be '\0'. */ - while (*__s != '\0' && *__s != __accept1 && *__s != __accept2) - ++__s; - return *__s == '\0' ? NULL : (char *) (size_t) __s; -} - -__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1, - int __accept2, int __accept3); -__STRING_INLINE char * -__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3) -{ - /* Please note that __accept1 to __accept3 never can be '\0'. */ - while (*__s != '\0' && *__s != __accept1 && *__s != __accept2 - && *__s != __accept3) - ++__s; - return *__s == '\0' ? NULL : (char *) (size_t) __s; -} #endif - #if !defined _HAVE_STRING_ARCH_strtok_r # ifndef _HAVE_STRING_ARCH_strtok_r # define __strtok_r(s, sep, nextp) \ diff --git a/string/strpbrk.c b/string/strpbrk.c index 4f1d9b72bb80f5690e6b10bacc496ddf26fdb056..1db271308846f2c9727803e2db096119b3bf89c3 100644 --- a/string/strpbrk.c +++ b/string/strpbrk.c @@ -25,17 +25,47 @@ /* Find the first occurrence in S of any character in ACCEPT. */ char * -STRPBRK (const char *s, const char *accept) +STRPBRK (const char *str, const char *accept) { - while (*s != '\0') + unsigned char table[256]; + unsigned char *p, *s; + unsigned int c0, c1, c2, c3; + + if (accept[0] == '\0') + return NULL; + if (accept[1] == '\0') + return strchr (str, accept[0]); + + /* Use multiple small memsets to enable inlining on most targets. */ + p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); + + s = (unsigned char*) accept; + do + p[c0 = *s++] = 1; + while (c0); + + s = (unsigned char*) str; + if (p[s[0]]) return s[0] == '\0' ? NULL : (char *)s; + if (p[s[1]]) return s[1] == '\0' ? NULL : (char *)s + 1; + if (p[s[2]]) return s[2] == '\0' ? NULL : (char *)s + 2; + if (p[s[3]]) return s[3] == '\0' ? NULL : (char *)s + 3; + + s = (unsigned char *) ((size_t)s & ~3); + + do { - const char *a = accept; - while (*a != '\0') - if (*a++ == *s) - return (char *) s; - ++s; + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; } + while ((c0 | c1 | c2 | c3) == 0); - return NULL; + s += (c0 | c1) != 0 ? 1 - c0 : 3 - c2; + return *s == '\0' ? NULL : (char *)s; } libc_hidden_builtin_def (strpbrk)