Message ID | DB5PR08MB1030DA9F862D70A49C3FA40383780@DB5PR08MB1030.eurprd08.prod.outlook.com |
---|---|
State | New, archived |
Headers | show |
On 11/06/2018 07:31, Wilco Dijkstra wrote: > Improve strstr performance. Strstr tends to be slow because it uses > many calls to memchr and a slow byte loop to scan for the next match. > Performance is significantly improved by using strnlen on larger blocks > and using strchr to search for the next matching character. strcasestr > can also use strnlen to scan ahead, and memmem can use memchr to check > for the next match. > > On the GLIBC bench tests the overall performance gains on Cortex-A72 are: > strstr: +25% > strcasestr: +4.3% > memmem: +18% > > On a 256KB dataset strstr performance improves by 67%, strcasestr by 47%. > > Tested on AArch64, passes GLIBC tests. I am checking if it is worth to keep the implementation in sync with gnulib, since it contains some changes over the years which did not make into glibc. I create a branch with these changes [1] along with your changes, but results are mixed: - For strstr/strcasestr performance is the same for neddles larger than 4, but faster for shorter needles. - For memmem performance is faster for needle of size 2, but slower for large haystack. My plan is to check if we can simplify the code a bit, as gnulib one has done; but in overall the change should be ok (modulo one fix below). [1] https://sourceware.org/git/?p=glibc.git;a=shortlog;h=refs/heads/azanella/str-two-way-optimization > diff --git a/string/strcasestr.c b/string/strcasestr.c > index 90ba189790481a58eb7627eb8c396530f988f985..d54c4b56853735fb8f5ebd59419b222ca8827651 100644 > --- a/string/strcasestr.c > +++ b/string/strcasestr.c > @@ -37,8 +37,8 @@ > /* Two-Way algorithm. */ > #define RETURN_TYPE char * > #define AVAILABLE(h, h_l, j, n_l) \ > - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ > - && ((h_l) = (j) + (n_l))) > + (((j) + (n_l) <= (h_l)) || ((h_l) += strnlen ((void*)((h) + (h_l)), 512), \ > + (j) + (n_l) <= (h_l))) Why did it not trigger linknamespace issues with strnlen usage? For instance: $ cat conform/ISO/assert.h/linknamespace.out [initial] __assert_fail -> [libc.a(assert.o)] __dcgettext -> [libc.a(dcgettext.o)] __dcigettext -> [libc.a(dcigettext.o)] strstr -> [libc.a(strstr.o)] strnlen On the branch I created it contains a fix for this [2]. [2] https://sourceware.org/git/?p=glibc.git;a=commit;h=6cc58a9708ff256323c34d200fd9362f25bf66f7 > #define CHECK_EOL (1) > #define RET0_IF_0(a) if (!a) goto ret0 > #define CANON_ELEMENT(c) TOLOWER (c) > diff --git a/string/strstr.c b/string/strstr.c > index b3b5deb673758510616d32849bcf2fc15ce941cd..346f303c7423c95de0fbcdeb1a9f31293b303372 100644 > --- a/string/strstr.c > +++ b/string/strstr.c > @@ -33,10 +33,11 @@ > > #define RETURN_TYPE char * > #define AVAILABLE(h, h_l, j, n_l) \ > - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ > - && ((h_l) = (j) + (n_l))) > + (((j) + (n_l) <= (h_l)) || ((h_l) += strnlen ((void*)((h) + (h_l)), 512), \ > + (j) + (n_l) <= (h_l))) > #define CHECK_EOL (1) > #define RET0_IF_0(a) if (!a) goto ret0 > +#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C)) > #include "str-two-way.h" > > #undef strstr >
diff --git a/string/memmem.c b/string/memmem.c index c17e1cf6a63c0709df0c1c7a45410c5178b2876e..43efaa3fb718e7a22c3b4122c278720768644d0f 100644 --- a/string/memmem.c +++ b/string/memmem.c @@ -31,6 +31,7 @@ #define RETURN_TYPE void * #define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l)) +#define FASTSEARCH(S,C,N) (void*) memchr ((void *)(S), (C), (N)) #include "str-two-way.h" #undef memmem diff --git a/string/str-two-way.h b/string/str-two-way.h index cd2605857d9b5c432c3c2b1c5e35f55d8e9285b6..6ce6798ea4d595bb4c95335233eb92595f447a28 100644 --- a/string/str-two-way.h +++ b/string/str-two-way.h @@ -281,50 +281,50 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } else { - const unsigned char *phaystack = &haystack[suffix]; + const unsigned char *phaystack; /* The comparison always starts from needle[suffix], so cache it and use an optimized first-character loop. */ unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]); -#if CHECK_EOL - /* We start matching from the SUFFIX'th element, so make sure we - don't hit '\0' before that. */ - if (haystack_len < suffix + 1 - && !AVAILABLE (haystack, haystack_len, 0, suffix + 1)) - return NULL; -#endif - /* The two halves of needle are distinct; no extra memory is required, and any mismatch results in a maximal shift. */ period = MAX (suffix, needle_len - suffix) + 1; j = 0; - while (1 -#if !CHECK_EOL - && AVAILABLE (haystack, haystack_len, j, needle_len) -#endif - ) + while (AVAILABLE (haystack, haystack_len, j, needle_len)) { unsigned char haystack_char; const unsigned char *pneedle; - /* TODO: The first-character loop can be sped up by adapting - longword-at-a-time implementation of memchr/strchr. */ - if (needle_suffix + phaystack = &haystack[suffix + j]; + +#ifdef FASTSEARCH + if (*phaystack++ != needle_suffix) + { + phaystack = FASTSEARCH (phaystack, needle_suffix, + haystack_len - needle_len - j); + if (phaystack == NULL) + goto ret0; + j = phaystack - &haystack[suffix]; + phaystack++; + } +#else + while (needle_suffix != (haystack_char = CANON_ELEMENT (*phaystack++))) { RET0_IF_0 (haystack_char); -#if !CHECK_EOL +# if !CHECK_EOL ++j; -#endif - continue; + if (!AVAILABLE (haystack, haystack_len, j, needle_len)) + goto ret0; +# endif } -#if CHECK_EOL +# if CHECK_EOL /* Calculate J if it wasn't kept up-to-date in the first-character loop. */ j = phaystack - &haystack[suffix] - 1; +# endif #endif - /* Scan for matches in right half. */ i = suffix + 1; pneedle = &needle[i]; @@ -338,6 +338,11 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } ++i; } +#if CHECK_EOL + /* Update minimal length of haystack. */ + if (phaystack > haystack + haystack_len) + haystack_len = phaystack - haystack; +#endif if (needle_len <= i) { /* Scan for matches in left half. */ @@ -360,13 +365,6 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len, } else j += i - suffix + 1; - -#if CHECK_EOL - if (!AVAILABLE (haystack, haystack_len, j, needle_len)) - break; -#endif - - phaystack = &haystack[suffix + j]; } } ret0: __attribute__ ((unused)) diff --git a/string/strcasestr.c b/string/strcasestr.c index 90ba189790481a58eb7627eb8c396530f988f985..d54c4b56853735fb8f5ebd59419b222ca8827651 100644 --- a/string/strcasestr.c +++ b/string/strcasestr.c @@ -37,8 +37,8 @@ /* Two-Way algorithm. */ #define RETURN_TYPE char * #define AVAILABLE(h, h_l, j, n_l) \ - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ - && ((h_l) = (j) + (n_l))) + (((j) + (n_l) <= (h_l)) || ((h_l) += strnlen ((void*)((h) + (h_l)), 512), \ + (j) + (n_l) <= (h_l))) #define CHECK_EOL (1) #define RET0_IF_0(a) if (!a) goto ret0 #define CANON_ELEMENT(c) TOLOWER (c) diff --git a/string/strstr.c b/string/strstr.c index b3b5deb673758510616d32849bcf2fc15ce941cd..346f303c7423c95de0fbcdeb1a9f31293b303372 100644 --- a/string/strstr.c +++ b/string/strstr.c @@ -33,10 +33,11 @@ #define RETURN_TYPE char * #define AVAILABLE(h, h_l, j, n_l) \ - (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ - && ((h_l) = (j) + (n_l))) + (((j) + (n_l) <= (h_l)) || ((h_l) += strnlen ((void*)((h) + (h_l)), 512), \ + (j) + (n_l) <= (h_l))) #define CHECK_EOL (1) #define RET0_IF_0(a) if (!a) goto ret0 +#define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C)) #include "str-two-way.h" #undef strstr