Commit Message
Hi.
This is same optimization for memmem as for strstr.
On x64 memmem performance sucks as its order of magnitude slower than strstr.
This equalizes performance somewhat and improves it in general case.
OK to commit this?
Comments
Ondřej Bílka wrote:
> + if (needle_len == 1)
> + return memchr (haystack, needle[0], haystack_end - haystack);
> +
> + while ((haystack = memchr (haystack, needle[0], haystack_end - haystack)))
> + {
> + if (haystack[1] == needle[1]
> + && (needle_len == 2 || haystack[2] == needle[2]))
> + {
> + if (needle_len == 2)
> + return haystack;
> +
> + if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
> + return haystack;
> + else
> + goto slow_path;
> + }
> + haystack++;
> + }
> +
> + return NULL;
> +
> + slow_path:;
First, that "haystack[1] == needle[1]" could access past the end of the
haystack. Second, can't this be rewritten to avoid that ugly goto? Something
like the attached, perhaps.
On 05/12/2015 08:03 PM, Ondřej Bílka wrote:
> Hi.
>
> This is same optimization for memmem as for strstr.
> On x64 memmem performance sucks as its order of magnitude slower than strstr.
> This equalizes performance somewhat and improves it in general case.
>
> OK to commit this?
What did you use to test the performance?
c.
@@ -46,6 +46,10 @@ __memmem (const void *haystack_start, size_t haystack_len,
not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
const unsigned char *haystack = (const unsigned char *) haystack_start;
const unsigned char *needle = (const unsigned char *) needle_start;
+ const unsigned char *haystack_end = (const unsigned char *)
+ haystack_start + haystack_len
+ - needle_len + 1;
+
if (needle_len == 0)
/* The first occurrence of the empty string is deemed to occur at
@@ -57,6 +61,29 @@ __memmem (const void *haystack_start, size_t haystack_len,
if (__glibc_unlikely (haystack_len < needle_len))
return NULL;
+ if (needle_len == 1)
+ return memchr (haystack, needle[0], haystack_end - haystack);
+
+ while ((haystack = memchr (haystack, needle[0], haystack_end - haystack)))
+ {
+ if (haystack[1] == needle[1]
+ && (needle_len == 2 || haystack[2] == needle[2]))
+ {
+ if (needle_len == 2)
+ return haystack;
+
+ if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
+ return haystack;
+ else
+ goto slow_path;
+ }
+ haystack++;
+ }
+
+ return NULL;
+
+ slow_path:;
+
/* Use optimizations in memchr when possible, to reduce the search
size of haystack using a linear algorithm with a smaller
coefficient. However, avoid memchr for long needles, since we