Improve memmem.

Message ID 20150513000329.GA23595@domone
State Superseded
Headers

Commit Message

Ondrej Bilka May 13, 2015, 12:03 a.m. UTC
  Hi.

This is same optimization for memmem as for strstr.
On x64 memmem performance sucks as its order of magnitude slower than strstr.
This equalizes performance somewhat and improves it in general case.

OK to commit this?
  

Comments

Paul Eggert May 13, 2015, 4:31 p.m. UTC | #1
Ondřej Bílka wrote:
> +  if (needle_len == 1)
> +    return memchr (haystack, needle[0], haystack_end - haystack);
> +
> +  while ((haystack = memchr (haystack, needle[0], haystack_end - haystack)))
> +    {
> +      if (haystack[1] == needle[1]
> +          && (needle_len == 2 || haystack[2] == needle[2]))
> +        {
> +          if (needle_len == 2)
> +            return haystack;
> +
> +          if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
> +            return haystack;
> +          else
> +            goto slow_path;
> +        }
> +      haystack++;
> +    }
> +
> +  return NULL;
> +
> +  slow_path:;

First, that "haystack[1] == needle[1]" could access past the end of the 
haystack.  Second, can't this be rewritten to avoid that ugly goto?  Something 
like the attached, perhaps.
  
Carlos O'Donell May 14, 2015, 12:34 a.m. UTC | #2
On 05/12/2015 08:03 PM, Ondřej Bílka wrote:
> Hi.
> 
> This is same optimization for memmem as for strstr.
> On x64 memmem performance sucks as its order of magnitude slower than strstr.
> This equalizes performance somewhat and improves it in general case.
> 
> OK to commit this?

What did you use to test the performance?

c.
  

Patch

diff --git a/string/memmem.c b/string/memmem.c
index 8a81f65..ed21ee4 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -46,6 +46,10 @@  __memmem (const void *haystack_start, size_t haystack_len,
      not an array of 'char' values.  See ISO C 99 section 6.2.6.1.  */
   const unsigned char *haystack = (const unsigned char *) haystack_start;
   const unsigned char *needle = (const unsigned char *) needle_start;
+  const unsigned char *haystack_end = (const unsigned char *) 
+				      haystack_start + haystack_len 
+				      - needle_len + 1;
+
 
    if (needle_len == 0)
      /* The first occurrence of the empty string is deemed to occur at
@@ -57,6 +61,29 @@  __memmem (const void *haystack_start, size_t haystack_len,
   if (__glibc_unlikely (haystack_len < needle_len))
     return NULL;
 
+  if (needle_len == 1)
+    return memchr (haystack, needle[0], haystack_end - haystack);
+
+  while ((haystack = memchr (haystack, needle[0], haystack_end - haystack)))
+    {
+      if (haystack[1] == needle[1] 
+          && (needle_len == 2 || haystack[2] == needle[2]))
+        {
+          if (needle_len == 2)
+            return haystack;
+
+          if (!memcmp (haystack + 2, needle + 2, needle_len - 2))
+            return haystack;
+          else  
+            goto slow_path;
+        }
+      haystack++;
+    }
+
+  return NULL;
+
+  slow_path:;
+
   /* Use optimizations in memchr when possible, to reduce the search
      size of haystack using a linear algorithm with a smaller
      coefficient.  However, avoid memchr for long needles, since we