[6/*] Generic string function optimization: strcasestr

Message ID 20150528174534.GB4299@domone
State New, archived
Headers

Commit Message

Ondrej Bilka May 28, 2015, 5:45 p.m. UTC
  Hi,

Here I made wrong assumption for long time that prevented me to see
optimization in strcasestr. Now I realized that looking for leading pair
would likely work. I though that it would be necessary to construct
table that assigns to each characters list of characters in same
equivalence class.

I don't have to do that if like in strchr will check if its ascii. Then
I could approximate toupper(x) = toupper(c) by expression 
(x == toupper (c)) | (x == tolower (c)) | (x > 128)
which is easily evaluable in parallel, then and it with shifted
expression for second character.

I added it now just as simple heuristic, a more buy-or-rent approach
will follow in next patch.

I also needed to remove test/bench-strcasestr as I need locale check for
ascii-compatibility of towlower map.

Comments?

	* benchtests/bench-strcasestr.c: Remove simple_strcasestr.
	* string/test-strcasestr.c: Likewise.
	* string/skeleton.h: Customize cmask parameter with CMASK_PARAM macro
	* string/strcasestr.c: Optimize by fast search of leading digraph.
  

Patch

diff --git a/benchtests/bench-strcasestr.c b/benchtests/bench-strcasestr.c
index 33531a4..6c6309f 100644
--- a/benchtests/bench-strcasestr.c
+++ b/benchtests/bench-strcasestr.c
@@ -21,10 +21,6 @@ 
 #include "bench-string.h"
 
 
-#define STRCASESTR simple_strcasestr
-#define NO_ALIAS
-#define __strncasecmp strncasecmp
-#include "../string/strcasestr.c"
 
 
 static char *
@@ -53,7 +49,6 @@  stupid_strcasestr (const char *s1, const char *s2)
 typedef char *(*proto_t) (const char *, const char *);
 
 IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
 IMPL (strcasestr, 1)
 
 
diff --git a/string/skeleton.h b/string/skeleton.h
index 26f2d9f..74d4b9f 100644
--- a/string/skeleton.h
+++ b/string/skeleton.h
@@ -31,9 +31,17 @@ 
 #define EXPRESSION(x,y) EXPRESSION_NOCARRY(x,y)
 #endif
 
+#ifdef CUSTOM_CMASK
+#define CMASK_PARAM_MASK CMASK_PARAM
+#else
+#define CMASK_PARAM int c_in
+#define CMASK_PARAM_MASK unsigned long int cmask
+#endif
+
+
 static __always_inline
 int
-found_in_long_bytes(char *s, unsigned long int cmask, char **result)
+found_in_long_bytes(char *s, CMASK_PARAM_MASK, char **result)
 {
   const unsigned long int *lptr = (const unsigned long int *) s;
   unsigned long int mask = EXPRESSION(*lptr, cmask);
@@ -46,16 +54,21 @@  found_in_long_bytes(char *s, unsigned long int cmask, char **result)
     return 0;
 }
 
+
+
+
 static __always_inline
 char *
-string_skeleton (const char *s_in, int c_in, char *end)
+string_skeleton (const char *s_in, CMASK_PARAM, char *end)
 {
   unsigned long int mask;
   const unsigned long int *lptr;
   char *s = (char *) s_in;
-  unsigned char c = (unsigned char) c_in;
   char *r;
+#ifndef CUSTOM_CMASK
+  unsigned char c = (unsigned char) c_in;
   unsigned long int __attribute__ ((unused)) cmask = c * ones;
+#endif
 
 #if _STRING_ARCH_unaligned
   /* We fetch 32 bytes while not crossing page boundary. 
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 400fab8..7c984c8 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -57,6 +57,37 @@ 
 #define STRCASESTR __strcasestr
 #endif
 
+#include "string/common.h"
+
+struct cmask
+{
+  unsigned long int l0, u0, l1, u1;
+};
+#define CUSTOM_CMASK
+#define CMASK_PARAM struct cmask cmask
+
+#define EXPRESSION(x, cmask) (\
+ ( contains_zero ((x >> 8) ^ cmask.l0) \
+   | contains_zero ((x >> 8) ^ cmask.u0) \
+   | (x >> 8)) \
+   | (1UL << (8 * LSIZE - 1)) \
+ & (contains_zero (x ^ cmask.l1) \
+    | contains_zero (x ^ cmask.l1) \
+    | x))
+#define EXPRESSION_NOCARRY(x,cmask) (\
+ ( contains_zero_nocarry ((x >> 8) ^ cmask.l0) \
+   | contains_zero_nocarry ((x >> 8) ^ cmask.u0) \
+   | (x >> 8)) \
+   | (1UL << (8 * LSIZE - 1)) \
+ & (contains_zero_nocarry (x ^ cmask.l1) \
+    | contains_zero_nocarry (x ^ cmask.l1) \
+    | x))
+
+#include "string/skeleton.h"
+
+#include "../locale/localeinfo.h"
+
+
 
 /* Find the first occurrence of NEEDLE in HAYSTACK, using
    case-insensitive comparison.  This function gives unspecified
@@ -70,6 +101,23 @@  STRCASESTR (const char *haystack_start, const char *needle_start)
   size_t haystack_len; /* Known minimum length of HAYSTACK.  */
   bool ok = true; /* True if NEEDLE is prefix of HAYSTACK.  */
 
+  __locale_t loc = _NL_CURRENT_LOCALE;
+  struct __locale_data *ctype = loc->__locales[LC_CTYPE];
+  int nonascii = ctype->values[_NL_ITEM_INDEX (_NL_CTYPE_NONASCII_CASE)].word;
+
+  unsigned char *n = (unsigned char *) needle;  
+  if (!nonascii && haystack[0] != 0 && n[0] != 0 && n[0] < 128 
+                                    && n[1] != 0 && n[1] < 128)
+    {
+      struct cmask cmask;
+      cmask.l0 = tolower (n[0]) * ones;
+      cmask.u0 = toupper (n[0]) * ones;
+      cmask.l1 = tolower (n[1]) * ones;
+      cmask.u1 = toupper (n[1]) * ones;
+      haystack = string_skeleton (haystack + 1, cmask, NULL) - 1;
+    }
+
+
   /* Determine length of NEEDLE, and in the process, make sure
      HAYSTACK is at least as long (no point processing all of a long
      NEEDLE if HAYSTACK is too short).  */
diff --git a/string/test-strcasestr.c b/string/test-strcasestr.c
index 489dc84..3c01881 100644
--- a/string/test-strcasestr.c
+++ b/string/test-strcasestr.c
@@ -25,7 +25,6 @@ 
 #define STRCASESTR simple_strcasestr
 #define NO_ALIAS
 #define __strncasecmp strncasecmp
-#include "strcasestr.c"
 
 
 static char *
@@ -54,7 +53,6 @@  stupid_strcasestr (const char *s1, const char *s2)
 typedef char *(*proto_t) (const char *, const char *);
 
 IMPL (stupid_strcasestr, 0)
-IMPL (simple_strcasestr, 0)
 IMPL (strcasestr, 1)