[08/11] Improve generic strcmp

Message ID 20161217065729.28561-9-rth@twiddle.net
State New, archived
Headers

Commit Message

Richard Henderson Dec. 17, 2016, 6:57 a.m. UTC
  * string/strcmp.c: Rewrite using memcopy.h and haszero.h.
---
 string/strcmp.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 92 insertions(+), 8 deletions(-)
  

Patch

diff --git a/string/strcmp.c b/string/strcmp.c
index 4b16f99..2b32854 100644
--- a/string/strcmp.c
+++ b/string/strcmp.c
@@ -16,6 +16,11 @@ 
    <http://www.gnu.org/licenses/>.  */
 
 #include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <haszero.h>
+#include <extractbyte.h>
+#include <memcopy.h>
 
 #undef strcmp
 
@@ -29,19 +34,98 @@ 
 int
 STRCMP (const char *p1, const char *p2)
 {
-  const unsigned char *s1 = (const unsigned char *) p1;
-  const unsigned char *s2 = (const unsigned char *) p2;
+  const unsigned long int *x1, *x2;
+  unsigned long int w1, w2;
   unsigned char c1, c2;
+  uintptr_t i, n, ofs;
+  int diff;
 
-  do
+  /* Handle the unaligned bytes of p1 first.  */
+  n = -(uintptr_t)p1 % sizeof(unsigned long int);
+  for (i = 0; i < n; ++i)
     {
-      c1 = (unsigned char) *s1++;
-      c2 = (unsigned char) *s2++;
-      if (c1 == '\0')
-	return c1 - c2;
+      c1 = *p1++;
+      c2 = *p2++;
+      diff = c1 - c2;
+      if (c1 == '\0' || diff)
+	return diff;
     }
-  while (c1 == c2);
 
+  /* P1 is now aligned to unsigned long.  P2 may or may not be.  */
+  x1 = (const unsigned long int *)p1;
+  w1 = *x1++;
+  ofs = (uintptr_t)p2 % sizeof(unsigned long int);
+  if (ofs == 0)
+    {
+      x2 = (const unsigned long int *)p2;
+      w2 = *x2++;
+      /* Aligned loop.  If a difference is found, exit to compare the
+         bytes.  Else if a zero is found we have equal strings.  */
+      while (w1 == w2)
+	{
+	  if (haszero (w1))
+	    return 0;
+          w1 = *x1++;
+          w2 = *x2++;
+	}
+    }
+  else
+    {
+      unsigned long int w2a, w2b;
+      uintptr_t sh_1, sh_2;
+
+      x2 = (const unsigned long int *)(p2 - ofs);
+      w2a = *x2++;
+      sh_1 = ofs * CHAR_BIT;
+      sh_2 = sizeof(unsigned long int) * CHAR_BIT - sh_1;
+
+      /* Align the first partial of P2, with 0xff for the rest of the
+         bytes so that we can also apply the haszero test to see if we
+         have already reached EOS.  If we have, then we can simply fall
+         through to the final comparison.  */
+      w2 = MERGE (w2a, sh_1, -1UL, sh_2);
+      if (!haszero (w2))
+	{
+	  /* Unaligned loop.  The invariant is that W2B, which is "ahead"
+             of W1, does not contain end-of-string.  Therefore it is safe
+             (and necessary) to read another word from each while we do
+             not have a difference.  */
+	  while (1)
+	    {
+	      w2b = *x2++;
+	      w2 = MERGE (w2a, sh_1, w2b, sh_2);
+	      if (w1 != w2)
+		goto final_cmp;
+	      if (haszero (w2b))
+		break;
+	      w1 = *x1++;
+	      w2a = w2b;
+	    }
+
+	  /* Zero found in the second partial of P2.  If we had EOS
+	     in the aligned word, we have equality.  */
+	  if (haszero (w1))
+	    return 0;
+
+          /* Load the final word of P1 and align the final partial of P2.  */
+	  w1 = *x1++;
+          w2 = MERGE (w2b, sh_1, 0, sh_2);
+	}
+    }
+
+ final_cmp:
+  /* We have two aligned words of data.  */
+  for (i = 0; i < sizeof(unsigned long int) - 1; ++i)
+    {
+      c1 = extractbyte (w1, i);
+      c2 = extractbyte (w2, i);
+      diff = c1 - c2;
+      if (c1 == '\0' || diff)
+	return diff;
+    }
+  c1 = extractbyte (w1, i);
+  c2 = extractbyte (w2, i);
   return c1 - c2;
 }
+
 libc_hidden_builtin_def (strcmp)