From patchwork Wed Dec 21 23:05:52 2016
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Richard Henderson <rth@twiddle.net>
X-Patchwork-Id: 18627
Received: (qmail 51843 invoked by alias); 21 Dec 2016 23:06:23 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id: <libc-alpha.sourceware.org>
List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org>
List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org>
List-Archive: <http://sourceware.org/ml/libc-alpha/>
List-Post: <mailto:libc-alpha@sourceware.org>
List-Help: <mailto:libc-alpha-help@sourceware.org>,
	<http://sourceware.org/ml/#faqs>
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 51600 invoked by uid 89); 21 Dec 2016 23:06:22 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=BAYES_00,
	FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE,
	SPF_PASS autolearn=no version=3.3.2 spammy=0x80, caveats, 2090
X-HELO: mail-pg0-f65.google.com
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:sender:from:to:subject:date:message-id
	:in-reply-to:references;
	bh=NHY/cK5Rllv6EwzLyVIqycoAxV59nKLDWBHAXO4qZUY=;
	b=KFXDflZbNSdr+RIuItgSx+NQVKVPogDulBQ/ULz0JZLYFhXtUYOAYD6qQDkt1RtCJi
	pOPhfUIAb1aQ6+N/vnv19mxLqCzQpMfaQWkAqTrqe7PH/Ec+1+G+mCQujPLho82r0DPy
	8fop5IWkafEGMiEarHhR/TocvRSbFekxXUcU37ppbuJa6MZ7bQMlCDmu0n0/xEeqxvzv
	bBNZndK/lz4Wd3oKZTWHi2tPQnT6txj9CyRtChs/J9rvP4L2i82fzDkvrC2tRLmF2bmY
	bDjtQD/RfowGi681Sp7SgLg4zvPQqXXxxJ0x4AMDo0Ntz11TdNn6oeOAr4fqWY8NDfEq
	QkQw==
X-Gm-Message-State: 
 AIkVDXL3WjTOQIj7E1LLOuYWKd8l0N3Ey71+DvS76aXJjVpplv+qAFYC+juyd9t/rcBDqQ==
X-Received: by 10.98.152.212 with SMTP id d81mr6294827pfk.12.1482361570124;
	Wed, 21 Dec 2016 15:06:10 -0800 (PST)
From: Richard Henderson <rth@twiddle.net>
To: libc-alpha@sourceware.org
Subject: [PATCH v2 03/16] Improve generic strlen
Date: Wed, 21 Dec 2016 15:05:52 -0800
Message-Id: <20161221230605.28638-4-rth@twiddle.net>
In-Reply-To: <20161221230605.28638-1-rth@twiddle.net>
References: <20161221230605.28638-1-rth@twiddle.net>

Extract has_zero and index_first_zero tests into headers that
can be tailored for the architecture.

	[BZ #5806]
    	* sysdeps/generic/string-fza.h: New file.
    	* sysdeps/generic/string-fzb.h: New file.
    	* sysdeps/generic/string-fzi.h: New file.
    	* sysdeps/generic/string-extbyte.h: New file.
    	* string/strlen.c: Use them.
---
 string/strlen.c                  |  89 ++++++------------------
 sysdeps/generic/string-extbyte.h |  35 ++++++++++
 sysdeps/generic/string-fza.h     | 117 +++++++++++++++++++++++++++++++
 sysdeps/generic/string-fzb.h     |  49 +++++++++++++
 sysdeps/generic/string-fzi.h     | 146 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 369 insertions(+), 67 deletions(-)
 create mode 100644 sysdeps/generic/string-extbyte.h
 create mode 100644 sysdeps/generic/string-fza.h
 create mode 100644 sysdeps/generic/string-fzb.h
 create mode 100644 sysdeps/generic/string-fzi.h

diff --git a/string/strlen.c b/string/strlen.c
index 4943ce2..4aa95d5 100644
--- a/string/strlen.c
+++ b/string/strlen.c
@@ -20,90 +20,45 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <stdint.h>
+#include <string-fzb.h>
+#include <string-fzi.h>
 
 #undef strlen
 
-#ifndef STRLEN
-# define STRLEN strlen
+#ifdef STRLEN
+# define strlen STRLEN
 #endif
 
 /* Return the length of the null-terminated string STR.  Scan for
    the null terminator quickly by testing four bytes at a time.  */
 size_t
-STRLEN (const char *str)
+strlen (const char *str)
 {
-  const char *char_ptr;
-  const unsigned long int *longword_ptr;
-  unsigned long int longword, himagic, lomagic;
+  const char *char_ptr = str;
+  const op_t *word_ptr;
+  op_t word;
+  uintptr_t i, align;
 
   /* Handle the first few characters by reading one character at a time.
      Do this until CHAR_PTR is aligned on a longword boundary.  */
-  for (char_ptr = str; ((unsigned long int) char_ptr
-			& (sizeof (longword) - 1)) != 0;
-       ++char_ptr)
+  align = -(uintptr_t)char_ptr % sizeof(word);
+  for (i = 0; i < align; ++i, ++char_ptr)
     if (*char_ptr == '\0')
       return char_ptr - str;
 
-  /* All these elucidatory comments refer to 4-byte longwords,
-     but the theory applies equally well to 8-byte longwords.  */
-
-  longword_ptr = (unsigned long int *) char_ptr;
-
-  /* Bits 31, 24, 16, and 8 of this number are zero.  Call these bits
-     the "holes."  Note that there is a hole just to the left of
-     each byte, with an extra at the end:
-
-     bits:  01111110 11111110 11111110 11111111
-     bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
-
-     The 1-bits make sure that carries propagate to the next 0-bit.
-     The 0-bits provide holes for carries to fall into.  */
-  himagic = 0x80808080L;
-  lomagic = 0x01010101L;
-  if (sizeof (longword) > 4)
+  word_ptr = (const op_t *) char_ptr;
+  do
     {
-      /* 64-bit version of the magic.  */
-      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
-      himagic = ((himagic << 16) << 16) | himagic;
-      lomagic = ((lomagic << 16) << 16) | lomagic;
+      word = *word_ptr++;
     }
-  if (sizeof (longword) > 8)
-    abort ();
-
-  /* Instead of the traditional loop which tests each character,
-     we will test a longword at a time.  The tricky part is testing
-     if *any of the four* bytes in the longword in question are zero.  */
-  for (;;)
-    {
-      longword = *longword_ptr++;
+  while (!has_zero (word));
 
-      if (((longword - lomagic) & ~longword & himagic) != 0)
-	{
-	  /* Which of the bytes was the zero?  If none of them were, it was
-	     a misfire; continue the search.  */
-
-	  const char *cp = (const char *) (longword_ptr - 1);
-
-	  if (cp[0] == 0)
-	    return cp - str;
-	  if (cp[1] == 0)
-	    return cp - str + 1;
-	  if (cp[2] == 0)
-	    return cp - str + 2;
-	  if (cp[3] == 0)
-	    return cp - str + 3;
-	  if (sizeof (longword) > 4)
-	    {
-	      if (cp[4] == 0)
-		return cp - str + 4;
-	      if (cp[5] == 0)
-		return cp - str + 5;
-	      if (cp[6] == 0)
-		return cp - str + 6;
-	      if (cp[7] == 0)
-		return cp - str + 7;
-	    }
-	}
-    }
+  char_ptr = (const char *) (word_ptr - 1);
+  char_ptr += index_first_zero (word);
+  return char_ptr - str;
 }
+
+#ifndef STRLEN
 libc_hidden_builtin_def (strlen)
+#endif
diff --git a/sysdeps/generic/string-extbyte.h b/sysdeps/generic/string-extbyte.h
new file mode 100644
index 0000000..1ccd5b3
--- /dev/null
+++ b/sysdeps/generic/string-extbyte.h
@@ -0,0 +1,35 @@
+/* string-extbyte.h -- extract a byte in memory order.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_EXTBYTE_H
+#define STRING_EXTBYTE_H 1
+
+#include <limits.h>
+#include <endian.h>
+#include <string-optype.h>
+
+static inline unsigned char
+extractbyte (op_t x, unsigned idx)	/* Byte IDX of X in memory order.  */
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    return x >> (idx * CHAR_BIT);	/* Byte 0 is least significant.  */
+  else
+    return x >> (sizeof (x) - 1 - idx) * CHAR_BIT; /* Byte 0 is the MSB.  */
+}
+
+#endif /* STRING_EXTBYTE_H */
diff --git a/sysdeps/generic/string-fza.h b/sysdeps/generic/string-fza.h
new file mode 100644
index 0000000..638df2e
--- /dev/null
+++ b/sysdeps/generic/string-fza.h
@@ -0,0 +1,117 @@
+/* string-fza.h -- zero byte detection; basics.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZA_H
+#define STRING_FZA_H 1
+
+#include <limits.h>
+#include <string-optype.h>
+
+/* Return non-zero if and only if some byte of X is zero.  The 0x80 bit
+   is set in the least significant zero byte; bits reported for more
+   significant bytes are indeterminate because of borrow propagation.  */
+
+static inline op_t
+find_zero_low (op_t x)
+{
+  /* This expression comes from
+       https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
+     Subtracting 1 sets 0x80 in a byte that was 0; anding ~x clears
+     0x80 in a byte that was >= 128; anding 0x80 isolates that test bit.  */
+  op_t lsb = (op_t)-1 / 0xff;		/* 0x01 in every byte.  */
+  op_t msb = lsb << (CHAR_BIT - 1);	/* 0x80 in every byte.  */
+  return (x - lsb) & ~x & msb;
+}
+
+/* Return a word with 0x80 set in exactly those bytes of X that are zero
+   and every other bit clear.  Unlike find_zero_low the result is exact
+   for all bytes, not just the least significant zero; it is usually
+   used for finding the index of the most significant zero byte.  */
+
+static inline op_t
+find_zero_all (op_t x)
+{
+  /* For each byte, find not-zero by
+     (0) And 0x7f so that we cannot carry between bytes,
+     (1) Add 0x7f so that non-zero carries into 0x80,
+     (2) Or in the original byte (which might have had 0x80 set).
+     Then invert and mask such that 0x80 is set iff that byte was zero.  */
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;	/* 0x7f in every byte.  */
+  return ~(((x & m) + m) | x | m);
+}
+
+/* Like find_zero_low, but flag the bytes that are equal in X1 and X2.  */
+
+static inline op_t
+find_eq_low (op_t x1, op_t x2)
+{
+  return find_zero_low (x1 ^ x2);	/* Equal bytes XOR to zero.  */
+}
+
+static inline op_t
+find_eq_all (op_t x1, op_t x2)
+{
+  return find_zero_all (x1 ^ x2);	/* Equal bytes XOR to zero.  */
+}
+
+/* Like find_zero_low (same caveats), but flag both the zero bytes of X1
+   and the bytes that are equal between X1 and X2.  */
+
+static inline op_t
+find_zero_eq_low (op_t x1, op_t x2)
+{
+  op_t lsb = (op_t)-1 / 0xff;
+  op_t msb = lsb << (CHAR_BIT - 1);
+  op_t eq = x1 ^ x2;	/* Zero where the bytes are equal.  */
+  return (((x1 - lsb) & ~x1) | ((eq - lsb) & ~eq)) & msb;
+}
+
+static inline op_t
+find_zero_eq_all (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t c1 = ((x1 & m) + m) | x1;	/* msb set if byte of x1 not zero */
+  op_t c2 = ((eq & m) + m) | eq;	/* msb set if bytes not equal */
+  return ~((c1 & c2) | m);	/* msb set if x1 zero or bytes equal */
+}
+
+/* Identify zero bytes in X1 and bytes that differ between X1 and X2.
+   Inputs are masked before the add so no carry leaks between bytes.  */
+
+static inline op_t
+find_zero_ne_low (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t nz1 = ((x1 & m) + m) | x1;	/* msb set if byte not zero */
+  op_t ne2 = ((eq & m) + m) | eq;	/* msb set if byte not equal */
+  return (ne2 | ~nz1) & ~m;	/* msb set if x1 zero or x2 not equal */
+}
+
+static inline op_t
+find_zero_ne_all (op_t x1, op_t x2)
+{
+  op_t m = ((op_t)-1 / 0xff) * 0x7f;
+  op_t eq = x1 ^ x2;
+  op_t nz1 = ((x1 & m) + m) | x1;	/* msb set if byte not zero */
+  op_t ne2 = ((eq & m) + m) | eq;	/* msb set if byte not equal */
+  return (ne2 | ~nz1) & ~m;	/* msb set if x1 zero or x2 not equal */
+}
+
+#endif /* STRING_FZA_H */
diff --git a/sysdeps/generic/string-fzb.h b/sysdeps/generic/string-fzb.h
new file mode 100644
index 0000000..e0fc26f
--- /dev/null
+++ b/sysdeps/generic/string-fzb.h
@@ -0,0 +1,49 @@
+/* string-fzb.h -- zero byte detection, boolean.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZB_H
+#define STRING_FZB_H 1
+
+#include <endian.h>
+#include <string-fza.h>
+
+/* Return true iff any byte within X is zero; a pure boolean test.  */
+
+static inline _Bool
+has_zero (op_t x)
+{
+  return find_zero_low (x) != 0;
+}
+
+/* Return true iff any byte of X1 equals the corresponding byte of X2.  */
+
+static inline _Bool
+has_eq (op_t x1, op_t x2)
+{
+  return find_eq_low (x1, x2) != 0;
+}
+
+/* Return true iff X1 has a zero byte or a byte equal to that of X2.  */
+
+static inline _Bool
+has_zero_eq (op_t x1, op_t x2)
+{
+  return find_zero_eq_low (x1, x2) != 0;
+}
+
+#endif /* STRING_FZB_H */
diff --git a/sysdeps/generic/string-fzi.h b/sysdeps/generic/string-fzi.h
new file mode 100644
index 0000000..ea2408f
--- /dev/null
+++ b/sysdeps/generic/string-fzi.h
@@ -0,0 +1,146 @@
+/* string-fzi.h -- zero byte detection; indexes.  Generic C version.
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef STRING_FZI_H
+#define STRING_FZI_H 1
+
+#include <limits.h>
+#include <endian.h>
+#include <string-fza.h>
+
+/* A subroutine for the index_first_* functions.  Given a test word C,
+   return the index, in memory order, of the first non-zero byte.  C must
+   not be zero: the count-zeros builtins are undefined for a zero input.  */
+
+static inline unsigned int
+index_first_ (op_t c)
+{
+  _Static_assert (sizeof (op_t) == sizeof (long)
+		  || sizeof (op_t) == sizeof (long long),
+		  "Unhandled word size");
+
+  unsigned r;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_ctzl (c);
+      else
+	r = __builtin_ctzll (c);
+    }
+  else
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_clzl (c);
+      else
+	r = __builtin_clzll (c);
+    }
+  return r / CHAR_BIT;	/* Convert the bit count to a byte index.  */
+}
+
+/* Similarly, but return the memory-order index of the last non-zero byte.
+   C must not be zero: the count-zeros builtins are undefined for zero.  */
+
+static inline unsigned int
+index_last_ (op_t c)
+{
+  _Static_assert (sizeof (op_t) == sizeof (long)
+		  || sizeof (op_t) == sizeof (long long),
+		  "Unhandled word size");
+
+  unsigned r;
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_clzl (c);
+      else
+	r = __builtin_clzll (c);
+    }
+  else
+    {
+      if (sizeof (op_t) == sizeof (long))
+	r = __builtin_ctzl (c);
+      else
+	r = __builtin_ctzll (c);
+    }
+  return sizeof (op_t) - 1 - (r / CHAR_BIT); /* Count from the other end.  */
+}
+
+/* Given a word X that is known to contain a zero byte, return the
+   index of the first such byte within the word, in memory order.  */
+
+static inline unsigned int
+index_first_zero (op_t x)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x = find_zero_low (x);	/* First in memory is least significant.  */
+  else
+    x = find_zero_all (x);	/* First in memory is most significant.  */
+  return index_first_ (x);
+}
+
+/* Similarly, but search for the first byte that is equal in X1 and X2.  */
+
+static inline unsigned int
+index_first_eq (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_eq_low (x1, x2);	/* First in memory is least significant.  */
+  else
+    x1 = find_eq_all (x1, x2);	/* First in memory is most significant.  */
+  return index_first_ (x1);
+}
+
+/* Similarly, but search for the first byte that is zero in X1 or equal
+   between X1 and X2.  At least one such byte must exist.  */
+
+static inline unsigned int
+index_first_zero_eq (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_zero_eq_low (x1, x2);
+  else
+    x1 = find_zero_eq_all (x1, x2);
+  return index_first_ (x1);
+}
+
+/* Similarly, but search for the first byte that is zero in X1 or differs
+   between X1 and X2.  At least one such byte must exist.  */
+
+static inline unsigned int
+index_first_zero_ne (op_t x1, op_t x2)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x1 = find_zero_ne_low (x1, x2);
+  else
+    x1 = find_zero_ne_all (x1, x2);
+  return index_first_ (x1);
+}
+
+/* Similarly, but return the memory-order index of the last zero in X.  */
+
+static inline unsigned int
+index_last_zero (op_t x)
+{
+  if (__BYTE_ORDER == __LITTLE_ENDIAN)
+    x = find_zero_all (x);	/* Last in memory is most significant.  */
+  else
+    x = find_zero_low (x);	/* Last in memory is least significant.  */
+  return index_last_ (x);
+}
+
+#endif /* STRING_FZI_H */