From patchwork Sat Dec 17 06:57:19 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 18532 Received: (qmail 93468 invoked by alias); 17 Dec 2016 06:57:54 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 92943 invoked by uid 89); 17 Dec 2016 06:57:51 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=no version=3.3.2 spammy=tailored, simultaneously, holes X-HELO: mail-pf0-f194.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:sender:from:to:subject:date:message-id :in-reply-to:references; bh=uwvChCzhK11Ox7uuvldEabLUJiee2yhctP2R+pQJNPM=; b=pnIaGV2HZonHtZGzxLzXcAF81WKIBZUK0dsMJatkCIh/LwGvZ8Ux2LOkg1QPAY5xcS IisySXdMXdscQoPiXMtLcFgw5vkgU0nzL6hh6VJTOOe/EvGtLKHps9pi0sjW88kl/c8u 3ClxVbhCRewrs3HaAg8MGp5EUN4eId1F5MRuqwnXLcduPQY2UR8O84dEVl4uh9jfO+52 y39xqu2LffjOnqNOshdEhLRoAEJqCUt2fefxjsYD5FF67o70UGyVYWDlrEtTBpDS93JW S8KDNhDEIGg6UE4Czotd0RDOAGdjFtNU0PMzfUk3F4up7BeHKXmpDG1FtWL8ZjQFJZ0h pWDg== X-Gm-Message-State: AKaTC03YmMyeqHeYw0YyneRxx67fULe6wufyVNKWa9H1ji5aycLn6TD8chRng6Xd6fbGQA== X-Received: by 10.99.152.69 with SMTP id l5mr11314507pgo.97.1481957859284; Fri, 16 Dec 2016 22:57:39 -0800 (PST) From: Richard Henderson To: libc-alpha@sourceware.org Subject: [PATCH 01/11] Improve generic strlen Date: Fri, 16 Dec 2016 22:57:19 -0800 Message-Id: <20161217065729.28561-2-rth@twiddle.net> In-Reply-To: <20161217065729.28561-1-rth@twiddle.net> References: <20161217065729.28561-1-rth@twiddle.net> Extract haszero and whichzero tests into headers that can be tailored for the architecture. * sysdeps/generic/haszero.h: New file. * sysdeps/generic/whichzero.h: New file. * sysdeps/generic/extractbyte.h: New file. * string/strlen.c: Use them. --- string/strlen.c | 80 +++++++++---------------------------------- sysdeps/generic/extractbyte.h | 34 ++++++++++++++++++ sysdeps/generic/haszero.h | 42 +++++++++++++++++++++++ sysdeps/generic/whichzero.h | 67 ++++++++++++++++++++++++++++++++++++ 4 files changed, 160 insertions(+), 63 deletions(-) create mode 100644 sysdeps/generic/extractbyte.h create mode 100644 sysdeps/generic/haszero.h create mode 100644 sysdeps/generic/whichzero.h diff --git a/string/strlen.c b/string/strlen.c index 4943ce2..f2a53a6 100644 --- a/string/strlen.c +++ b/string/strlen.c @@ -20,6 +20,9 @@ #include #include +#include +#include +#include #undef strlen @@ -32,78 +35,29 @@ size_t STRLEN (const char *str) { - const char *char_ptr; + const char *char_ptr = str; const unsigned long int *longword_ptr; - unsigned long int longword, himagic, lomagic; + unsigned long int longword; + uintptr_t i, align; /* Handle the first few characters by reading one character at a time. Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = str; ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - ++char_ptr) + align = -(uintptr_t)char_ptr % sizeof(longword); + for (i = 0; i < align; ++i, ++char_ptr) if (*char_ptr == '\0') - return char_ptr - str; + goto found; - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - himagic = 0x80808080L; - lomagic = 0x01010101L; - if (sizeof (longword) > 4) - { - /* 64-bit version of the magic. */ - /* Do the shift in two steps to avoid a warning if long has 32 bits. */ - himagic = ((himagic << 16) << 16) | himagic; - lomagic = ((lomagic << 16) << 16) | lomagic; - } - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - for (;;) + longword_ptr = (const unsigned long int *) char_ptr; + do { longword = *longword_ptr++; + } + while (!haszero(longword)); - if (((longword - lomagic) & ~longword & himagic) != 0) - { - /* Which of the bytes was the zero? If none of them were, it was - a misfire; continue the search. */ - - const char *cp = (const char *) (longword_ptr - 1); + char_ptr = (const char *) (longword_ptr - 1); + char_ptr += whichzero(longword); - if (cp[0] == 0) - return cp - str; - if (cp[1] == 0) - return cp - str + 1; - if (cp[2] == 0) - return cp - str + 2; - if (cp[3] == 0) - return cp - str + 3; - if (sizeof (longword) > 4) - { - if (cp[4] == 0) - return cp - str + 4; - if (cp[5] == 0) - return cp - str + 5; - if (cp[6] == 0) - return cp - str + 6; - if (cp[7] == 0) - return cp - str + 7; - } - } - } + found: + return char_ptr - str; } libc_hidden_builtin_def (strlen) diff --git a/sysdeps/generic/extractbyte.h b/sysdeps/generic/extractbyte.h new file mode 100644 index 0000000..ace2cda --- /dev/null +++ b/sysdeps/generic/extractbyte.h @@ -0,0 +1,34 @@ +/* extractbyte.h -- function memory order byte extract. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef EXTRACTBYTE_H +#define EXTRACTBYTE_H 1 + +#include +#include + +static inline unsigned char +extractbyte(unsigned long int x, unsigned idx) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return x >> (idx * CHAR_BIT); + else + return x >> (sizeof(x) - 1 - idx) * CHAR_BIT; +} + +#endif /* EXTRACTBYTE_H */ diff --git a/sysdeps/generic/haszero.h b/sysdeps/generic/haszero.h new file mode 100644 index 0000000..084741c --- /dev/null +++ b/sysdeps/generic/haszero.h @@ -0,0 +1,42 @@ +/* haszero.h -- function for zero byte detection. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef HASZERO_H +#define HASZERO_H 1 + +/* See https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord */ + +static inline unsigned long int +haszero(unsigned long int x) +{ + unsigned long int ones = -1UL / 0xff; + unsigned long int signs = ones * 0x80; + return (x - ones) & ~x & signs; +} + +/* Likewise, but for two words simultaneously. */ + +static inline unsigned long int +haszero2(unsigned long int x1, unsigned long int x2) +{ + unsigned long int ones = -1UL / 0xff; + unsigned long int signs = ones * 0x80; + return (((x1 - ones) & ~x1) | ((x2 - ones) & ~x2)) & signs; +} + +#endif /* haszero.h */ diff --git a/sysdeps/generic/whichzero.h b/sysdeps/generic/whichzero.h new file mode 100644 index 0000000..346b09a --- /dev/null +++ b/sysdeps/generic/whichzero.h @@ -0,0 +1,67 @@ +/* whichzero.h -- functions for zero byte searching. Generic C version. + Copyright (C) 2016 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef WHICHZERO_H +#define WHICHZERO_H 1 + +#include + +/* A subroutine for the whichzero and whichzero2 functions. Given a + test word C, return the index of the first byte in memory order + that contains 0x80 (all other bits will be zero). */ + +static inline unsigned int +whichzero_ffs(unsigned long int c) +{ + if (__BYTE_ORDER == __LITTLE_ENDIAN) + return __builtin_ctzl (c) / 8u; + else + return __builtin_clzl (c) / 8u; +} + +/* Given a long that is known to contain a zero byte, return the + index of the first such within the long in host memory order. */ + +static inline unsigned int +whichzero(unsigned long int x) +{ + /* For each byte, find not-zero by + (0) And 0x7f so that we cannot carry between bytes, + (1) Add 0x7f so that we carry into 0x80, + (2) Or in the original byte which might have contained 0x80. + Then invert and mask such that 0x80 is set iff the byte was zero. */ + unsigned long int m = (-1UL / 0xff) * 0x7f; + unsigned long int c = ~(((x & m) + m) | x | m); + + return whichzero_ffs(c); +} + +/* Similarly, but perform the test for two longs simultaneously. */ + +static inline unsigned int +whichzero2(unsigned long int x1, unsigned long int x2) +{ + unsigned long int m = (-1UL / 0xff) * 0x7f; + unsigned long int c1 = ((x1 & m) + m) | x1; + unsigned long int c2 = ((x2 & m) + m) | x2; + unsigned long int c = ~((c1 & c2) | m); + + return whichzero_ffs(c); +} + +#endif /* whichzero.h */