From patchwork Wed Dec 21 23:05:58 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 18623 Received: (qmail 51516 invoked by alias); 21 Dec 2016 23:06:22 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 51240 invoked by uid 89); 21 Dec 2016 23:06:20 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.6 required=5.0 tests=BAYES_00, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=no version=3.3.2 spammy= X-HELO: mail-pg0-f66.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:sender:from:to:subject:date:message-id :in-reply-to:references; bh=uptzXWwbElUzowtO2sudzZkSXzgXwkKLZz2NwvvxEwM=; b=EUV9L2pEg9NfHBUV/YdaDOeN+srn8QohxmMguCLp9pWFkXdHc5rcbyITKB8L8tX89T 7CdAXTMgHSsZaNp624o3z1X+NC5QjIGumZbcJFkhJ1LVGhTI1avRNV+zYCBunE1IZAUy /WefIdu+y6Z43aivalVVTN2exeuMjklV75TnsTp5j6U20GNomqMQKRUxG+CafwNYsDSz VKEFsYrWs6E3TuLrlLQRvWaxf/sbqshdTomX1674FWP4n040B83AC9W4/0q72WLvM06e WtYA9XG17QB1cgeL6XdBOvJ489Jfiw82fDZgY7OmfAGH9YOBimW+xU4z+OfU71sy2tck mx3A== X-Gm-Message-State: AIkVDXKBmbmMJlMFIM/qzMarWDX0qKCNGf5dAh7tuTrNsC2kcaM6qUDzzK29nNucoNjfdw== X-Received: by 10.84.150.129 with SMTP id h1mr13202620plh.3.1482361574955; Wed, 21 Dec 2016 15:06:14 -0800 (PST) From: Richard Henderson To: libc-alpha@sourceware.org Subject: [PATCH v2 09/16] Improve generic strnlen Date: Wed, 21 Dec 2016 15:05:58 -0800 Message-Id: <20161221230605.28638-10-rth@twiddle.net> In-Reply-To: <20161221230605.28638-1-rth@twiddle.net> References: <20161221230605.28638-1-rth@twiddle.net> [BZ #5806] * string/strnlen.c: Rewrite in terms of 
__memchr. --- string/strnlen.c | 133 +++---------------------------------------------------- 1 file changed, 5 insertions(+), 128 deletions(-) diff --git a/string/strnlen.c b/string/strnlen.c index b2b0664..d01f9c4 100644 --- a/string/strnlen.c +++ b/string/strnlen.c @@ -21,7 +21,6 @@ not, see <http://www.gnu.org/licenses/>. */ #include <string.h> -#include <stdlib.h> /* Find the length of S, but scan at most MAXLEN characters. If no '\0' terminator is found in that many characters, return MAXLEN. */ @@ -31,136 +30,14 @@ #endif size_t -__strnlen (const char *str, size_t maxlen) +__strnlen (const char *s, size_t maxlen) { - const char *char_ptr, *end_ptr = str + maxlen; - const unsigned long int *longword_ptr; - unsigned long int longword, himagic, lomagic; - - if (maxlen == 0) - return 0; - - if (__glibc_unlikely (end_ptr < str)) - end_ptr = (const char *) ~0UL; - - /* Handle the first few characters by reading one character at a time. - Do this until CHAR_PTR is aligned on a longword boundary. */ - for (char_ptr = str; ((unsigned long int) char_ptr - & (sizeof (longword) - 1)) != 0; - ++char_ptr) - if (*char_ptr == '\0') - { - if (char_ptr > end_ptr) - char_ptr = end_ptr; - return char_ptr - str; - } - - /* All these elucidatory comments refer to 4-byte longwords, - but the theory applies equally well to 8-byte longwords. */ - - longword_ptr = (unsigned long int *) char_ptr; - - /* Bits 31, 24, 16, and 8 of this number are zero. Call these bits - the "holes." Note that there is a hole just to the left of - each byte, with an extra at the end: - - bits: 01111110 11111110 11111110 11111111 - bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD - - The 1-bits make sure that carries propagate to the next 0-bit. - The 0-bits provide holes for carries to fall into. */ - himagic = 0x80808080L; - lomagic = 0x01010101L; - if (sizeof (longword) > 4) - { - /* 64-bit version of the magic. */ - /* Do the shift in two steps to avoid a warning if long has 32 bits.
*/ - himagic = ((himagic << 16) << 16) | himagic; - lomagic = ((lomagic << 16) << 16) | lomagic; - } - if (sizeof (longword) > 8) - abort (); - - /* Instead of the traditional loop which tests each character, - we will test a longword at a time. The tricky part is testing - if *any of the four* bytes in the longword in question are zero. */ - while (longword_ptr < (unsigned long int *) end_ptr) - { - /* We tentatively exit the loop if adding MAGIC_BITS to - LONGWORD fails to change any of the hole bits of LONGWORD. - - 1) Is this safe? Will it catch all the zero bytes? - Suppose there is a byte with all zeros. Any carry bits - propagating from its left will fall into the hole at its - least significant bit and stop. Since there will be no - carry from its most significant bit, the LSB of the - byte to the left will be unchanged, and the zero will be - detected. - - 2) Is this worthwhile? Will it ignore everything except - zero bytes? Suppose every byte of LONGWORD has a bit set - somewhere. There will be a carry into bit 8. If bit 8 - is set, this will carry into bit 16. If bit 8 is clear, - one of bits 9-15 must be set, so there will be a carry - into bit 16. Similarly, there will be a carry into bit - 24. If one of bits 24-30 is set, there will be a carry - into bit 31, so all of the hole bits will be changed. - - The one misfire occurs when bits 24-30 are clear and bit - 31 is set; in this case, the hole at bit 31 is not - changed. If we had access to the processor carry flag, - we could close this loophole by putting the fourth hole - at bit 32! - - So it ignores everything except 128's, when they're aligned - properly. */ - - longword = *longword_ptr++; - - if ((longword - lomagic) & himagic) - { - /* Which of the bytes was the zero? If none of them were, it was - a misfire; continue the search. 
*/ - - const char *cp = (const char *) (longword_ptr - 1); - - char_ptr = cp; - if (cp[0] == 0) - break; - char_ptr = cp + 1; - if (cp[1] == 0) - break; - char_ptr = cp + 2; - if (cp[2] == 0) - break; - char_ptr = cp + 3; - if (cp[3] == 0) - break; - if (sizeof (longword) > 4) - { - char_ptr = cp + 4; - if (cp[4] == 0) - break; - char_ptr = cp + 5; - if (cp[5] == 0) - break; - char_ptr = cp + 6; - if (cp[6] == 0) - break; - char_ptr = cp + 7; - if (cp[7] == 0) - break; - } - } - char_ptr = end_ptr; - } - - if (char_ptr > end_ptr) - char_ptr = end_ptr; - return char_ptr - str; + const char *found = memchr (s, '\0', maxlen); + return found ? found - s : maxlen; } + #ifndef STRNLEN -libc_hidden_def (__strnlen) weak_alias (__strnlen, strnlen) +libc_hidden_def (__strnlen) #endif libc_hidden_def (strnlen)