From patchwork Wed Jan 10 12:47:56 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adhemerval Zanella X-Patchwork-Id: 25311 Received: (qmail 130119 invoked by alias); 10 Jan 2018 12:48:43 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 125944 invoked by uid 89); 10 Jan 2018 12:48:36 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.9 required=5.0 tests=AWL, BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mail-qt0-f194.google.com X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=6KhtS4DC+QtoOJU9OLLhmLgWpCcCuAvgWYSgiJ6sNEk=; b=oXMbIsyN8f2kAQ5fHsZR0B3VpixaLlWUhqvWrqURPkN8DFva2tGoS5nybTDJp3ZdEs nhaE25ovrsLzQ2/6axDhpMuj0iDCASk0yTkI6zce1UgWjrn7vv/EEXc7oqENnafKBN2j 6mm+OZn4HI6pSgP1RyiQCwXJbW8n8VkyxtH1CnoOgSUYFqpLANAL9qR4mj/Q4Y37zC+Q pxbL+84MmmZxUIrUAJpkWVq/ErT3gAWcqLg6u6GTNQDT+IyD2WKwdYNB5L3bUAvZxJDN Isls3kVKTFCzMA43xIpnGa6I6K2V+JBAQlPK5ajVtTfW76cibB2HUu9PP6M6paEuFZfe l++w== X-Gm-Message-State: AKwxytcxg1/D1Lokoz4Espb8hFHQfb+t43gqVjmVygizh+sr/EfyCh/o Ya4h17yuyL1HGa4vPpbss+97k08rdc4= X-Google-Smtp-Source: ACJfBovsJ/nRth+F6d/MYk2q4vijwE+XmRsXsmzIXf6DPILxd99BPTL32BEFrvYPy8v/1Dfjy58Xjw== X-Received: by 10.200.37.119 with SMTP id 52mr26911565qtn.270.1515588507648; Wed, 10 Jan 2018 04:48:27 -0800 (PST) From: Adhemerval Zanella To: libc-alpha@sourceware.org Cc: Adhemerval Zanella Subject: [PATCH v3 12/18] string: Improve generic strcpy Date: Wed, 10 Jan 2018 10:47:56 -0200 Message-Id: 
<1515588482-15744-13-git-send-email-adhemerval.zanella@linaro.org> In-Reply-To: <1515588482-15744-1-git-send-email-adhemerval.zanella@linaro.org> References: <1515588482-15744-1-git-send-email-adhemerval.zanella@linaro.org> From: Adhemerval Zanella New generic implementation tries to use word operations along with the new string-fz{b,i} functions even for inputs with different alignments (which still uses aligned access plus a merge operation to get a correct word by word comparison). Checked on x86_64-linux-gnu, i686-linux-gnu, sparc64-linux-gnu, and sparcv9-linux-gnu by removing the arch-specific assembly implementation and disabling multi-arch (it covers both LE and BE for 64 and 32 bits). Richard Henderson Adhemerval Zanella * string/strcpy.c: Rewrite using memcopy.h, string-fzb.h, string-fzi.h. * string/test-strcpy.c (test_main): Add more coverage. --- string/strcpy.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++- string/test-strcpy.c | 24 +++++++++++- 2 files changed, 130 insertions(+), 3 deletions(-) diff --git a/string/strcpy.c b/string/strcpy.c index a4cce89..358b1b1 100644 --- a/string/strcpy.c +++ b/string/strcpy.c @@ -15,8 +15,13 @@ License along with the GNU C Library; if not, see . */ -#include #include +#include +#include +#include +#include +#include +#include #undef strcpy @@ -28,6 +33,106 @@ char * STRCPY (char *dest, const char *src) { - return memcpy (dest, src, strlen (src) + 1); + char *dst = dest; + const op_t *xs; + op_t *xd; + op_t ws; + +#if _STRING_ARCH_unaligned + /* For architectures which supports unaligned memory operations, it first + aligns the source pointer, reads op_t bytes at time until a zero is + found, and writes unaligned to destination. 
*/ + uintptr_t n = -(uintptr_t) src % sizeof (op_t); + for (uintptr_t i = 0; i < n; ++i) + { + unsigned c = *src++; + *dst++ = c; + if (c == '\0') + return dest; + } + xs = (const op_t *) src; + ws = *xs++; + xd = (op_t *) dst; + while (!has_zero (ws)) + { + *xd++ = ws; + ws = *xs++; + } +#else + /* For architectures which only supports aligned accesses, it first align + the destination pointer. */ + uintptr_t n = -(uintptr_t) dst % sizeof (op_t); + for (uintptr_t i = 0; i < n; ++i) + { + unsigned c = *src++; + *dst++ = c; + if (c == '\0') + return dest; + } + xd = (op_t *) dst; + + /* Destination is aligned to op_t while source might be not. */ + uintptr_t ofs = (uintptr_t) src % sizeof (op_t); + if (ofs == 0) + { + /* Aligned loop. If a zero is found, exit to copy the remaining + bytes. */ + xs = (const op_t *) src; + + ws = *xs++; + while (!has_zero (ws)) + { + *xd++ = ws; + ws = *xs++; + } + } + else + { + /* Unaligned loop: align the source pointer and mask off the + undesirable bytes which is not part of the string. */ + op_t wsa, wsb; + uintptr_t sh_1, sh_2; + + xs = (const op_t *)(src - ofs); + wsa = *xs++; + sh_1 = ofs * CHAR_BIT; + sh_2 = sizeof(op_t) * CHAR_BIT - sh_1; + + /* Align the first partial op_t from source, with 0xff for the rest + of the bytes so that we can also apply the has_zero test to see if we + have already reached EOS. If we have, then we can simply fall + through to the final byte copies. */ + ws = MERGE (wsa, sh_1, (op_t)-1, sh_2); + if (!has_zero (ws)) + { + while (1) + { + wsb = *xs++; + ws = MERGE (wsa, sh_1, wsb, sh_2); + if (has_zero (wsb)) + break; + *xd++ = ws; + wsa = wsb; + } + + /* WS may contain bytes that we not written yet in destination. + Write them down and merge with the op_t containing the EOS + byte. */ + if (!has_zero (ws)) + { + *xd++ = ws; + ws = MERGE (wsb, sh_1, ws, sh_2); + } + } + } +#endif + + /* Just copy the final bytes from op_t. 
*/ + dst = (char *) xd; + uintptr_t fz = index_first_zero (ws); + for (uintptr_t i = 0; i < fz + 1; i++) + *dst++ = extractbyte (ws, i); + + return dest; } libc_hidden_builtin_def (strcpy) diff --git a/string/test-strcpy.c b/string/test-strcpy.c index 2a1bf93..fa03c73 100644 --- a/string/test-strcpy.c +++ b/string/test-strcpy.c @@ -207,7 +207,7 @@ do_random_tests (void) int test_main (void) { - size_t i; + size_t i, j; test_init (); @@ -222,12 +222,26 @@ test_main (void) do_test (0, 0, i, BIG_CHAR); do_test (0, i, i, SMALL_CHAR); do_test (i, 0, i, BIG_CHAR); + + for (j = 1; j < 16; ++j) + { + do_test (0, 0, i + j, SMALL_CHAR); + do_test (0, 0, i + j, BIG_CHAR); + do_test (0, i, i + j, SMALL_CHAR); + do_test (i, 0, i + j, BIG_CHAR); + } } for (i = 1; i < 8; ++i) { do_test (0, 0, 8 << i, SMALL_CHAR); do_test (8 - i, 2 * i, 8 << i, SMALL_CHAR); + + for (j = 1; j < 8; ++j) + { + do_test (0, 0, (8 << i) + j, SMALL_CHAR); + do_test (8 - i, 2 * i, (8 << i) + j, SMALL_CHAR); + } } for (i = 1; i < 8; ++i) @@ -236,6 +250,14 @@ test_main (void) do_test (2 * i, i, 8 << i, BIG_CHAR); do_test (i, i, 8 << i, SMALL_CHAR); do_test (i, i, 8 << i, BIG_CHAR); + + for (j = 1; j < 8; ++j) + { + do_test (i, 2 * i, (8 << i) + j, SMALL_CHAR); + do_test (2 * i, i, (8 << i) + j, BIG_CHAR); + do_test (i, i, (8 << i) + j, SMALL_CHAR); + do_test (i, i, (8 << i) + j, BIG_CHAR); + } } do_random_tests ();