From patchwork Wed Jun 14 18:30:41 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Palmer Dabbelt
X-Patchwork-Id: 21015
Received: (qmail 97871 invoked by alias); 14 Jun 2017 18:33:13 -0000
Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
List-Id:
List-Unsubscribe:
List-Subscribe:
List-Archive:
List-Post:
List-Help: ,
Sender: libc-alpha-owner@sourceware.org
Delivered-To: mailing list libc-alpha@sourceware.org
Received: (qmail 97721 invoked by uid 89); 14 Jun 2017 18:33:12 -0000
Authentication-Results: sourceware.org; auth=none
X-Spam-SWARE-Status: No, score=-26.0 required=5.0 tests=AWL, BAYES_00,
 GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, RCVD_IN_DNSWL_NONE,
 SPF_PASS autolearn=ham version=3.3.2 spammy=msk, *la, dozen, i1
X-HELO: mail-pg0-f46.google.com
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net;
 s=20161025;
 h=x-gm-message-state:from:to:cc:cc:cc:cc:subject:date:message-id
 :in-reply-to:references;
 bh=qypVEC1FZt9p92tP+lXQqor60CUN+OxycrXrjtjEzvQ=;
 b=B5vtFbVLDas9Q0EeO2jpExTLVHdhUteSpCT5nLRR3GQ0gypx+w5nMr4/XUVY7kaOZy
 kmzdDyreMD0GagbIQx82FyKN/wXdqNTxqfqgtSnZHxZl+SDX8TKS7It1Tb0rK3YJa+/x
 VjoFPtXVnbxMqFiQpsPyH/Q8NfBKUKHtbE6sjKWkCDNDnK5gn1oFKXk5KBCoWmpd2v6m
 AOey0jwpSV0mn4v+Ro/rawOpAGoBo8AYpP/1WOojKje2yc15p2krkSAFyQPu6FYRsP1P
 cO2Qd6sDgwwd9Z2GJQVYlqueUgU5C+R8p6zRwXGemx1Hf86pL7x3H0xQR4fKot5uEYo2
 +KsQ==
X-Gm-Message-State: AKS2vOxgazztauDuqmbzIVxqCQ5wBlw15DGINwVxm7ULV+QPioqwdBv+
 8blhsgsXqoM1z4SGyHMkew==
X-Received: by 10.84.218.134 with SMTP id r6mr1535373pli.288.1497465191210;
 Wed, 14 Jun 2017 11:33:11 -0700 (PDT)
From: Palmer Dabbelt
To: libc-alpha@sourceware.org
Cc: Andrew Waterman
Cc: patches@groups.riscv.org
Cc: Darius Rad
Cc: Palmer Dabbelt
Subject: [PATCH 05/12] RISC-V: Generic Routines
Date: Wed, 14 Jun 2017 11:30:41 -0700
Message-Id: <20170614183048.11040-6-palmer@dabbelt.com>
In-Reply-To: <20170614183048.11040-1-palmer@dabbelt.com>
References: <20170614183048.11040-1-palmer@dabbelt.com>

This patch contains fast versions of the various routines from string.h
that have been implemented for RISC-V. Since RISC-V doesn't define any
specific performance characteristics they're not optimized for any
particular microarchitecture, but are designed to be generally good.
---
[Archive note, not part of the commit: this copy was rebuilt from a
 flattened text extraction.  Line breaks and indentation are reconstructed
 and agree with the per-file hunk counts below.  The #include targets and
 the license URL had been stripped by the extraction; the values shown are
 the presumed originals -- TODO confirm against the list archive.]

 sysdeps/riscv/bits/string.h |  25 +++++++
 sysdeps/riscv/memcpy.c      |  74 +++++++++++++++++++++
 sysdeps/riscv/memset.S      | 107 +++++++++++++++++++++++++++++
 sysdeps/riscv/strcmp.S      | 159 ++++++++++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/strcpy.c      |  54 +++++++++++++++
 sysdeps/riscv/strlen.c      |  39 +++++++++++
 6 files changed, 458 insertions(+)
 create mode 100644 sysdeps/riscv/bits/string.h
 create mode 100644 sysdeps/riscv/memcpy.c
 create mode 100644 sysdeps/riscv/memset.S
 create mode 100644 sysdeps/riscv/strcmp.S
 create mode 100644 sysdeps/riscv/strcpy.c
 create mode 100644 sysdeps/riscv/strlen.c

diff --git a/sysdeps/riscv/bits/string.h b/sysdeps/riscv/bits/string.h
new file mode 100644
index 0000000000..8160b8cc77
--- /dev/null
+++ b/sysdeps/riscv/bits/string.h
@@ -0,0 +1,25 @@
+/* This file should provide inline versions of string functions.
+
+   Surround GCC-specific parts with #ifdef __GNUC__, and use `__extern_inline'.
+
+   This file should define __STRING_INLINES if functions are actually defined
+   as inlines. */
+
+#ifndef _BITS_STRING_H
+#define _BITS_STRING_H 1
+
+#define _STRING_INLINE_unaligned 0
+
+#if defined(__GNUC__) && !defined(__cplusplus)
+
+static __inline__ unsigned long __libc_detect_null(unsigned long w)
+{
+  unsigned long mask = 0x7f7f7f7f;
+  if (sizeof(long) == 8)
+    mask = ((mask << 16) << 16) | mask;
+  return ~(((w & mask) + mask) | w | mask);
+}
+
+#endif /* __GNUC__ && !__cplusplus */
+
+#endif /* bits/string.h */
diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c
new file mode 100644
index 0000000000..f816a54b9b
--- /dev/null
+++ b/sysdeps/riscv/memcpy.c
@@ -0,0 +1,74 @@
+#include <string.h>
+#include <stdint.h>
+
+#define MEMCPY_LOOP_BODY(a, b, t) { \
+    t tt = *b; \
+    a++, b++; \
+    *(a - 1) = tt; \
+  }
+
+void *__memcpy(void *aa, const void *bb, size_t n)
+{
+  uintptr_t msk = sizeof(long) - 1;
+  char *a = (char *)aa, *end = a + n;
+  const char *b = (const char *)bb;
+  long *la, *lend;
+  const long *lb;
+  int same_alignment = ((uintptr_t)a & msk) == ((uintptr_t)b & msk);
+
+  /* Handle small cases, and those without mutual alignment. */
+  if (__glibc_unlikely(!same_alignment || n < sizeof(long)))
+    {
+small:
+      while (a < end)
+        MEMCPY_LOOP_BODY(a, b, char);
+      return aa;
+    }
+
+  /* Obtain alignment. */
+  if (__glibc_unlikely(((uintptr_t)a & msk) != 0))
+    while ((uintptr_t)a & msk)
+      MEMCPY_LOOP_BODY(a, b, char);
+
+  la = (long *)a;
+  lb = (const long *)b;
+  lend = (long *)((uintptr_t)end & ~msk);
+
+  /* Handle large, aligned cases. */
+  if (__glibc_unlikely(la < lend - 8))
+    while (la < lend - 8)
+      {
+        long b0 = *lb++;
+        long b1 = *lb++;
+        long b2 = *lb++;
+        long b3 = *lb++;
+        long b4 = *lb++;
+        long b5 = *lb++;
+        long b6 = *lb++;
+        long b7 = *lb++;
+        long b8 = *lb++;
+        *la++ = b0;
+        *la++ = b1;
+        *la++ = b2;
+        *la++ = b3;
+        *la++ = b4;
+        *la++ = b5;
+        *la++ = b6;
+        *la++ = b7;
+        *la++ = b8;
+      }
+
+  /* Handle aligned, small case. */
+  while (la < lend)
+    MEMCPY_LOOP_BODY(la, lb, long);
+
+  /* Handle misaligned remainder. */
+  a = (char *)la;
+  b = (const char *)lb;
+  if (__glibc_unlikely(a < end))
+    goto small;
+
+  return aa;
+}
+weak_alias (__memcpy, memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/riscv/memset.S b/sysdeps/riscv/memset.S
new file mode 100644
index 0000000000..a85d72b4fc
--- /dev/null
+++ b/sysdeps/riscv/memset.S
@@ -0,0 +1,107 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Andrew Waterman (andrew@sifive.com).
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+ENTRY(memset)
+  li a6, 15
+  mv a4, a0
+  bleu a2, a6, .Ltiny
+  and a5, a4, 15
+  bnez a5, .Lmisaligned
+
+.Laligned:
+  bnez a1, .Lwordify
+
+.Lwordified:
+  and a3, a2, ~15
+  and a2, a2, 15
+  add a3, a3, a4
+
+#if __riscv_xlen == 64
+1:sd a1, 0(a4)
+  sd a1, 8(a4)
+#else
+1:sw a1, 0(a4)
+  sw a1, 4(a4)
+  sw a1, 8(a4)
+  sw a1, 12(a4)
+#endif
+  add a4, a4, 16
+  bltu a4, a3, 1b
+
+  bnez a2, .Ltiny
+  ret
+
+.Ltiny:
+  sub a3, a6, a2
+  sll a3, a3, 2
+1:auipc t0, %pcrel_hi(.Ltable)
+  add a3, a3, t0
+.option push
+.option norvc
+.Ltable_misaligned:
+  jr a3, %pcrel_lo(1b)
+.Ltable:
+  sb a1,14(a4)
+  sb a1,13(a4)
+  sb a1,12(a4)
+  sb a1,11(a4)
+  sb a1,10(a4)
+  sb a1, 9(a4)
+  sb a1, 8(a4)
+  sb a1, 7(a4)
+  sb a1, 6(a4)
+  sb a1, 5(a4)
+  sb a1, 4(a4)
+  sb a1, 3(a4)
+  sb a1, 2(a4)
+  sb a1, 1(a4)
+  sb a1, 0(a4)
+.option pop
+  ret
+
+.Lwordify:
+  and a1, a1, 0xFF
+  sll a3, a1, 8
+  or a1, a1, a3
+  sll a3, a1, 16
+  or a1, a1, a3
+#if __riscv_xlen == 64
+  sll a3, a1, 32
+  or a1, a1, a3
+#endif
+  j .Lwordified
+
+.Lmisaligned:
+  sll a3, a5, 2
+1:auipc t0, %pcrel_hi(.Ltable_misaligned)
+  add a3, a3, t0
+  mv t0, ra
+  jalr a3, %pcrel_lo(1b)
+  mv ra, t0
+
+  add a5, a5, -16
+  sub a4, a4, a5
+  add a2, a2, a5
+  bleu a2, a6, .Ltiny
+  j .Laligned
+END(memset)
+
+weak_alias(memset, __GI_memset)
diff --git a/sysdeps/riscv/strcmp.S b/sysdeps/riscv/strcmp.S
new file mode 100644
index 0000000000..8cd113e5d4
--- /dev/null
+++ b/sysdeps/riscv/strcmp.S
@@ -0,0 +1,159 @@
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Andrew Waterman (andrew@sifive.com).
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library. If not, see
+   <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+# error
+#endif
+
+ENTRY(strcmp)
+  or a4, a0, a1
+  li t2, -1
+  and a4, a4, SZREG-1
+  bnez a4, .Lmisaligned
+
+#if SZREG == 4
+  li t3, 0x7f7f7f7f
+#else
+  ld t3, mask
+#endif
+
+  .macro check_one_word i n
+    REG_L a2, \i*SZREG(a0)
+    REG_L a3, \i*SZREG(a1)
+
+    and t0, a2, t3
+    or t1, a2, t3
+    add t0, t0, t3
+    or t0, t0, t1
+
+    bne t0, t2, .Lnull\i
+    .if \i+1-\n
+      bne a2, a3, .Lmismatch
+    .else
+      add a0, a0, \n*SZREG
+      add a1, a1, \n*SZREG
+      beq a2, a3, .Lloop
+      # fall through to .Lmismatch
+    .endif
+  .endm
+
+  .macro foundnull i n
+    .ifne \i
+      .Lnull\i:
+      add a0, a0, \i*SZREG
+      add a1, a1, \i*SZREG
+      .ifeq \i-1
+        .Lnull0:
+      .endif
+      bne a2, a3, .Lmisaligned
+      li a0, 0
+      ret
+    .endif
+  .endm
+
+.Lloop:
+  # examine full words at a time, favoring strings of a couple dozen chars
+#if __riscv_xlen == 32
+  check_one_word 0 5
+  check_one_word 1 5
+  check_one_word 2 5
+  check_one_word 3 5
+  check_one_word 4 5
+#else
+  check_one_word 0 3
+  check_one_word 1 3
+  check_one_word 2 3
+#endif
+  # backwards branch to .Lloop contained above
+
+.Lmismatch:
+  # words don't match, but a2 has no null byte.
+#if __riscv_xlen == 64
+  sll a4, a2, 48
+  sll a5, a3, 48
+  bne a4, a5, .Lmismatch_upper
+  sll a4, a2, 32
+  sll a5, a3, 32
+  bne a4, a5, .Lmismatch_upper
+#endif
+  sll a4, a2, 16
+  sll a5, a3, 16
+  bne a4, a5, .Lmismatch_upper
+
+  srl a4, a2, 8*SZREG-16
+  srl a5, a3, 8*SZREG-16
+  sub a0, a4, a5
+  and a1, a0, 0xff
+  bnez a1, 1f
+  ret
+
+.Lmismatch_upper:
+  srl a4, a4, 8*SZREG-16
+  srl a5, a5, 8*SZREG-16
+  sub a0, a4, a5
+  and a1, a0, 0xff
+  bnez a1, 1f
+  ret
+
+1:and a4, a4, 0xff
+  and a5, a5, 0xff
+  sub a0, a4, a5
+  ret
+
+.Lmisaligned:
+  # misaligned
+  lbu a2, 0(a0)
+  lbu a3, 0(a1)
+  add a0, a0, 1
+  add a1, a1, 1
+  bne a2, a3, 1f
+  bnez a2, .Lmisaligned
+
+1:
+  sub a0, a2, a3
+  ret
+
+  # cases in which a null byte was detected
+#if __riscv_xlen == 32
+  foundnull 0 5
+  foundnull 1 5
+  foundnull 2 5
+  foundnull 3 5
+  foundnull 4 5
+#else
+  foundnull 0 3
+  foundnull 1 3
+  foundnull 2 3
+#endif
+
+END(strcmp)
+
+weak_alias(strcmp, __GI_strcmp)
+
+#if SZREG == 8
+#ifdef __PIC__
+.section .rodata.cst8,"aM",@progbits,8
+#else
+.section .srodata.cst8,"aM",@progbits,8
+#endif
+.align 3
+mask: .8byte 0x7f7f7f7f7f7f7f7f
+#endif
diff --git a/sysdeps/riscv/strcpy.c b/sysdeps/riscv/strcpy.c
new file mode 100644
index 0000000000..c01c506a55
--- /dev/null
+++ b/sysdeps/riscv/strcpy.c
@@ -0,0 +1,54 @@
+#include <string.h>
+#include <stdint.h>
+
+#undef strcpy
+
+char* strcpy(char* dst, const char* src)
+{
+  char* dst0 = dst;
+
+  int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof(long)-1);
+  if (__builtin_expect(!misaligned, 1))
+    {
+      long* ldst = (long*)dst;
+      const long* lsrc = (const long*)src;
+
+      while (!__libc_detect_null(*lsrc))
+        *ldst++ = *lsrc++;
+
+      dst = (char*)ldst;
+      src = (const char*)lsrc;
+
+      char c0 = src[0];
+      char c1 = src[1];
+      char c2 = src[2];
+      if (!(*dst++ = c0)) return dst0;
+      if (!(*dst++ = c1)) return dst0;
+      char c3 = src[3];
+      if (!(*dst++ = c2)) return dst0;
+      if (sizeof(long) == 4) goto out;
+      char c4 = src[4];
+      if (!(*dst++ = c3)) return dst0;
+      char c5 = src[5];
+      if (!(*dst++ = c4)) return dst0;
+      char c6 = src[6];
+      if (!(*dst++ = c5)) return dst0;
+      if (!(*dst++ = c6)) return dst0;
+
+out:
+      *dst++ = 0;
+      return dst0;
+    }
+
+  char ch;
+  do
+    {
+      ch = *src;
+      src++;
+      dst++;
+      *(dst-1) = ch;
+    } while(ch);
+
+  return dst0;
+}
+libc_hidden_def(strcpy)
diff --git a/sysdeps/riscv/strlen.c b/sysdeps/riscv/strlen.c
new file mode 100644
index 0000000000..049e1ebd8f
--- /dev/null
+++ b/sysdeps/riscv/strlen.c
@@ -0,0 +1,39 @@
+#include <string.h>
+#include <stdint.h>
+
+#undef strlen
+
+size_t strlen(const char* str)
+{
+  const char* start = str;
+
+  if (__builtin_expect((uintptr_t)str & (sizeof(long)-1), 0)) do
+    {
+      char ch = *str;
+      str++;
+      if (!ch)
+        return str - start - 1;
+    } while ((uintptr_t)str & (sizeof(long)-1));
+
+  unsigned long* ls = (unsigned long*)str;
+  while (!__libc_detect_null(*ls++))
+    ;
+  asm volatile ("" : "+r"(ls)); /* prevent "optimization" */
+
+  str = (const char*)ls;
+  size_t ret = str - start, sl = sizeof(long);
+
+  char c0 = str[0-sl], c1 = str[1-sl], c2 = str[2-sl], c3 = str[3-sl];
+  if (c0 == 0) return ret + 0 - sl;
+  if (c1 == 0) return ret + 1 - sl;
+  if (c2 == 0) return ret + 2 - sl;
+  if (sl == 4 || c3 == 0) return ret + 3 - sl;
+
+  c0 = str[4-sl], c1 = str[5-sl], c2 = str[6-sl], c3 = str[7-sl];
+  if (c0 == 0) return ret + 4 - sl;
+  if (c1 == 0) return ret + 5 - sl;
+  if (c2 == 0) return ret + 6 - sl;
+
+  return ret + 7 - sl;
+}
+libc_hidden_def(strlen)