Patchwork [05/12] RISC-V: Generic <string.h> Routines

login
register
mail settings
Submitter Palmer Dabbelt
Date June 14, 2017, 6:30 p.m.
Message ID <20170614183048.11040-6-palmer@dabbelt.com>
Download mbox | patch
Permalink /patch/21015/
State New
Headers show

Comments

Palmer Dabbelt - June 14, 2017, 6:30 p.m.
This patch contains fast versions of the various routines from string.h
that have been implemented for RISC-V.  Since RISC-V doesn't define any
specific performance characteristics they're not optimized for any
particular microarchitecture, but are designed to be generally good.
---
 sysdeps/riscv/bits/string.h |  25 +++++++
 sysdeps/riscv/memcpy.c      |  74 +++++++++++++++++++++
 sysdeps/riscv/memset.S      | 107 +++++++++++++++++++++++++++++
 sysdeps/riscv/strcmp.S      | 159 ++++++++++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/strcpy.c      |  54 +++++++++++++++
 sysdeps/riscv/strlen.c      |  39 +++++++++++
 6 files changed, 458 insertions(+)
 create mode 100644 sysdeps/riscv/bits/string.h
 create mode 100644 sysdeps/riscv/memcpy.c
 create mode 100644 sysdeps/riscv/memset.S
 create mode 100644 sysdeps/riscv/strcmp.S
 create mode 100644 sysdeps/riscv/strcpy.c
 create mode 100644 sysdeps/riscv/strlen.c
Joseph Myers - June 14, 2017, 8:51 p.m.
On Wed, 14 Jun 2017, Palmer Dabbelt wrote:

>  sysdeps/riscv/bits/string.h |  25 +++++++

bits/string.h is about to go away, you might wish to work relative to 
Zack's patch <https://sourceware.org/ml/libc-alpha/2017-06/msg00497.html> 
(or indeed help review it).

> diff --git a/sysdeps/riscv/bits/string.h b/sysdeps/riscv/bits/string.h
> new file mode 100644
> index 0000000000..8160b8cc77
> --- /dev/null
> +++ b/sysdeps/riscv/bits/string.h
> @@ -0,0 +1,25 @@
> +/* This file should provide inline versions of string functions.
> +
> +   Surround GCC-specific parts with #ifdef __GNUC__, and use `__extern_inline'.
> +
> +   This file should define __STRING_INLINES if functions are actually defined
> +   as inlines.  */
> +
> +#ifndef _BITS_STRING_H
> +#define _BITS_STRING_H	1
> +
> +#define _STRING_INLINE_unaligned   0
> +
> +#if defined(__GNUC__) && !defined(__cplusplus)
> +
> +static __inline__ unsigned long __libc_detect_null(unsigned long w)

That obviously should not go in a public header (and isn't namespace-clean 
anyway).

> diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c
> new file mode 100644
> index 0000000000..f816a54b9b
> --- /dev/null
> +++ b/sysdeps/riscv/memcpy.c
> @@ -0,0 +1,74 @@

Should have copyright and license notice.

> diff --git a/sysdeps/riscv/strcpy.c b/sysdeps/riscv/strcpy.c
> new file mode 100644
> index 0000000000..c01c506a55
> --- /dev/null
> +++ b/sysdeps/riscv/strcpy.c
> @@ -0,0 +1,54 @@

Likewise.  Is there an actual advantage over the generic version based on 
memcpy and strlen?

> diff --git a/sysdeps/riscv/strlen.c b/sysdeps/riscv/strlen.c
> new file mode 100644
> index 0000000000..049e1ebd8f
> --- /dev/null
> +++ b/sysdeps/riscv/strlen.c
> @@ -0,0 +1,39 @@

Likewise.  This looks like fairly generic C.  Would RTH's generic string 
function improvements 
<https://sourceware.org/ml/libc-alpha/2016-12/msg00830.html> be helpful to 
RISC-V?  If so, helping to review them might save you from maintaining 
some architecture-specific variants in future.
Adhemerval Zanella Netto - June 14, 2017, 9:11 p.m.
On 14/06/2017 17:51, Joseph Myers wrote:
> On Wed, 14 Jun 2017, Palmer Dabbelt wrote:
> 
>>  sysdeps/riscv/bits/string.h |  25 +++++++
> 
> bits/string.h is about to go away, you might wish to work relative to 
> Zack's patch <https://sourceware.org/ml/libc-alpha/2017-06/msg00497.html> 
> (or indeed help review it).
> 
>> diff --git a/sysdeps/riscv/bits/string.h b/sysdeps/riscv/bits/string.h
>> new file mode 100644
>> index 0000000000..8160b8cc77
>> --- /dev/null
>> +++ b/sysdeps/riscv/bits/string.h
>> @@ -0,0 +1,25 @@
>> +/* This file should provide inline versions of string functions.
>> +
>> +   Surround GCC-specific parts with #ifdef __GNUC__, and use `__extern_inline'.
>> +
>> +   This file should define __STRING_INLINES if functions are actually defined
>> +   as inlines.  */
>> +
>> +#ifndef _BITS_STRING_H
>> +#define _BITS_STRING_H	1
>> +
>> +#define _STRING_INLINE_unaligned   0
>> +
>> +#if defined(__GNUC__) && !defined(__cplusplus)
>> +
>> +static __inline__ unsigned long __libc_detect_null(unsigned long w)
> 
> That obviously should not go in a public header (and isn't namespace-clean 
> anyway).
> 
>> diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c
>> new file mode 100644
>> index 0000000000..f816a54b9b
>> --- /dev/null
>> +++ b/sysdeps/riscv/memcpy.c
>> @@ -0,0 +1,74 @@
> 
> Should have copyright and license notice.
> 
>> diff --git a/sysdeps/riscv/strcpy.c b/sysdeps/riscv/strcpy.c
>> new file mode 100644
>> index 0000000000..c01c506a55
>> --- /dev/null
>> +++ b/sysdeps/riscv/strcpy.c
>> @@ -0,0 +1,54 @@
> 
> Likewise.  Is there an actual advantage over the generic version based on 
> memcpy and strlen?
> 
>> diff --git a/sysdeps/riscv/strlen.c b/sysdeps/riscv/strlen.c
>> new file mode 100644
>> index 0000000000..049e1ebd8f
>> --- /dev/null
>> +++ b/sysdeps/riscv/strlen.c
>> @@ -0,0 +1,39 @@
> 
> Likewise.  This looks like fairly generic C.  Would RTH's generic string 
> function improvements 
> <https://sourceware.org/ml/libc-alpha/2016-12/msg00830.html> be helpful to 
> RISC-V?  If so, helping to review them might save you from maintaining 
> some architecture-specific variants in future.
> 

Btw, I plan to update RTH's patch with some fixes and changes.

Patch

diff --git a/sysdeps/riscv/bits/string.h b/sysdeps/riscv/bits/string.h
new file mode 100644
index 0000000000..8160b8cc77
--- /dev/null
+++ b/sysdeps/riscv/bits/string.h
@@ -0,0 +1,25 @@ 
+/* This file should provide inline versions of string functions.
+
+   Surround GCC-specific parts with #ifdef __GNUC__, and use `__extern_inline'.
+
+   This file should define __STRING_INLINES if functions are actually defined
+   as inlines.  */
+
+#ifndef _BITS_STRING_H
+#define _BITS_STRING_H	1
+
+#define _STRING_INLINE_unaligned   0
+
+#if defined(__GNUC__) && !defined(__cplusplus)
+/* Nonzero iff W has a zero byte: 0x80 in each zero byte's place, else 0.  */
+static __inline__ unsigned long __libc_detect_null(unsigned long w)
+{
+  unsigned long mask = 0x7f7f7f7f; /* Low seven bits of each byte.  */
+  if (sizeof(long) == 8)
+    mask = ((mask << 16) << 16) | mask; /* Not << 32: invalid for 32-bit long.  */
+  return ~(((w & mask) + mask) | w | mask);
+}
+
+#endif /* __GNUC__ && !__cplusplus */
+
+#endif /* bits/string.h */
diff --git a/sysdeps/riscv/memcpy.c b/sysdeps/riscv/memcpy.c
new file mode 100644
index 0000000000..f816a54b9b
--- /dev/null
+++ b/sysdeps/riscv/memcpy.c
@@ -0,0 +1,74 @@ 
+#include <string.h>
+#include <stdint.h>
+/* Copy one T from *B to *A and advance both; the load precedes the store.  */
+#define MEMCPY_LOOP_BODY(a, b, t) {	\
+    t tt = *b;				\
+    a++, b++;				\
+    *(a - 1) = tt;			\
+  }
+/* Copy N bytes from BB to AA in ascending address order; return AA.  */
+void *__memcpy(void *aa, const void *bb, size_t n)
+{
+  uintptr_t msk = sizeof(long) - 1; /* Address bits below long alignment.  */
+  char *a = (char *)aa, *end = a + n;
+  const char *b = (const char *)bb;
+  long *la, *lend;
+  const long *lb;
+  int same_alignment = ((uintptr_t)a & msk) == ((uintptr_t)b & msk);
+  /* A and B can be long-aligned together only at equal offsets in a long.
+     Handle small cases, and those without mutual alignment.  */
+  if (__glibc_unlikely(!same_alignment || n < sizeof(long)))
+    {
+small: /* Also reached by goto to copy the bytes left after the long loops.  */
+      while (a < end)
+	MEMCPY_LOOP_BODY(a, b, char);
+      return aa;
+    }
+
+  /* Obtain alignment.  At most sizeof(long)-1 bytes, and N >= sizeof(long).  */
+  if (__glibc_unlikely(((uintptr_t)a & msk) != 0))
+    while ((uintptr_t)a & msk)
+      MEMCPY_LOOP_BODY(a, b, char);
+
+  la = (long *)a;
+  lb = (const long *)b;
+  lend = (long *)((uintptr_t)end & ~msk); /* END rounded down to a long.  */
+
+  /* Handle large, aligned cases: 9 longs per pass, all loads then all stores.  */
+  if (__glibc_unlikely(la < lend - 8)) /* True iff at least 9 longs remain.  */
+    while (la < lend - 8)
+      {
+	long b0 = *lb++;
+	long b1 = *lb++;
+	long b2 = *lb++;
+	long b3 = *lb++;
+	long b4 = *lb++;
+	long b5 = *lb++;
+	long b6 = *lb++;
+	long b7 = *lb++;
+	long b8 = *lb++;
+	*la++ = b0;
+	*la++ = b1;
+	*la++ = b2;
+	*la++ = b3;
+	*la++ = b4;
+	*la++ = b5;
+	*la++ = b6;
+	*la++ = b7;
+	*la++ = b8;
+      }
+
+  /* Handle aligned, small case: remaining whole longs, one at a time.  */
+  while (la < lend)
+    MEMCPY_LOOP_BODY(la, lb, long);
+
+  /* Handle misaligned remainder: fewer than sizeof(long) trailing bytes.  */
+  a = (char *)la;
+  b = (const char *)lb;
+  if (__glibc_unlikely(a < end))
+    goto small;
+
+  return aa;
+}
+weak_alias (__memcpy, memcpy)
+libc_hidden_builtin_def (memcpy)
diff --git a/sysdeps/riscv/memset.S b/sysdeps/riscv/memset.S
new file mode 100644
index 0000000000..a85d72b4fc
--- /dev/null
+++ b/sysdeps/riscv/memset.S
@@ -0,0 +1,107 @@ 
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Andrew Waterman (andrew@sifive.com).
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+/* memset: a0 = destination, a1 = fill byte, a2 = length; a0 is never written.  */
+ENTRY(memset)
+  li a6, 15                 /* a6 = 15: longest length the byte table handles.  */
+  mv a4, a0                 /* a4 = store cursor.  */
+  bleu a2, a6, .Ltiny       /* Length <= 15: byte stores only.  */
+  and a5, a4, 15            /* a5 = cursor offset within 16 bytes.  */
+  bnez a5, .Lmisaligned
+/* Here a4 is 16-byte aligned and a2 >= 16.  */
+.Laligned:
+  bnez a1, .Lwordify        /* a1 == 0 is already a replicated fill word.  */
+/* a1 now holds the fill byte in every byte of the register.  */
+.Lwordified:
+  and a3, a2, ~15           /* a3 = length rounded down to 16.  */
+  and a2, a2, 15            /* a2 = bytes left after the 16-byte blocks.  */
+  add a3, a3, a4            /* a3 = end address of the blocks.  */
+/* Store 16 bytes per pass; the loop body runs at least once.  */
+#if __riscv_xlen == 64
+1:sd a1, 0(a4)
+  sd a1, 8(a4)
+#else
+1:sw a1, 0(a4)
+  sw a1, 4(a4)
+  sw a1, 8(a4)
+  sw a1, 12(a4)
+#endif
+  add a4, a4, 16
+  bltu a4, a3, 1b
+
+  bnez a2, .Ltiny
+  ret
+/* Store the final a2 (<= 15) bytes at a4, then return.  */
+.Ltiny:
+  sub a3, a6, a2            /* a3 = 15 - length = number of stores to skip...  */
+  sll a3, a3, 2             /* ...times 4 bytes per uncompressed sb.  */
+1:auipc t0, %pcrel_hi(.Ltable)
+  add a3, a3, t0
+.option push
+.option norvc               /* Keep every table entry 4 bytes wide.  */
+.Ltable_misaligned:         /* One instruction (4 bytes) before .Ltable.  */
+  jr a3, %pcrel_lo(1b)      /* Enter the table so exactly a2 stores run.  */
+.Ltable:
+  sb a1,14(a4)
+  sb a1,13(a4)
+  sb a1,12(a4)
+  sb a1,11(a4)
+  sb a1,10(a4)
+  sb a1, 9(a4)
+  sb a1, 8(a4)
+  sb a1, 7(a4)
+  sb a1, 6(a4)
+  sb a1, 5(a4)
+  sb a1, 4(a4)
+  sb a1, 3(a4)
+  sb a1, 2(a4)
+  sb a1, 1(a4)
+  sb a1, 0(a4)
+.option pop
+  ret
+/* Replicate the low byte of a1 into every byte of the register.  */
+.Lwordify:
+  and a1, a1, 0xFF
+  sll a3, a1, 8
+  or  a1, a1, a3
+  sll a3, a1, 16
+  or  a1, a1, a3
+#if __riscv_xlen == 64
+  sll a3, a1, 32
+  or  a1, a1, a3
+#endif
+  j .Lwordified
+/* Fill up to the next 16-byte boundary (16 - a5 bytes) via the store table.  */
+.Lmisaligned:
+  sll a3, a5, 2             /* Entry offset: 4 bytes per table slot.  */
+1:auipc t0, %pcrel_hi(.Ltable_misaligned)
+  add a3, a3, t0
+  mv t0, ra                 /* Save ra: the jalr below links through it.  */
+  jalr a3, %pcrel_lo(1b)    /* The ret ending the table comes back here.  */
+  mv ra, t0
+/* a5 - 16 = -(bytes just stored).  */
+  add a5, a5, -16
+  sub a4, a4, a5            /* Advance the cursor past them.  */
+  add a2, a2, a5            /* Remaining length.  */
+  bleu a2, a6, .Ltiny
+  j .Laligned
+END(memset)
+
+weak_alias(memset, __GI_memset)
diff --git a/sysdeps/riscv/strcmp.S b/sysdeps/riscv/strcmp.S
new file mode 100644
index 0000000000..8cd113e5d4
--- /dev/null
+++ b/sysdeps/riscv/strcmp.S
@@ -0,0 +1,159 @@ 
+/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Andrew Waterman (andrew@sifive.com).
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+# error
+#endif
+/* strcmp: compare the strings at a0 and a1; a0 returns <0, 0 or >0.  */
+ENTRY(strcmp)
+  or    a4, a0, a1
+  li    t2, -1              /* t2 = all ones: value of t0 when no byte is zero.  */
+  and   a4, a4, SZREG-1
+  bnez  a4, .Lmisaligned    /* Either pointer unaligned: compare bytewise.  */
+/* t3 = 0x7f in every byte, the mask for the zero-byte test.  */
+#if SZREG == 4
+  li t3, 0x7f7f7f7f
+#else
+  ld t3, mask
+#endif
+/* Load word I of each string; leave the loop on a zero byte or a mismatch.  */
+  .macro check_one_word i n
+    REG_L a2, \i*SZREG(a0)
+    REG_L a3, \i*SZREG(a1)
+/* t0 = ((a2 & t3) + t3) | a2 | t3, which is all ones iff a2 has no zero byte.  */
+    and   t0, a2, t3
+    or    t1, a2, t3
+    add   t0, t0, t3
+    or    t0, t0, t1
+/* Only a2 is tested: if a3 alone has a zero byte, the words differ anyway.  */
+    bne   t0, t2, .Lnull\i
+    .if \i+1-\n
+      bne   a2, a3, .Lmismatch   /* Not the last word of the pass.  */
+    .else
+      add   a0, a0, \n*SZREG     /* Last word: step past all N words...  */
+      add   a1, a1, \n*SZREG
+      beq   a2, a3, .Lloop       /* ...and start another pass if equal.  */
+      # fall through to .Lmismatch
+    .endif
+  .endm
+/* Exit for a zero byte in word I: equal words mean equal strings, else rescan.  */
+  .macro foundnull i n
+    .ifne \i
+      .Lnull\i:
+      add   a0, a0, \i*SZREG     /* Point a0/a1 at the word holding the NUL.  */
+      add   a1, a1, \i*SZREG
+      .ifeq \i-1
+        .Lnull0:                 /* Word 0 needs no pointer adjustment.  */
+      .endif
+      bne   a2, a3, .Lmisaligned /* Words differ: decide bytewise.  */
+      li    a0, 0
+      ret
+    .endif
+  .endm
+
+.Lloop:
+  # examine full words at a time, favoring strings of a couple dozen chars
+#if __riscv_xlen == 32
+  check_one_word 0 5
+  check_one_word 1 5
+  check_one_word 2 5
+  check_one_word 3 5
+  check_one_word 4 5
+#else
+  check_one_word 0 3
+  check_one_word 1 3
+  check_one_word 2 3
+#endif
+  # backwards branch to .Lloop contained above
+/* Little-endian: the low-order bytes are the earlier characters.  */
+.Lmismatch:
+  # words don't match, but a2 has no null byte.
+#if __riscv_xlen == 64
+  sll   a4, a2, 48          /* Isolate characters 0-1 at the top.  */
+  sll   a5, a3, 48
+  bne   a4, a5, .Lmismatch_upper
+  sll   a4, a2, 32          /* Characters 0-3; 0-1 are known equal.  */
+  sll   a5, a3, 32
+  bne   a4, a5, .Lmismatch_upper
+#endif
+  sll   a4, a2, 16          /* All but the top two characters.  */
+  sll   a5, a3, 16
+  bne   a4, a5, .Lmismatch_upper
+/* Only the top 16 bits differ: compare those two characters.  */
+  srl   a4, a2, 8*SZREG-16
+  srl   a5, a3, 8*SZREG-16
+  sub   a0, a4, a5
+  and   a1, a0, 0xff        /* Nonzero iff the first of the pair differs.  */
+  bnez  a1, 1f
+  ret
+/* a4/a5 differ in their top 16 bits; compare that pair of characters.  */
+.Lmismatch_upper:
+  srl   a4, a4, 8*SZREG-16
+  srl   a5, a5, 8*SZREG-16
+  sub   a0, a4, a5
+  and   a1, a0, 0xff        /* Nonzero iff the first of the pair differs.  */
+  bnez  a1, 1f
+  ret
+/* The first character of the pair differs: return its byte difference.  */
+1:and   a4, a4, 0xff
+  and   a5, a5, 0xff
+  sub   a0, a4, a5
+  ret
+/* Bytewise loop: stop at the first differing byte or a shared NUL.  */
+.Lmisaligned:
+  # misaligned
+  lbu   a2, 0(a0)
+  lbu   a3, 0(a1)
+  add   a0, a0, 1
+  add   a1, a1, 1
+  bne   a2, a3, 1f
+  bnez  a2, .Lmisaligned
+/* a0 = difference of the last bytes read (0 if both were NUL).  */
+1:
+  sub   a0, a2, a3
+  ret
+
+  # cases in which a null byte was detected
+#if __riscv_xlen == 32
+  foundnull 0 5
+  foundnull 1 5
+  foundnull 2 5
+  foundnull 3 5
+  foundnull 4 5
+#else
+  foundnull 0 3
+  foundnull 1 3
+  foundnull 2 3
+#endif
+
+END(strcmp)
+
+weak_alias(strcmp, __GI_strcmp)
+
+#if SZREG == 8
+#ifdef __PIC__
+.section .rodata.cst8,"aM",@progbits,8
+#else
+.section .srodata.cst8,"aM",@progbits,8
+#endif
+.align 3
+mask: .8byte 0x7f7f7f7f7f7f7f7f
+#endif
diff --git a/sysdeps/riscv/strcpy.c b/sysdeps/riscv/strcpy.c
new file mode 100644
index 0000000000..c01c506a55
--- /dev/null
+++ b/sysdeps/riscv/strcpy.c
@@ -0,0 +1,54 @@ 
+#include <string.h>
+#include <stdint.h>
+
+#undef strcpy
+/* Copy the string SRC, including its NUL, to DST and return DST.  */
+char* strcpy(char* dst, const char* src)
+{
+  char* dst0 = dst;
+  /* Nonzero unless DST and SRC are both long-aligned.  */
+  int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof(long)-1);
+  if (__builtin_expect(!misaligned, 1))
+  {
+    long* ldst = (long*)dst;
+    const long* lsrc = (const long*)src;
+    /* Copy whole longs until the next source long contains a zero byte.  */
+    while (!__libc_detect_null(*lsrc))
+      *ldst++ = *lsrc++;
+
+    dst = (char*)ldst;
+    src = (const char*)lsrc;
+    /* Copy that last long bytewise up to its NUL; loads run ahead of stores.  */
+    char c0 = src[0];
+    char c1 = src[1];
+    char c2 = src[2];
+    if (!(*dst++ = c0)) return dst0;
+    if (!(*dst++ = c1)) return dst0;
+    char c3 = src[3];
+    if (!(*dst++ = c2)) return dst0;
+    if (sizeof(long) == 4) goto out; /* Bytes 0-2 nonzero: byte 3 is the NUL.  */
+    char c4 = src[4];
+    if (!(*dst++ = c3)) return dst0;
+    char c5 = src[5];
+    if (!(*dst++ = c4)) return dst0;
+    char c6 = src[6];
+    if (!(*dst++ = c5)) return dst0;
+    if (!(*dst++ = c6)) return dst0;
+    /* All earlier bytes of the long were nonzero, so its last byte is the NUL.  */
+out:
+    *dst++ = 0;
+    return dst0;
+  }
+  /* Misaligned operands: plain bytewise copy that stops after the NUL.  */
+  char ch;
+  do
+  {
+    ch = *src;
+    src++;
+    dst++;
+    *(dst-1) = ch;
+  } while(ch);
+
+  return dst0;
+}
+libc_hidden_def(strcpy)
diff --git a/sysdeps/riscv/strlen.c b/sysdeps/riscv/strlen.c
new file mode 100644
index 0000000000..049e1ebd8f
--- /dev/null
+++ b/sysdeps/riscv/strlen.c
@@ -0,0 +1,39 @@ 
+#include <string.h>
+#include <stdint.h>
+
+#undef strlen
+/* Return the number of bytes in STR before its terminating NUL.  */
+size_t strlen(const char* str)
+{
+  const char* start = str;
+  /* Scan bytewise until STR is long-aligned, returning early on a NUL.  */
+  if (__builtin_expect((uintptr_t)str & (sizeof(long)-1), 0)) do
+  {
+    char ch = *str;
+    str++;
+    if (!ch)
+      return str - start - 1; /* STR is already one past the NUL.  */
+  } while ((uintptr_t)str & (sizeof(long)-1));
+  /* Scan whole longs; LS ends one long past the long that holds the NUL.  */
+  unsigned long* ls = (unsigned long*)str;
+  while (!__libc_detect_null(*ls++))
+    ;
+  asm volatile ("" : "+r"(ls)); /* prevent "optimization" */
+
+  str = (const char*)ls;
+  size_t ret = str - start, sl = sizeof(long);
+  /* SL is unsigned, so index i-sl wraps; str[i-sl] is byte I of that long.  */
+  char c0 = str[0-sl], c1 = str[1-sl], c2 = str[2-sl], c3 = str[3-sl];
+  if (c0 == 0)            return ret + 0 - sl;
+  if (c1 == 0)            return ret + 1 - sl;
+  if (c2 == 0)            return ret + 2 - sl;
+  if (sl == 4 || c3 == 0) return ret + 3 - sl;
+  /* Not reached when long is 4 bytes; examine bytes 4-6 of an 8-byte long.  */
+  c0 = str[4-sl], c1 = str[5-sl], c2 = str[6-sl], c3 = str[7-sl];
+  if (c0 == 0)            return ret + 4 - sl;
+  if (c1 == 0)            return ret + 5 - sl;
+  if (c2 == 0)            return ret + 6 - sl;
+  /* Bytes 0-6 are nonzero, so the NUL is byte 7.  */
+  return ret + 7 - sl;
+}
+libc_hidden_def(strlen)