[RFC,14/19] riscv: Add accelerated strlen routine

Message ID 20230207001618.458947-15-christoph.muellner@vrull.eu
State New
Series riscv: ifunc support with optimized mem*/str*/cpu_relax routines

Checks

Context                 Check    Description
dj/TryBot-apply_patch   success  Patch applied to master at the time it was sent

Commit Message

Christoph Müllner Feb. 7, 2023, 12:16 a.m. UTC
  From: Christoph Müllner <christoph.muellner@vrull.eu>

The implementation of strlen() can be accelerated using Zbb's orc.b
instruction, which sets each byte of a register to 0xff if that byte is
non-zero and to 0x00 if it is zero. This lets a terminating NUL within a
register-sized chunk be located with a single ctz (or clz on big-endian).
Let's add an optimized implementation that uses it.
The algorithm follows the example implementation in the Bitmanip
specification.

Signed-off-by: Christoph Müllner <christoph.muellner@vrull.eu>
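
For context: orc.b replaces every byte of its source register with 0xff
if the byte is non-zero and with 0x00 if it is zero. A minimal C sketch
of the NUL-detection idea this patch relies on (model_orc_b() and
first_nul_byte() are illustrative helpers written for this note, not
real intrinsics or glibc functions):

  #include <stdint.h>

  /* Illustrative C model of Zbb's orc.b on a 64-bit register:
     each byte becomes 0xff if non-zero, 0x00 if zero.  The real
     instruction does this in a single operation.  */
  static uint64_t
  model_orc_b (uint64_t x)
  {
    uint64_t r = 0;
    for (int i = 0; i < 8; i++)
      if ((x >> (i * 8)) & 0xff)
        r |= (uint64_t) 0xff << (i * 8);
    return r;
  }

  /* Little-endian NUL search within one register-sized chunk:
     after ~orc.b only the NUL bytes are 0xff, so ctz/8 gives the
     byte index of the first NUL (8 if the chunk has none).  */
  static int
  first_nul_byte (uint64_t chunk)
  {
    uint64_t mask = ~model_orc_b (chunk);
    return mask ? __builtin_ctzll (mask) / 8 : 8;
  }

This mirrors the orc.b / not / ctz / srli-by-3 sequence in strlen_zbb.S
below; big-endian targets use sll and clz instead of srl and ctz.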
---
 sysdeps/riscv/multiarch/Makefile          |   3 +-
 sysdeps/riscv/multiarch/ifunc-impl-list.c |   1 +
 sysdeps/riscv/multiarch/strlen.c          |   6 +-
 sysdeps/riscv/multiarch/strlen_zbb.S      | 105 ++++++++++++++++++++++
 4 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 sysdeps/riscv/multiarch/strlen_zbb.S
  

Patch

diff --git a/sysdeps/riscv/multiarch/Makefile b/sysdeps/riscv/multiarch/Makefile
index 8e2b020233..b2247b7326 100644
--- a/sysdeps/riscv/multiarch/Makefile
+++ b/sysdeps/riscv/multiarch/Makefile
@@ -8,5 +8,6 @@  sysdep_routines += \
 	memset_rv64_unaligned \
 	memset_rv64_unaligned_cboz64 \
 	\
-	strlen_generic
+	strlen_generic \
+	strlen_zbb
 endif
diff --git a/sysdeps/riscv/multiarch/ifunc-impl-list.c b/sysdeps/riscv/multiarch/ifunc-impl-list.c
index f848fc8401..2b4d2e1c17 100644
--- a/sysdeps/riscv/multiarch/ifunc-impl-list.c
+++ b/sysdeps/riscv/multiarch/ifunc-impl-list.c
@@ -55,6 +55,7 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
 
   IFUNC_IMPL (i, name, strlen,
+	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_zbb)
 	      IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_generic))
 
 
diff --git a/sysdeps/riscv/multiarch/strlen.c b/sysdeps/riscv/multiarch/strlen.c
index 85f7a91c9f..8b2f4d94b2 100644
--- a/sysdeps/riscv/multiarch/strlen.c
+++ b/sysdeps/riscv/multiarch/strlen.c
@@ -30,8 +30,12 @@ 
 
 extern __typeof (__redirect_strlen) __libc_strlen;
 extern __typeof (__redirect_strlen) __strlen_generic attribute_hidden;
+extern __typeof (__redirect_strlen) __strlen_zbb attribute_hidden;
 
-libc_ifunc (__libc_strlen, __strlen_generic);
+libc_ifunc (__libc_strlen,
+	    HAVE_RV(zbb)
+	     ? __strlen_zbb
+	     : __strlen_generic);
 
 # undef strlen
 strong_alias (__libc_strlen, strlen);
diff --git a/sysdeps/riscv/multiarch/strlen_zbb.S b/sysdeps/riscv/multiarch/strlen_zbb.S
new file mode 100644
index 0000000000..a0ca599c8e
--- /dev/null
+++ b/sysdeps/riscv/multiarch/strlen_zbb.S
@@ -0,0 +1,105 @@ 
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* Assumptions: rvi_zbb.  */
+/* Implementation from the Bitmanip specification.  */
+
+#define src		a0
+#define result		a0
+#define addr		a1
+#define data		a2
+#define offset		a3
+#define offset_bits	a3
+#define valid_bytes	a4
+#define m1		a4
+
+#if __riscv_xlen == 64
+# define REG_L	ld
+# define SZREG	8
+#else
+# define REG_L	lw
+# define SZREG	4
+#endif
+
+#define BITSPERBYTELOG 3
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define CZ	clz
+# define SHIFT	sll
+#else
+# define CZ	ctz
+# define SHIFT	srl
+#endif
+
+#ifndef STRLEN
+# define STRLEN __strlen_zbb
+#endif
+
+.option push
+.option arch,+zbb
+
+ENTRY_ALIGN (STRLEN, 6)
+	andi	offset, src, SZREG-1
+	andi	addr, src, -SZREG
+
+	li	valid_bytes, SZREG
+	sub	valid_bytes, valid_bytes, offset
+	slli	offset_bits, offset, BITSPERBYTELOG
+	REG_L	data, 0(addr)
+	/* Shift the partial/unaligned chunk we loaded to remove the bytes
+	 * from before the start of the string, adding NUL bytes at the end. */
+	SHIFT	data, data, offset_bits
+	orc.b	data, data
+	not	data, data
+	/* Non-NUL bytes in the string have been expanded to 0x00, while
+	 * NUL bytes have become 0xff. Search for the first set bit
+	 * (corresponding to a NUL byte in the original chunk). */
+	CZ	data, data
+	/* The first chunk is special: compare against the number of valid
+	 * bytes in this chunk. */
+	srli	result, data, 3
+	bgtu	valid_bytes, result, L(done)
+	addi	offset, addr, SZREG
+	li	m1, -1
+
+	/* Our critical loop is 4 instructions and processes data in 4 byte
+	 * or 8 byte chunks.  */
+	.p2align 2
+L(loop):
+	REG_L	data, SZREG(addr)
+	addi	addr, addr, SZREG
+	orc.b	data, data
+	beq	data, m1, L(loop)
+
+L(epilogue):
+	not	data, data
+	CZ	data, data
+	sub	offset, addr, offset
+	add	result, result, offset
+	srli	data, data, 3
+	add	result, result, data
+L(done):
+	ret
+
+.option pop
+
+END (STRLEN)
+libc_hidden_builtin_def (STRLEN)
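
For readers following the assembly, the routine above is roughly
equivalent to the following RV64 little-endian C model (reusing
model_orc_b() from the earlier sketch; like the assembly, it reads whole
aligned words past the NUL, which is fine at the ISA level but not
strictly conforming C, so treat it as documentation rather than a
drop-in implementation):

  #include <stddef.h>
  #include <stdint.h>

  static size_t
  model_strlen_zbb (const char *s)
  {
    const size_t offset = (uintptr_t) s % sizeof (uint64_t);
    const uint64_t *p = (const uint64_t *) ((uintptr_t) s - offset);

    /* First chunk: shift out the bytes before the start of the string.
       The shift fills the top with zero bytes, which look like NULs,
       so the search cannot run past valid_bytes undetected.  */
    uint64_t mask = ~model_orc_b (*p >> (offset * 8));
    /* On RISC-V, ctz of 0 is XLEN; model that explicitly in C.  */
    size_t nul = mask ? (size_t) __builtin_ctzll (mask) / 8
                      : sizeof (uint64_t);
    size_t valid_bytes = sizeof (uint64_t) - offset;
    if (nul < valid_bytes)
      return nul;

    /* Aligned chunks: loop until a chunk contains a zero byte,
       then add that byte's index within the chunk.  */
    size_t len = valid_bytes;
    for (;;)
      {
        mask = ~model_orc_b (*++p);
        if (mask != 0)
          return len + __builtin_ctzll (mask) / 8;
        len += sizeof (uint64_t);
      }
  }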