[2/6] riscv: add vectorized memchr
Commit Message
The vector implementation uses m8 register grouping and fault-only-first
loads (vle8ff.v) to process data in vector-length chunks, providing
significant performance improvements on RVV-capable hardware. Use
conditional compilation to fall back to the scalar implementation when
__riscv_vector is not available, or when building for size
(__OPTIMIZE_SIZE__ or PREFER_SIZE_OVER_SPEED is defined), maintaining
compatibility with non-vector RISC-V systems.
Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
---
newlib/libc/machine/riscv/Makefile.inc | 1 +
newlib/libc/machine/riscv/memchr-asm.S | 37 ++++++++++++++++++++++++++
newlib/libc/machine/riscv/memchr.c | 4 +++
3 files changed, 42 insertions(+)
create mode 100644 newlib/libc/machine/riscv/memchr-asm.S
@@ -3,6 +3,7 @@ libc_a_SOURCES += \
%D%/ieeefp.c \
%D%/memccpy-asm.S \
%D%/memccpy.c \
+ %D%/memchr-asm.S \
%D%/memchr.c \
%D%/memcpy-asm.S \
%D%/memcpy.c \
new file mode 100644
@@ -0,0 +1,37 @@
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+/* memchr (s, c, n), RVV version.  In: a0 = s, a1 = c, a2 = n.  Out: a0 = address of first match, or 0. */
+/* Modifies a1-a4, the m8 vector group at v0, v8, and the vl/vtype CSRs. */
+.text
+.option push
+.option arch, +zve32x
+.global memchr
+.type memchr, @function
+memchr:
+#if __riscv_landing_pad
+ lpad 0
+#endif
+ beqz a2, .Lmiss /* n == 0: nothing to search, report no match. */
+ andi a1, a1, 0xff /* Keep only the low 8 bits of c. */
+.Lscan:
+ /* Per pass: a0 = next byte to examine, a2 = bytes still to examine (non-zero here). */
+ vsetvli zero, a2, e8, m8, ta, ma /* Byte elements, LMUL = 8, element count requested from a2. */
+ vle8ff.v v0, (a0) /* Fault-only-first load of the current chunk. */
+ vmseq.vx v8, v0, a1 /* Mask v8: set where a loaded byte equals a1. */
+ vfirst.m a4, v8 /* a4 = index of the first set mask bit, negative if none. */
+ bgez a4, .Lhit
+ /* No match in this chunk: step past the elements handled, using vl as read back after the load. */
+ csrr a3, vl
+ add a0, a0, a3
+ sub a2, a2, a3
+ bnez a2, .Lscan
+ /* Count exhausted without a match: fall through to the not-found return. */
+.Lmiss:
+ li a0, 0
+ ret
+
+.Lhit:
+ add a0, a0, a4 /* Chunk base + element index = address of the match. */
+ ret
+.size memchr, .-memchr
+.option pop
+#endif
@@ -29,6 +29,9 @@ QUICKREF
memchr ansi pure
*/
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+/* memchr defined in memchr-asm.S */
+#else
#include <sys/asm.h>
#include <stddef.h>
#include "rv_string.h"
@@ -150,3 +153,4 @@ memchr (const void *src_void,
return NULL;
}
+#endif