[6/6] riscv: add vectorized rawmemchr
Commit Message
The vector implementation uses m8 register grouping and processes data in
vector-length chunks, providing significant performance improvements on
RVV-capable hardware. Use conditional compilation to fall back to the
generic implementation when __riscv_vector is not available, maintaining
compatibility with non-vector RISC-V systems.
Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
---
newlib/libc/machine/riscv/Makefile.inc | 2 ++
newlib/libc/machine/riscv/rawmemchr-asm.S | 30 +++++++++++++++++++++++
newlib/libc/machine/riscv/rawmemchr.c | 5 ++++
3 files changed, 37 insertions(+)
create mode 100644 newlib/libc/machine/riscv/rawmemchr-asm.S
create mode 100644 newlib/libc/machine/riscv/rawmemchr.c
Comments
> +++ b/newlib/libc/machine/riscv/rawmemchr-asm.S
> @@ -0,0 +1,30 @@
> +#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
> +.text
> +.option push
> +.option arch, +zve32x
> +.global rawmemchr
> +.type rawmemchr, @function
> +rawmemchr:
> +#if __riscv_landing_pad
> + lpad 0
> +#endif
> + andi a1, a1, 0xff
> +.Lloop:
> + vsetvli t0, zero, e8, m8, ta, ma
Same comment as memccpy
> +
> + vle8ff.v v0, (a0)
> + vmseq.vx v8, v0, a1
> + vfirst.m a2, v8
> +
> + bgez a2, .Lfound
> +
> + csrr a3, vl
> + add a0, a0, a3
> + j .Lloop
> +
> +.Lfound:
> + add a0, a0, a2
> + ret
> +.size rawmemchr, .-rawmemchr
> +.option pop
> +#endif
Kito Cheng <kito.cheng@gmail.com> 於 2026年5月22日週五 下午4:48寫道:
>
> > +++ b/newlib/libc/machine/riscv/rawmemchr-asm.S
> > @@ -0,0 +1,30 @@
> > +#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
> > +.text
> > +.option push
> > +.option arch, +zve32x
> > +.global rawmemchr
> > +.type rawmemchr, @function
> > +rawmemchr:
> > +#if __riscv_landing_pad
> > + lpad 0
> > +#endif
> > + andi a1, a1, 0xff
> > +.Lloop:
> > + vsetvli t0, zero, e8, m8, ta, ma
>
> Same comment as memccpy
But this is valid since it's VLMAX here
>
> > +
> > + vle8ff.v v0, (a0)
> > + vmseq.vx v8, v0, a1
> > + vfirst.m a2, v8
> > +
> > + bgez a2, .Lfound
> > +
> > + csrr a3, vl
> > + add a0, a0, a3
> > + j .Lloop
> > +
> > +.Lfound:
> > + add a0, a0, a2
> > + ret
> > +.size rawmemchr, .-rawmemchr
> > +.option pop
> > +#endif
@@ -16,6 +16,8 @@ libc_a_SOURCES += \
%D%/memrchr-asm.S \
%D%/memrchr.c \
%D%/memset.S \
+ %D%/rawmemchr-asm.S \
+ %D%/rawmemchr.c \
%D%/setjmp.S \
%D%/stpcpy.c \
%D%/strcmp.S \
new file mode 100644
@@ -0,0 +1,30 @@
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+.text
+.option push
+.option arch, +zve32x
+.global rawmemchr
+.type rawmemchr, @function
+rawmemchr:
+#if __riscv_landing_pad
+ lpad 0
+#endif
+ andi a1, a1, 0xff
+.Lloop:
+ vsetvli t0, zero, e8, m8, ta, ma
+
+ vle8ff.v v0, (a0)
+ vmseq.vx v8, v0, a1
+ vfirst.m a2, v8
+
+ bgez a2, .Lfound
+
+ csrr a3, vl
+ add a0, a0, a3
+ j .Lloop
+
+.Lfound:
+ add a0, a0, a2
+ ret
+.size rawmemchr, .-rawmemchr
+.option pop
+#endif
new file mode 100644
@@ -0,0 +1,5 @@
+#if defined(__OPTIMIZE_SIZE__) || defined(PREFER_SIZE_OVER_SPEED) || !defined(__riscv_vector)
+# include "../../string/rawmemchr.c"
+#else
+/* rawmemchr defined in rawmemchr-asm.S */
+#endif