riscv: add RVV-optimized memchr
Checks
| Context | Check | Description |
|---|---|---|
| redhat-pt-bot/TryBot-apply_patch | success | Patch applied to master at the time it was sent |
| linaro-tcwg-bot/tcwg_glibc_build--master-arm | success | Build passed |
| linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 | success | Build passed |
| linaro-tcwg-bot/tcwg_glibc_check--master-arm | fail | Test failed |
| linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 | fail | Test failed |
Commit Message
This patch introduces an RVV (RISC-V Vector Extension) based
implementation of memchr, leveraging vector instructions to
improve data-level parallelism.
The implementation has been tested on SG2044 (RVV 128-bit) and
Banana Pi F3 (RVV 256-bit) platforms.
Benchmark results show no observable regression on small inputs,
while providing significant performance improvements on larger
inputs.
If this approach proves effective, additional optimized mem/str
routines, which have already been implemented and validated on
existing platforms and which demonstrate performance gains, will
be submitted in follow-up patches.
Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>
---
sysdeps/riscv/multiarch/memchr-generic.c | 24 ++++
sysdeps/riscv/multiarch/memchr-vector.S | 26 ++++
sysdeps/riscv/rvv/memchr.S | 129 ++++++++++++++++++
.../unix/sysv/linux/riscv/multiarch/Makefile | 3 +
.../linux/riscv/multiarch/ifunc-impl-list.c | 6 +
.../unix/sysv/linux/riscv/multiarch/memchr.c | 57 ++++++++
6 files changed, 245 insertions(+)
create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
create mode 100644 sysdeps/riscv/multiarch/memchr-vector.S
create mode 100644 sysdeps/riscv/rvv/memchr.S
create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
Comments
> @@ -0,0 +1,129 @@
> +/* RISC-V RVV based memchr.
> + Copyright (C) 2026 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#ifndef MEMCHR
> +# define MEMCHR __memchr
> +#endif
> +
> +#define srcin a0
> +#define chrin a1
> +#define cntin a2
> +#define result a0
> +
> +#define src a0
> +#define cntrem a2
> +#define cntrem_4 a5
> +
> +#define vdata v0
> +#define vmask v16
> +#define first_index a4
> +
> +#define tmp t0
> +#define vset_vl t6
> +
> +#define src_2 a3
> +#define src_3 a6
> +#define src_4 a7
> +
> +#define vdata_1 v0
> +#define vdata_2 v4
> +#define vdata_3 v8
> +#define vdata_4 v12
> +
> +#define vmask_1 v16
> +#define vmask_2 v20
> +#define vmask_3 v24
> +#define vmask_4 v28
> +
> +#define first_index_1 a4
> +#define first_index_2 t2
> +#define first_index_3 t3
> +#define first_index_4 t4
> +
> +ENTRY (MEMCHR)
> +.option push
> +.option arch, +v
> +.option arch, +zba
^^^ do we really need zba here? I don't see any instruction from zba
in the implementation.
> + beqz cntin, L(ret)
> + and tmp, cntin, 0x3F
> + beqz tmp, L(loop_pre)
> + vsetvli vset_vl, tmp, e8, m8, ta, ma
> + vle8.v vdata, (srcin)
> + vmseq.vx vmask, vdata, chrin
> + vfirst.m first_index, vmask
> + bgez first_index, L(found)
> +
> + beq vset_vl, cntin, L(ret)
> +
> + add src, srcin, vset_vl
> + sub cntrem, cntin, vset_vl
> +
> +L(loop_pre):
> + srli cntrem_4, cntrem, 2
> +L(loop):
> + vsetvli vset_vl, cntrem_4, e8, m4, ta, ma
> +
> + vle8.v vdata_1, (src)
> + add src_2, src, vset_vl
> + vle8.v vdata_2, (src_2)
> + add src_3, src_2, vset_vl
> + vle8.v vdata_3, (src_3)
> + add src_4, src_3, vset_vl
> + vle8.v vdata_4, (src_4)
> +
> + vmseq.vx vmask_1, vdata_1, chrin
> + vmseq.vx vmask_2, vdata_2, chrin
> + vmseq.vx vmask_3, vdata_3, chrin
> + vmseq.vx vmask_4, vdata_4, chrin
> +
> + vfirst.m first_index, vmask_1
> + vfirst.m first_index_2, vmask_2
> + vfirst.m first_index_3, vmask_3
> + vfirst.m first_index_4, vmask_4
> +
> + bgez first_index_1, L(found1)
> + bgez first_index_2, L(found2)
> + bgez first_index_3, L(found3)
> + bgez first_index_4, L(found4)
> +
> + add src, src_4, vset_vl
> + sub cntrem_4, cntrem_4, vset_vl
> + bnez cntrem_4, L(loop)
> +L(ret):
> + li result, 0
> + ret
> +L(found4):
> + add result, src_4, first_index_4
> + ret
> +L(found3):
> + add result, src_3, first_index_3
> + ret
> +L(found2):
> + add result, src_2, first_index_2
> + ret
> +L(found1):
> +L(found):
> + add result, src, first_index
> + ret
> +.option pop
> +END (MEMCHR)
> +weak_alias (MEMCHR, memchr)
> +libc_hidden_builtin_def (memchr)
new file mode 100644
@@ -0,0 +1,24 @@
+/* Re-include the default memchr implementation.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <string.h>
+
+#if IS_IN (libc)
+# define MEMCHR __memchr_generic /* Name the IFUNC selector refers to.  */
+# include <string/memchr.c>      /* Built under the MEMCHR name above.  */
+#endif /* IS_IN (libc) */
new file mode 100644
@@ -0,0 +1,26 @@
+/* Re-include the RISC-V RVV based memchr implementation.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+/* Build sysdeps/riscv/rvv/memchr.S as __memchr_vector only.  The two
+   alias macros it invokes at its end are made to expand to nothing
+   here, so this object adds no alias named memchr.  */
+# define MEMCHR __memchr_vector
+# undef weak_alias
+# define weak_alias(name, alias)
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+# include <sysdeps/riscv/rvv/memchr.S>
+#endif /* IS_IN (libc) */
new file mode 100644
@@ -0,0 +1,129 @@
+/* RISC-V RVV based memchr.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#ifndef MEMCHR
+# define MEMCHR __memchr
+#endif
+
+/* void *memchr (const void *s, int c, size_t n)
+
+   Return a pointer to the first byte equal to (unsigned char) C among
+   the first N bytes at S, or NULL when there is none.
+
+   ISO C requires memchr to behave as if it read the bytes sequentially
+   and stopped at the first match, so a caller may pass an N larger
+   than the object as long as a match exists inside it (for instance
+   memchr (s, c, SIZE_MAX)).  A plain unit-stride load (vle8.v) of a
+   whole vector group can therefore touch an unmapped page that lies
+   behind the match and raise a fault the caller must never see.
+
+   Every chunk is loaded with the fault-only-first form vle8ff.v: only
+   a fault on element 0 traps, a fault on a later element merely
+   shortens vl.  Element 0 is always a byte memchr has to inspect,
+   because no earlier byte matched and the remaining count is non-zero.
+
+   Several loads per iteration are only valid if each of them still
+   starts at a byte memchr is required to read, which is why the loop
+   issues a single load and checks it before advancing.
+   NOTE(review): re-run the benchmarks after this change.  */
+
+/* Arguments and return value.  */
+#define srcin a0
+#define chrin a1
+#define cntin a2
+#define result a0
+
+/* Loop state; it lives in the argument registers.  */
+#define src a0
+#define cntrem a2
+
+/* Number of bytes delivered by the load of the current iteration.  */
+#define vset_vl t6
+
+/* Index of the first match in the current chunk, or -1.  */
+#define first_index a4
+
+/* Data register group (v0-v7 at LMUL=8) and the compare mask.  */
+#define vdata v0
+#define vmask v16
+
+ENTRY (MEMCHR)
+.option push
+/* Only the vector extension is needed; no Zba instruction is used.  */
+.option arch, +v
+ /* Nothing to search.  */
+ beqz cntin, L(ret)
+
+L(loop):
+ /* Request up to one full LMUL=8 group of the remaining bytes.  */
+ vsetvli zero, cntrem, e8, m8, ta, ma
+
+ /* The load may shorten vl, so read back how many bytes arrived.  */
+ vle8ff.v vdata, (src)
+ csrr vset_vl, vl
+
+ /* With SEW=8 only the low eight bits of chrin take part in the
+    compare, which performs the conversion to unsigned char.  */
+ vmseq.vx vmask, vdata, chrin
+ vfirst.m first_index, vmask
+ bgez first_index, L(found)
+
+ /* No match in this chunk: step over the bytes just examined.  */
+ add src, src, vset_vl
+ sub cntrem, cntrem, vset_vl
+ bnez cntrem, L(loop)
+
+L(ret):
+ /* Count exhausted without a match.  */
+ li result, 0
+ ret
+
+L(found):
+ /* src still points at the start of the chunk holding the match.  */
+ add result, src, first_index
+ ret
+.option pop
+END (MEMCHR)
+weak_alias (MEMCHR, memchr)
+libc_hidden_builtin_def (memchr)
@@ -1,6 +1,9 @@
ifeq ($(subdir),string)
sysdep_routines += \
+ memchr \
+ memchr-generic \
+ memchr-vector \
memcpy \
memcpy-generic \
memcpy_noalignment \
memset \
@@ -43,6 +43,12 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
rvv_enabled = true;
}
+ IFUNC_IMPL (i, name, memchr,
+ IFUNC_IMPL_ADD (array, i, memchr, rvv_enabled,
+ __memchr_vector)
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
+
+
IFUNC_IMPL (i, name, memcpy,
IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
__memcpy_noalignment)
new file mode 100644
@@ -0,0 +1,57 @@
+/* Multiple versions of memchr.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#if IS_IN (libc)
+/* While string.h is read, memchr is spelled __redirect_memchr so that
+   the compiler does not complain about a type mismatch with the IFUNC
+   selector when strong_alias is applied to the real name below.  */
+# undef memchr
+# define memchr __redirect_memchr
+# include <stdint.h>
+# include <string.h>
+# include <ifunc-init.h>
+# include <riscv-ifunc.h>
+# include <sys/hwprobe.h>
+
+/* Symbol defined by riscv_libc_ifunc below.  */
+extern __typeof (__redirect_memchr) __libc_memchr;
+
+/* Candidate implementations; all of them must also be listed in
+   ifunc-impl-list.c.  */
+extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;
+extern __typeof (__redirect_memchr) __memchr_vector attribute_hidden;
+
+/* Choose the memchr variant.  __memchr_vector is returned only when
+   the hwprobe query for RISCV_HWPROBE_KEY_IMA_EXT_0 succeeds and the
+   RISCV_HWPROBE_IMA_V bit is set in the result; in every other case
+   __memchr_generic is returned.  DL_HWCAP is not consulted.  */
+static inline __typeof (__redirect_memchr) *
+select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
+{
+  unsigned long long int ima_ext;
+
+  /* ima_ext is only meaningful after a successful query.  */
+  if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0,
+                           &ima_ext) != 0)
+    return __memchr_generic;
+
+  if ((ima_ext & RISCV_HWPROBE_IMA_V) != RISCV_HWPROBE_IMA_V)
+    return __memchr_generic;
+
+  return __memchr_vector;
+}
+
+riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);
+
+/* From here on memchr is no longer redirected by the macro.  */
+# undef memchr
+strong_alias (__libc_memchr, memchr);
+strong_alias (memchr, __memchr)
+# ifdef SHARED
+/* Hidden-visibility __GI_memchr alias, only built when SHARED.  */
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif
+#else
+/* Not in libc: no selector, just the code from string/memchr.c.  */
+# include <string/memchr.c>
+#endif