riscv: add RVV-optimized memchr

Message ID 20260323022534.10684-1-daichengrong@iscas.ac.cn (mailing list archive)
State New
Headers
Series riscv: add RVV-optimized memchr |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm fail Test failed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 fail Test failed

Commit Message

daichengrong March 23, 2026, 2:25 a.m. UTC
  This patch introduces an RVV (RISC-V Vector Extension) based
implementation of memchr, leveraging vector instructions to
improve data-level parallelism.

The implementation has been tested on SG2044 (RVV 128-bit) and
Banana Pi F3 (RVV 256-bit) platforms.

Benchmark results show no observable regression on small inputs,
while providing significant performance improvements on larger
inputs.

If this approach proves effective, additional optimized mem/str
routines will be submitted in follow-up patches.  These routines have
already been implemented and validated on existing platforms, and
they show performance gains.

Signed-off-by: daichengrong <daichengrong@iscas.ac.cn>
---
 sysdeps/riscv/multiarch/memchr-generic.c      |  24 ++++
 sysdeps/riscv/multiarch/memchr-vector.S       |  26 ++++
 sysdeps/riscv/rvv/memchr.S                    | 129 ++++++++++++++++++
 .../unix/sysv/linux/riscv/multiarch/Makefile  |   3 +
 .../linux/riscv/multiarch/ifunc-impl-list.c   |   6 +
 .../unix/sysv/linux/riscv/multiarch/memchr.c  |  57 ++++++++
 6 files changed, 245 insertions(+)
 create mode 100644 sysdeps/riscv/multiarch/memchr-generic.c
 create mode 100644 sysdeps/riscv/multiarch/memchr-vector.S
 create mode 100644 sysdeps/riscv/rvv/memchr.S
 create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
  

Comments

Kito Cheng March 27, 2026, 2:35 a.m. UTC | #1
> @@ -0,0 +1,129 @@
> +/* RISC-V RVV based memchr.
> +   Copyright (C) 2026 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +#ifndef MEMCHR
> +# define MEMCHR __memchr
> +#endif
> +
> +#define srcin          a0
> +#define chrin          a1
> +#define cntin          a2
> +#define result         a0
> +
> +#define src                a0
> +#define cntrem         a2
> +#define cntrem_4    a5
> +
> +#define vdata          v0
> +#define vmask          v16
> +#define first_index a4
> +
> +#define        tmp         t0
> +#define        vset_vl         t6
> +
> +#define src_2          a3
> +#define src_3          a6
> +#define src_4          a7
> +
> +#define vdata_1                v0
> +#define vdata_2                v4
> +#define vdata_3                v8
> +#define vdata_4                v12
> +
> +#define vmask_1                v16
> +#define vmask_2                v20
> +#define vmask_3                v24
> +#define vmask_4                v28
> +
> +#define first_index_1  a4
> +#define first_index_2  t2
> +#define first_index_3  t3
> +#define first_index_4  t4
> +
> +ENTRY (MEMCHR)
> +.option push
> +.option arch, +v
> +.option arch, +zba

^^^ Do we really need Zba here? I don't see any Zba instructions
in the implementation.

> +    beqz        cntin, L(ret)
> +    and         tmp, cntin, 0x3F
> +    beqz        tmp, L(loop_pre)
> +    vsetvli     vset_vl, tmp, e8, m8, ta, ma
> +    vle8.v      vdata, (srcin)
> +    vmseq.vx    vmask, vdata, chrin
> +    vfirst.m    first_index, vmask
> +    bgez        first_index, L(found)
> +
> +    beq         vset_vl, cntin, L(ret)
> +
> +    add         src, srcin, vset_vl
> +    sub         cntrem, cntin, vset_vl
> +
> +L(loop_pre):
> +    srli        cntrem_4, cntrem, 2
> +L(loop):
> +    vsetvli     vset_vl, cntrem_4, e8, m4, ta, ma
> +
> +    vle8.v      vdata_1, (src)
> +    add         src_2, src, vset_vl
> +    vle8.v      vdata_2, (src_2)
> +    add         src_3, src_2, vset_vl
> +    vle8.v      vdata_3, (src_3)
> +    add         src_4, src_3, vset_vl
> +    vle8.v      vdata_4, (src_4)
> +
> +    vmseq.vx    vmask_1, vdata_1, chrin
> +    vmseq.vx    vmask_2, vdata_2, chrin
> +    vmseq.vx    vmask_3, vdata_3, chrin
> +    vmseq.vx    vmask_4, vdata_4, chrin
> +
> +    vfirst.m    first_index, vmask_1
> +    vfirst.m    first_index_2, vmask_2
> +    vfirst.m    first_index_3, vmask_3
> +    vfirst.m    first_index_4, vmask_4
> +
> +    bgez        first_index_1, L(found1)
> +    bgez        first_index_2, L(found2)
> +    bgez        first_index_3, L(found3)
> +    bgez        first_index_4, L(found4)
> +
> +    add         src, src_4, vset_vl
> +    sub         cntrem_4, cntrem_4, vset_vl
> +    bnez        cntrem_4, L(loop)
> +L(ret):
> +    li          result, 0
> +    ret
> +L(found4):
> +    add result, src_4, first_index_4
> +    ret
> +L(found3):
> +    add result, src_3, first_index_3
> +    ret
> +L(found2):
> +    add result, src_2, first_index_2
> +    ret
> +L(found1):
> +L(found):
> +    add result, src, first_index
> +    ret
> +.option pop
> +END (MEMCHR)
> +weak_alias (MEMCHR, memchr)
> +libc_hidden_builtin_def (memchr)
  

Patch

diff --git a/sysdeps/riscv/multiarch/memchr-generic.c b/sysdeps/riscv/multiarch/memchr-generic.c
new file mode 100644
index 0000000000..a95e16ee51
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memchr-generic.c
@@ -0,0 +1,24 @@ 
+/* Re-include the default memchr implementation.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#if IS_IN(libc)
+# define MEMCHR __memchr_generic	/* Presumably read by string/memchr.c as its function name -- confirm.  */
+# include <string/memchr.c>
+#endif	/* IS_IN(libc) */
diff --git a/sysdeps/riscv/multiarch/memchr-vector.S b/sysdeps/riscv/multiarch/memchr-vector.S
new file mode 100644
index 0000000000..951f256245
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memchr-vector.S
@@ -0,0 +1,26 @@ 
+/* Re-include the RISC-V RVV based memchr implementation.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if IS_IN(libc)
+# define MEMCHR __memchr_vector	/* Entry-point name used by rvv/memchr.S.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)	/* Expands to nothing.  */
+# undef weak_alias
+# define weak_alias(name, alias)	/* Expands to nothing.  */
+# include <sysdeps/riscv/rvv/memchr.S>
+#endif	/* IS_IN(libc) */
diff --git a/sysdeps/riscv/rvv/memchr.S b/sysdeps/riscv/rvv/memchr.S
new file mode 100644
index 0000000000..8dbd2ec28a
--- /dev/null
+++ b/sysdeps/riscv/rvv/memchr.S
@@ -0,0 +1,129 @@ 
+/* RISC-V RVV based memchr.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#ifndef MEMCHR
+# define MEMCHR __memchr
+#endif
+
+/* void *memchr (const void *s, int c, size_t n)
+
+   Return a pointer to the first byte equal to (unsigned char) c among
+   the first n bytes at s, or NULL if there is no such byte.
+
+   ISO C requires memchr to behave as if it read the bytes one at a
+   time and stopped at the first match, so n may exceed the size of the
+   object as long as a match lies inside it.  A plain vle8.v of a whole
+   chunk could then touch an unmapped page behind the match and trap.
+   The loop therefore uses the fault-only-first load vle8ff.v, which
+   only traps if the first byte of the chunk is inaccessible and
+   otherwise shortens vl to the bytes that were actually loaded.
+
+   For the same reason only one load is issued per iteration: after a
+   truncated load, a further load would start at the inaccessible byte
+   and trap, although the match may be in the bytes already loaded.
+
+   Each iteration:
+     1. asks vsetvli for up to cntrem bytes (SEW=8, LMUL=8),
+     2. loads that chunk with vle8ff.v,
+     3. compares every loaded byte with c and locates the first hit,
+     4. returns src + index on a hit, otherwise advances src and
+        reduces cntrem by the number of bytes that were examined.
+
+   Notes:
+     - At SEW=8 vmseq.vx uses only the low 8 bits of the scalar, which
+       gives the required conversion of c to unsigned char for free.
+     - vsetvli treats the requested length as unsigned, so very large
+       values of n need no special handling.
+     - Only a0, a2, a4, t6, v0-v7, v16, vl and vtype are written; the
+       function needs no stack frame.  */
+
+/* Arguments and return value.  */
+#define srcin		a0
+#define chrin		a1
+#define cntin		a2
+#define result		a0
+
+/* Loop state; these reuse the argument registers.  */
+#define src		a0
+#define cntrem		a2
+
+/* Bytes of the current chunk (LMUL=8 register group v0-v7).  */
+#define vdata		v0
+
+/* Mask with one bit per chunk byte, set where the byte equals chrin.  */
+#define vmask		v16
+
+/* Index of the first match within the chunk, or -1 if there is none.  */
+#define first_index	a4
+
+/* Number of bytes the last load actually covered.  */
+#define vset_vl		t6
+
+ENTRY (MEMCHR)
+/* Enable the extensions for this function only; the matching
+   .option pop restores the previous settings.  */
+.option push
+.option arch, +v
+/* csrr, used below to read vl, belongs to Zicsr.  */
+.option arch, +zicsr
+
+    /* n == 0: nothing may be read, return NULL.  */
+    beqz        cntin, L(ret)
+
+L(loop):
+    /* Request up to cntrem bytes; e8/m8 gives the longest chunk.  */
+    vsetvli     zero, cntrem, e8, m8, ta, ma
+
+    /* Load the chunk.  If a byte other than the first one faults, the
+       load does not trap but truncates vl before that byte.  */
+    vle8ff.v    vdata, (src)
+
+    /* Both instructions honor the possibly shortened vl, so bytes that
+       were not loaded can never produce a match.  */
+    vmseq.vx    vmask, vdata, chrin
+    vfirst.m    first_index, vmask
+    bgez        first_index, L(found)
+
+    /* No match: skip exactly the bytes that were examined.  vl is at
+       least 1 here, so the loop always makes progress.  */
+    csrr        vset_vl, vl
+    add         src, src, vset_vl
+    sub         cntrem, cntrem, vset_vl
+    bnez        cntrem, L(loop)
+
+    /* All n bytes were examined without a match.  */
+L(ret):
+    li          result, 0
+    ret
+
+L(found):
+    /* src still points at the start of the chunk.  */
+    add         result, src, first_index
+    ret
+
+.option pop
+END (MEMCHR)
+
+/* Export the function as memchr.  The multiarch wrapper
+   memchr-vector.S defines both macros to nothing before including
+   this file.  */
+weak_alias (MEMCHR, memchr)
+libc_hidden_builtin_def (memchr)
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
index a865090a53..41ff6f5de3 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
@@ -1,6 +1,9 @@ 
 ifeq ($(subdir),string)
 sysdep_routines += \
+  memchr \
+  memchr-generic \
+  memchr-vector \
   memcpy \
   memcpy-generic \
   memcpy_noalignment \
   memset \
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
index a3b5731411..3d107b2a97 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
@@ -43,6 +43,12 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
       rvv_enabled = true;
   }
 
+  IFUNC_IMPL (i, name, memchr,
+	      IFUNC_IMPL_ADD (array, i, memchr, rvv_enabled,
+			      __memchr_vector)
+	      IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_generic))
+
+
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
 			      __memcpy_noalignment)
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
new file mode 100644
index 0000000000..0844a33760
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memchr.c
@@ -0,0 +1,57 @@ 
+/* Multiple versions of memchr.
+   All versions must be listed in ifunc-impl-list.c.
+   Copyright (C) 2026 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if IS_IN (libc)
+/* Redefine memchr so that the compiler won't complain about the type
+   mismatch with the IFUNC selector in strong_alias, below.  */
+# undef memchr
+# define memchr __redirect_memchr
+# include <stdint.h>
+# include <string.h>
+# include <ifunc-init.h>
+# include <riscv-ifunc.h>
+# include <sys/hwprobe.h>
+
+extern __typeof (__redirect_memchr) __libc_memchr;	/* Symbol passed to riscv_libc_ifunc below.  */
+
+extern __typeof (__redirect_memchr) __memchr_generic attribute_hidden;	/* Built by memchr-generic.c.  */
+extern __typeof (__redirect_memchr) __memchr_vector attribute_hidden;	/* Built by memchr-vector.S.  */
+
+/* Return the RVV memchr if hwprobe reports V, else the generic one.  */
+static inline __typeof (__redirect_memchr) *
+select_memchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
+{
+  unsigned long long int ext;
+  return (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_IMA_EXT_0, &ext)
+          || (ext & RISCV_HWPROBE_IMA_V) != RISCV_HWPROBE_IMA_V)
+         ? __memchr_generic : __memchr_vector;
+}
+
+riscv_libc_ifunc (__libc_memchr, select_memchr_ifunc);	/* Presumably makes __libc_memchr an IFUNC resolved by the selector -- see riscv-ifunc.h.  */
+
+# undef memchr	/* Drop the __redirect_memchr renaming set up before <string.h>.  */
+strong_alias (__libc_memchr, memchr);	/* Public name.  */
+strong_alias (memchr, __memchr)	/* Internal name.  */
+# ifdef SHARED
+__hidden_ver1 (memchr, __GI_memchr, __redirect_memchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memchr);
+# endif	/* SHARED */
+#else	/* !IS_IN (libc) */
+# include <string/memchr.c>
+#endif	/* IS_IN (libc) */