[v2] RISC-V: add RVV support for memrchr using IFUNC

Message ID 20250319084202.4034345-1-daichengrong@iscas.ac.cn (mailing list archive)
State New
Headers
Series [v2] RISC-V: add RVV support for memrchr using IFUNC |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed

Commit Message

daichengrong March 19, 2025, 8:42 a.m. UTC
  From: daichengrong <daichengrong@iscas.ac.cn>

On BPI_F3, bench-memrchr from the glibc benchtests shows an average improvement of 114%.
On K230, the average speedup is 99%.

This patch introduces a configure check for assembler support for RVV instructions.
It also introduces an RVV implementation of memrchr that is selected with IFUNC.
The IFUNC resolver selects the RVV memrchr based on dl_hwcap.

Changes in v2:
  fixed the incorrectly sorted Makefile entries
  updated the copyright year in the newly created files
---
 config.h.in                                   |  3 +
 sysdeps/riscv/configure                       | 35 +++++++
 sysdeps/riscv/configure.ac                    | 25 +++++
 sysdeps/riscv/multiarch/memrchr_generic.c     | 35 +++++++
 sysdeps/riscv/multiarch/memrchr_rvv.S         | 96 +++++++++++++++++++
 .../unix/sysv/linux/riscv/multiarch/Makefile  |  8 ++
 .../linux/riscv/multiarch/ifunc-impl-list.c   | 17 ++++
 .../unix/sysv/linux/riscv/multiarch/memrchr.c | 70 ++++++++++++++
 8 files changed, 289 insertions(+)
 create mode 100644 sysdeps/riscv/multiarch/memrchr_generic.c
 create mode 100644 sysdeps/riscv/multiarch/memrchr_rvv.S
 create mode 100644 sysdeps/unix/sysv/linux/riscv/multiarch/memrchr.c
  

Patch

diff --git a/config.h.in b/config.h.in
index cdbd555366..7802e8f9c4 100644
--- a/config.h.in
+++ b/config.h.in
@@ -139,6 +139,9 @@ 
 /* RISC-V floating-point ABI for ld.so.  */
 #undef RISCV_ABI_FLEN
 
+/* Define if assembler supports vector instructions on RISC-V.  */
+#undef HAVE_RISCV_ASM_VECTOR_SUPPORT
+
 /* LOONGARCH integer ABI for ld.so.  */
 #undef LOONGARCH_ABI_GRLEN
 
diff --git a/sysdeps/riscv/configure b/sysdeps/riscv/configure
index 3ae4ae3bdb..bbda6a0d4a 100644
--- a/sysdeps/riscv/configure
+++ b/sysdeps/riscv/configure
@@ -83,3 +83,38 @@  if test "$libc_cv_static_pie_on_riscv" = yes; then
 
 fi
 
+# Check if assembler supports attribute riscv vector macro.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gcc attribute riscv vector support" >&5
+printf %s "checking for gcc attribute riscv vector support... " >&6; }
+if test ${libc_cv_gcc_rvv+y}
+then :
+  printf %s "(cached) " >&6
+else case e in #(
+  e) cat > conftest.S <<EOF
+foo:
+  .option push
+  .option arch, +v
+  vsetivli t0, 8, e8, m8, ta, ma
+  .option pop
+  ret
+EOF
+libc_cv_gcc_rvv=no
+if ${CC-asm} -c conftest.S -o conftest.o 1>&5 \
+  2>&5 ; then
+  libc_cv_gcc_rvv=yes
+fi
+rm -f conftest* ;;
+esac
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_gcc_rvv" >&5
+printf "%s\n" "$libc_cv_gcc_rvv" >&6; }
+
+if test x"$libc_cv_gcc_rvv" = xyes; then
+  printf "%s\n" "#define HAVE_RISCV_ASM_VECTOR_SUPPORT 1" >>confdefs.h
+
+fi
+
+config_vars="$config_vars
+have-gcc-riscv-rvv = $libc_cv_gcc_rvv"
+
+
diff --git a/sysdeps/riscv/configure.ac b/sysdeps/riscv/configure.ac
index ee3d1ed014..27e0e51b1c 100644
--- a/sysdeps/riscv/configure.ac
+++ b/sysdeps/riscv/configure.ac
@@ -43,3 +43,28 @@  EOF
 if test "$libc_cv_static_pie_on_riscv" = yes; then
   AC_DEFINE(SUPPORT_STATIC_PIE)
 fi
+
+# Check if assembler supports attribute riscv vector macro.
+AC_CACHE_CHECK([for gcc attribute riscv vector support],
+        libc_cv_gcc_rvv, [dnl
+cat > conftest.S <<EOF
+foo:
+  .option push
+  .option arch, +v
+  vsetivli t0, 8, e8, m8, ta, ma
+  .option pop
+  ret
+EOF
+libc_cv_gcc_rvv=no
+if ${CC-asm} -c conftest.S -o conftest.o 1>&AS_MESSAGE_LOG_FD \
+  2>&AS_MESSAGE_LOG_FD ; then
+  libc_cv_gcc_rvv=yes
+fi
+rm -f conftest*])
+
+if test x"$libc_cv_gcc_rvv" = xyes; then
+  AC_DEFINE(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+fi
+
+LIBC_CONFIG_VAR([have-gcc-riscv-rvv], [$libc_cv_gcc_rvv])
+
diff --git a/sysdeps/riscv/multiarch/memrchr_generic.c b/sysdeps/riscv/multiarch/memrchr_generic.c
new file mode 100644
index 0000000000..62f33396a3
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memrchr_generic.c
@@ -0,0 +1,35 @@ 
+/* Re-include the default memrchr implementation.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#include <string.h>
+#if IS_IN (libc)
+/* Name requested for the memrchr built from string/memrchr.c below.  */
+# define MEMRCHR __memrchr_generic
+
+/* Do not hide the generic version of memrchr; it is used internally.  */
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name)
+/* Make weak_alias expand to nothing for the code included below.  */
+# undef weak_alias
+# define weak_alias(a, b)
+
+#endif
+
+#include <string/memrchr.c>
diff --git a/sysdeps/riscv/multiarch/memrchr_rvv.S b/sysdeps/riscv/multiarch/memrchr_rvv.S
new file mode 100644
index 0000000000..cf4fe82244
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memrchr_rvv.S
@@ -0,0 +1,96 @@ 
+/* Optimized memrchr implementation using RVV.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+#define ELEM_LMUL_1_SETTING m1
+#define ELEM_LMUL_8_SETTING m8
+#define ELEM_SEW_8_SETTING e8
+/* Vector registers; v_reverse_loaded_data and v_index both name v16.  */
+#define v_seq_set_data v0
+#define v_loaded_data v8
+#define v_reverse_loaded_data v16
+#define v_index v16
+#define v_reverse_index v24
+/* Arguments and return value; srcin and result both name a0.  */
+#define srcin		a0
+#define chrin		a1
+#define cntin		a2
+#define result		a0
+/* tmp_end and loaded_data_start both name t1.  */
+#define	tmp_end		t1
+#define	loaded_data_start		t1
+
+#define VL a4
+
+#define cntrem		t2
+#define set_num    t3
+#define first_set_index    t4
+#define	loaded_data_max_index		t5
+#define	one		t6
+
+ENTRY (__memrchr_rvv)
+.option push
+.option arch, +v
+
+	/* t1 starts one past the end of the buffer and moves backwards.  */
+	mv	cntrem, cntin
+	add	tmp_end, srcin, cntrem
+
+L(memrchr_loop):
+	/* cntin is a size_t, so test for zero instead of a signed <= 0.  */
+	beqz	cntrem, L(memrchr_nomatch)
+	vsetvli	VL, cntrem, ELEM_SEW_8_SETTING, ELEM_LMUL_8_SETTING, ta, ma
+	/* Step t1 back by VL so that it points at the chunk to load.  */
+	sub	loaded_data_start, tmp_end, VL
+	vle8.v	v_loaded_data, (loaded_data_start)
+	sub	cntrem, cntrem, VL
+	/* Set v0[i] where v8[i] == a1.  */
+	vmseq.vx	v_seq_set_data, v_loaded_data, chrin
+	/* Count the matching elements.  */
+	vcpop.m	set_num, v_seq_set_data
+	beqz	set_num, L(memrchr_loop)
+
+L(memrchr_found):
+	li	one, 1
+	bgt	set_num, one, L(memrchr_multi_found)
+	/* A single match: it is both the first and the last one.  */
+	vfirst.m	first_set_index, v_seq_set_data
+	add	result, loaded_data_start, first_set_index
+	ret
+
+L(memrchr_multi_found):
+	/* Several matches: scan this chunk backwards byte by byte.  The
+	   e8 index vector used by a vid.v/vrgather.vv reversal wraps once
+	   VL exceeds 256, which is possible with LMUL=8 when VLEN >= 512,
+	   so the last match is located with scalar loads instead.  The
+	   scan terminates because the chunk is known to contain chrin.  */
+	andi	chrin, chrin, 0xff
+	add	result, loaded_data_start, VL
+L(memrchr_scan_back):
+	addi	result, result, -1
+	lbu	set_num, 0(result)
+	bne	set_num, chrin, L(memrchr_scan_back)
+	ret
+
+L(memrchr_nomatch):
+	mv	result, zero
+	ret
+.option pop
+END (__memrchr_rvv)
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
index fcef5659d4..c59690f9ac 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
@@ -3,7 +3,15 @@  sysdep_routines += \
   memcpy \
   memcpy-generic \
   memcpy_noalignment \
+  memrchr \
+  memrchr_generic \
   # sysdep_routines
 
+ifeq ($(have-gcc-riscv-rvv),yes)
+sysdep_routines += \
+  memrchr_rvv \
+  # rvv sysdep_routines
+endif
+
 CFLAGS-memcpy_noalignment.c += -mno-strict-align
 endif
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
index 1c1deca8f6..deb787a116 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
@@ -19,6 +19,8 @@ 
 #include <ifunc-impl-list.h>
 #include <string.h>
 #include <sys/hwprobe.h>
+#include <ldsodefs.h>
+#include <asm/hwcap.h>
 
 size_t
 __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -27,6 +29,9 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   size_t i = max;
 
   bool fast_unaligned = false;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+  bool rvv_ext = false;
+#endif
 
   struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
   if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
@@ -34,6 +39,18 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
           == RISCV_HWPROBE_MISALIGNED_FAST)
     fast_unaligned = true;
 
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+  if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V) 
+    rvv_ext = true;
+#endif
+    
+IFUNC_IMPL (i, name, memrchr,
+  #if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+          IFUNC_IMPL_ADD (array, i, memrchr, rvv_ext,
+            __memrchr_rvv)
+  #endif
+          IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_generic))
+
   IFUNC_IMPL (i, name, memcpy,
 	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
 			      __memcpy_noalignment)
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memrchr.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memrchr.c
new file mode 100644
index 0000000000..3502b27ac6
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memrchr.c
@@ -0,0 +1,70 @@ 
+/* Multiple versions of memrchr. 
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Define multiple versions only for the definition in libc.  */
+
+#if IS_IN (libc)
+# undef memrchr
+# define memrchr __redirect_memrchr
+# define __memrchr __redirect___memrchr
+
+# include <stdint.h>
+# include <string.h>
+# include <ifunc-init.h>
+# include <riscv-ifunc.h>
+# include <sys/hwprobe.h>
+# include <asm/hwcap.h>
+
+extern __typeof (__redirect_memrchr) ___memrchr;
+
+extern __typeof (__redirect_memrchr) __memrchr_generic attribute_hidden;
+extern __typeof (__redirect_memrchr) __memrchr_rvv attribute_hidden;
+static inline __typeof (__redirect_memrchr) *
+select_memrchr_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
+{
+  /* Use the generic memrchr unless HAVE_RISCV_ASM_VECTOR_SUPPORT is defined
+     and dl_hwcap has COMPAT_HWCAP_ISA_V set; hwprobe_func is not consulted.  */
+  __typeof (__redirect_memrchr) *selected = __memrchr_generic;
+
+#ifdef HAVE_RISCV_ASM_VECTOR_SUPPORT
+  if ((dl_hwcap & COMPAT_HWCAP_ISA_V) != 0)
+    selected = __memrchr_rvv;
+#endif
+
+  return selected;
+}
+
+riscv_libc_ifunc (___memrchr, select_memrchr_ifunc);
+
+/* Drop the redirect macros, then alias both public names to ___memrchr.  */
+# undef memrchr
+# undef __memrchr
+strong_alias (___memrchr, memrchr);
+strong_alias (___memrchr, __memrchr);
+
+# ifdef SHARED
+__hidden_ver1 (memrchr, __GI_memrchr, __redirect_memrchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memrchr);
+/* NOTE(review): presumably the hidden alias for internal __memrchr calls.  */
+__hidden_ver1 (memrchr, __GI___memrchr, __redirect___memrchr)
+  __attribute__ ((visibility ("hidden"))) __attribute_copy__ (memrchr);
+# endif
+
+#else
+# include <string/memrchr.c>
+#endif