[v3,5/5] riscv: vectorized __memcmpeq function

Commit Message

Hau Hsu May 4, 2023, 7:48 a.m. UTC
  From: Yun Hsiang <yun.hsiang@sifive.com>

This patch proposes an implementation of __memcmpeq that leverages the
RISC-V V extension (RVV), version 1.0. The routine assumes VLEN is at
least 32 bits, as is required by all currently defined vector
extensions, and it supports arbitrarily large VLEN. The implementation
works for both RV32 and RV64 platforms, and makes no assumptions about
page size.
 sysdeps/riscv/rvv/memcmp.S   |  4 ---
 sysdeps/riscv/rvv/memcmpeq.S | 67 ++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 4 deletions(-)
 create mode 100644 sysdeps/riscv/rvv/memcmpeq.S


diff --git a/sysdeps/riscv/rvv/memcmp.S b/sysdeps/riscv/rvv/memcmp.S
index fbf81acc2f..eeec2cae6a 100644
--- a/sysdeps/riscv/rvv/memcmp.S
+++ b/sysdeps/riscv/rvv/memcmp.S
@@ -68,7 +68,3 @@  L(found):
 libc_hidden_builtin_def (memcmp)
-weak_alias (memcmp,bcmp)
-strong_alias (memcmp, __memcmpeq)
-libc_hidden_def (__memcmpeq)
diff --git a/sysdeps/riscv/rvv/memcmpeq.S b/sysdeps/riscv/rvv/memcmpeq.S
new file mode 100644
index 0000000000..5820af69d7
--- /dev/null
+++ b/sysdeps/riscv/rvv/memcmpeq.S
@@ -0,0 +1,67 @@ 
+/* RVV version of __memcmpeq.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+#include <sysdep.h>
+#include <sys/asm.h>
+#define result a0	/* Value handed back to the caller (shares a0 with src1).  */
+#define src1 a0	/* Cursor into the first buffer; advanced by ivl per pass.  */
+#define src2 a1	/* Cursor into the second buffer; advanced by ivl per pass.  */
+#define num a2	/* Count of bytes still to be compared.  */
+#define ivl a3	/* Vector length granted by vsetvli for the current pass.  */
+#define temp a4	/* Result of vfirst.m: index of first mismatch, negative if none.  */
+#define vdata1 v0	/* Bytes loaded from src1.  */
+#define vdata2 v8	/* Bytes loaded from src2.  */
+#define vmask v16	/* Mask produced by vmsne.vv: set where the bytes differ.  */
+/* Register group multiplier used for the e8 element loads.  The data
+   operands live in v0 and v8 and the mask in v16, so groups of up to
+   eight registers cannot overlap.  This fallback only takes effect when
+   no earlier definition has supplied a value.  */
+#ifndef ELEM_LMUL_SETTING
+# define ELEM_LMUL_SETTING m8
+#endif
+
+/* __memcmpeq: equality-only comparison of two byte ranges.
+
+   In:    a0 (src1) = first buffer, a1 (src2) = second buffer,
+          a2 (num)  = number of bytes to compare.
+   Out:   a0 (result) = 0 when all num bytes are equal; otherwise the
+          vector length of the pass that found the mismatch, which is
+          nonzero.  The value carries no ordering information.
+   Uses:  a1-a4, the vdata1/vdata2 register groups, vmask, vl and vtype.
+
+   A num of zero falls straight through: vsetvli grants a vector length
+   of zero, vfirst.m then reports no set bit, and the routine returns 0.  */
+ENTRY (__memcmpeq)
+
+L(loop):
+    /* Ask for up to num byte elements; ivl receives how many this pass
+       actually covers.  */
+    vsetvli ivl, num, e8, ELEM_LMUL_SETTING, ta, ma
+
+    vle8.v vdata1, (src1)
+    vle8.v vdata2, (src2)
+
+    /* vmask has a bit set for every position where the bytes differ;
+       temp becomes the index of the first such bit, or -1 if none.  */
+    vmsne.vv vmask, vdata1, vdata2
+    sub num, num, ivl
+    vfirst.m temp, vmask
+
+    /* Leave the loop as soon as any byte differs between src1 and src2.  */
+    bgez temp, L(found)
+
+    add src1, src1, ivl
+    add src2, src2, ivl
+
+    bnez num, L(loop)
+
+    /* Every byte matched.  */
+    li result, 0
+    ret
+
+L(found):
+    /* A mismatch implies at least one element was processed, so ivl is
+       nonzero and serves as the "not equal" answer.  */
+    mv result, ivl
+    ret
+
+END (__memcmpeq)
+weak_alias (__memcmpeq, bcmp)
+libc_hidden_def (__memcmpeq)