[v2,3/5] riscv: vectorized str* functions

Message ID 20230421073132.14241-1-hau.hsu@sifive.com
State Dropped
Headers
Series [v2,1/5] riscv: Enabling vectorized mem*/str* functions in build time |

Commit Message

Hau Hsu April 21, 2023, 7:31 a.m. UTC
  From: Jerry Shih <jerry.shih@sifive.com>

This patch proposes implementations of strcat, strcmp, strcpy, strlen,
strncat, strncmp and strncpy that leverage the RISC-V V extension (RVV),
version 1.0. These routines assume VLEN is at least 32 bits, as is
required by all currently defined vector extensions, and they support
arbitrarily large VLEN. All implementations work for both RV32 and RV64
platforms, and make no assumptions about page size.
---
 sysdeps/riscv/rvv/strcat.S  | 72 ++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strcmp.S  | 93 +++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strcpy.S  | 56 ++++++++++++++++++++++
 sysdeps/riscv/rvv/strlen.S  | 54 +++++++++++++++++++++
 sysdeps/riscv/rvv/strncat.S | 83 +++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strncmp.S | 85 +++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strncpy.S | 86 ++++++++++++++++++++++++++++++++++
 7 files changed, 529 insertions(+)
 create mode 100644 sysdeps/riscv/rvv/strcat.S
 create mode 100644 sysdeps/riscv/rvv/strcmp.S
 create mode 100644 sysdeps/riscv/rvv/strcpy.S
 create mode 100644 sysdeps/riscv/rvv/strlen.S
 create mode 100644 sysdeps/riscv/rvv/strncat.S
 create mode 100644 sysdeps/riscv/rvv/strncmp.S
 create mode 100644 sysdeps/riscv/rvv/strncpy.S
  

Patch

diff --git a/sysdeps/riscv/rvv/strcat.S b/sysdeps/riscv/rvv/strcat.S
new file mode 100644
index 0000000000..8a7779fd3c
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcat.S
@@ -0,0 +1,72 @@ 
+/* RVV versions strcat.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* pDst (a0) is never written, so dst comes back unchanged in a0.  */
+#define pDst a0
+#define pSrc a1
+#define pOut a2
+#define iVlMax a3
+#define iChunk a4
+#define iNulIdx a5
+#define ELEM_LMUL_SETTING m1
+#define vDstNul v0
+#define vStoreMask v0
+#define vSrcNul v1
+#define vData v8
+#define vStr2 v16
+
+ENTRY(strcat)
+    mv pOut, pDst
+
+    /* Phase 1: walk dst one chunk at a time until its NUL shows up.
+       vle8ff.v may shorten vl, so the chunk size is read back from vl.  */
+L(scan_dst):
+    vsetvli iVlMax, zero, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pOut)
+    csrr iChunk, vl
+    vmseq.vx vDstNul, vData, zero
+    vfirst.m iNulIdx, vDstNul
+    add pOut, pOut, iChunk
+    bltz iNulIdx, L(scan_dst)
+
+    /* pOut overshot by one chunk; rewind it onto the NUL itself.  */
+    sub pOut, pOut, iChunk
+    add pOut, pOut, iNulIdx
+
+    /* Phase 2: copy src, terminator included, starting at that NUL.  */
+L(append_src):
+    vsetvli iVlMax, zero, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pSrc)
+    csrr iChunk, vl
+    vmseq.vx vSrcNul, vData, zero
+    vfirst.m iNulIdx, vSrcNul
+    /* Store mask: every byte up to and including the first NUL.  */
+    vmsif.m vStoreMask, vSrcNul
+    vse8.v vData, (pOut), vStoreMask.t
+    add pSrc, pSrc, iChunk
+    add pOut, pOut, iChunk
+    bltz iNulIdx, L(append_src)
+
+    ret
+
+END(strcat)
+libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/riscv/rvv/strcmp.S b/sysdeps/riscv/rvv/strcmp.S
new file mode 100644
index 0000000000..c5f525bbe9
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcmp.S
@@ -0,0 +1,93 @@ 
+// Copyright (c) 2023 SiFive, Inc. -- Proprietary and Confidential All Rights
+// Reserved.
+//
+// NOTICE: All information contained herein is, and remains the property of
+// SiFive, Inc. The intellectual and technical concepts contained herein are
+// proprietary to SiFive, Inc. and may be covered by U.S. and Foreign Patents,
+// patents in process, and are protected by trade secret or copyright law.
+//
+// This work may not be copied, modified, re-published, uploaded, executed, or
+// distributed in any way, in any medium, whether in whole or in part, without
+// prior written permission from SiFive, Inc.
+//
+// The copyright notice above does not evidence any actual or intended
+// publication or disclosure of this source code, which includes information
+// that is confidential and/or proprietary, and is a trade secret, of SiFive,
+// Inc.
+//===----------------------------------------------------------------------===//
+
+// Contributed by: Jerry Shih <jerry.shih@sifive.com>
+
+// Prototype:
+// int strcmp(const char *lhs, const char *rhs)
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* int strcmp (const char *lhs, const char *rhs)
+   The result is the difference between the two bytes, each loaded
+   zero-extended, at the first position that decides the comparison.  */
+#define iResult a0
+#define pStr1 a0
+#define pStr2 a1
+#define iChunk a2
+#define iNulIdx a3
+#define iDiffIdx a4
+/* Once the deciding offset is known, a3/a4 are recycled for the bytes.  */
+#define iByte1 a3
+#define iByte2 a4
+
+#define vLhs v0
+#define vRhs v8
+#define vNulMask v16
+#define vDiffMask v17
+
+ENTRY(strcmp)
+    /* Each pass examines one LMUL=1 chunk of both strings, looking for
+       the first NUL in lhs and the first byte where the strings differ.  */
+L(next_chunk):
+    vsetvli iChunk, zero, e8, m1, ta, ma
+
+    vle8ff.v vLhs, (pStr1)
+    /* vNulMask[i] = (lhs[i] == 0).  */
+    vmseq.vx vNulMask, vLhs, zero
+
+    vle8ff.v vRhs, (pStr2)
+    /* vDiffMask[i] = (lhs[i] != rhs[i]).  */
+    vmsne.vv vDiffMask, vLhs, vRhs
+
+    /* vfirst.m yields a negative index when no mask bit is set.  */
+    vfirst.m iNulIdx, vNulMask
+    vfirst.m iDiffIdx, vDiffMask
+
+    bgez iNulIdx, L(nul_seen)
+    bgez iDiffIdx, L(diff_seen)
+
+    /* Nothing found: step both strings by vl, which the
+       fault-only-first loads may have shortened.  */
+    csrr iChunk, vl
+    add pStr1, pStr1, iChunk
+    add pStr2, pStr2, iChunk
+    j L(next_chunk)
+
+    /* Reached with iNulIdx >= 0.  A mismatch in front of the NUL still
+       wins; otherwise the bytes at the NUL decide.  */
+L(nul_seen):
+    bltz iDiffIdx, L(compare_at)
+    bge iDiffIdx, iNulIdx, L(compare_at)
+
+    /* Reached with iDiffIdx >= 0 and no NUL in front of it.  */
+L(diff_seen):
+    mv iNulIdx, iDiffIdx
+
+    /* iNulIdx is the offset of the deciding byte in both strings.  */
+L(compare_at):
+    add pStr1, pStr1, iNulIdx
+    add pStr2, pStr2, iNulIdx
+    lbu iByte1, 0(pStr1)
+    lbu iByte2, 0(pStr2)
+    sub iResult, iByte1, iByte2
+    ret
+
+END(strcmp)
+libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/riscv/rvv/strcpy.S b/sysdeps/riscv/rvv/strcpy.S
new file mode 100644
index 0000000000..8fb754ee23
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcpy.S
@@ -0,0 +1,56 @@ 
+/* RVV versions strcpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* pDst (a0) is never written, so dst comes back unchanged in a0.  */
+#define pDst a0
+#define pSrc a1
+#define pOut a2
+#define iVlMax a3
+#define iChunk a4
+#define iNulIdx a5
+#define ELEM_LMUL_SETTING m1
+#define vStoreMask v0
+#define vNulMask v1
+#define vData v8
+#define vStr2 v16
+
+ENTRY(strcpy)
+    mv pOut, pDst
+
+    /* Copy chunk by chunk; the pass that meets the NUL also stores it.  */
+L(copy_chunk):
+    vsetvli iVlMax, zero, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pSrc)
+    csrr iChunk, vl     /* The load may have shortened vl.  */
+    vmseq.vx vNulMask, vData, zero
+    vfirst.m iNulIdx, vNulMask
+    /* Store mask: every byte up to and including the first NUL.  */
+    vmsif.m vStoreMask, vNulMask
+    vse8.v vData, (pOut), vStoreMask.t
+    add pSrc, pSrc, iChunk
+    add pOut, pOut, iChunk
+    bltz iNulIdx, L(copy_chunk)
+
+    ret
+
+END(strcpy)
+libc_hidden_builtin_def (strcpy)
diff --git a/sysdeps/riscv/rvv/strlen.S b/sysdeps/riscv/rvv/strlen.S
new file mode 100644
index 0000000000..eb456b094b
--- /dev/null
+++ b/sysdeps/riscv/rvv/strlen.S
@@ -0,0 +1,54 @@ 
+/* RVV versions strlen.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* a0 carries the string in and the length out.  */
+#define iResult a0
+#define pStr a0
+#define pCursor a1
+#define iChunk a2
+#define iNulIdx a3
+#define ELEM_LMUL_SETTING m2
+#define vData v0
+#define vNulMask v2
+
+ENTRY(strlen)
+    mv pCursor, pStr
+
+    /* Scan one LMUL=2 chunk per pass until a NUL byte is seen.  */
+L(scan):
+    vsetvli iChunk, zero, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pCursor)
+    vmseq.vx vNulMask, vData, zero
+    csrr iChunk, vl     /* The load may have shortened vl.  */
+    vfirst.m iNulIdx, vNulMask
+    add pCursor, pCursor, iChunk
+    bltz iNulIdx, L(scan)
+
+    /* pCursor is one chunk past the start of the final chunk: step back,
+       then forward to the NUL.  Its distance from pStr is the length.  */
+    sub pCursor, pCursor, iChunk
+    add pCursor, pCursor, iNulIdx
+    sub iResult, pCursor, pStr
+    ret
+
+END(strlen)
+libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/riscv/rvv/strncat.S b/sysdeps/riscv/rvv/strncat.S
new file mode 100644
index 0000000000..7847c4f008
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncat.S
@@ -0,0 +1,83 @@ 
+/* RVV versions strncat.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* pDst (a0) is never written, so dst comes back unchanged in a0.  */
+#define pDst a0
+#define pSrc a1
+#define iLength a2
+#define pOut a3
+#define iVlMax a4
+#define iChunk a5
+#define iNulIdx a6
+#define ELEM_LMUL_SETTING m1
+#define vDstNul v0
+#define vStoreMask v0
+#define vSrcNul v1
+#define vData v8
+#define vStr2 v16
+
+ENTRY(strncat)
+    mv pOut, pDst
+
+    /* Phase 1: walk dst chunk by chunk until its NUL shows up.  */
+L(scan_dst):
+    vsetvli iVlMax, zero, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pOut)
+    csrr iChunk, vl     /* The load may have shortened vl.  */
+    vmseq.vx vDstNul, vData, zero
+    vfirst.m iNulIdx, vDstNul
+    add pOut, pOut, iChunk
+    bltz iNulIdx, L(scan_dst)
+
+    /* pOut overshot by one chunk; rewind it onto the NUL itself.  */
+    sub pOut, pOut, iChunk
+    add pOut, pOut, iNulIdx
+
+    /* Phase 2: append src.  vl is capped by the remaining budget in
+       iLength, so no more than that many bytes are read or stored.  */
+L(append_src):
+    vsetvli zero, iLength, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vData, (pSrc)
+    csrr iChunk, vl
+    vmseq.vx vSrcNul, vData, zero
+    vfirst.m iNulIdx, vSrcNul
+    /* Store mask: every byte up to and including the first NUL.  */
+    vmsif.m vStoreMask, vSrcNul
+    vse8.v vData, (pOut), vStoreMask.t
+    add pSrc, pSrc, iChunk
+    add pOut, pOut, iChunk
+    sub iLength, iLength, iChunk
+    beqz iLength, L(budget_spent)
+    bltz iNulIdx, L(append_src)
+    ret
+
+    /* The budget ran out.  If the last chunk did not carry a NUL, the
+       result still needs its terminator.  */
+L(budget_spent):
+    bltz iNulIdx, L(terminate)
+    ret
+L(terminate):
+    sb zero, 0(pOut)
+    ret
+
+END(strncat)
+libc_hidden_builtin_def (strncat)
diff --git a/sysdeps/riscv/rvv/strncmp.S b/sysdeps/riscv/rvv/strncmp.S
new file mode 100644
index 0000000000..168dbb07ce
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncmp.S
@@ -0,0 +1,85 @@ 
+/* RVV versions strncmp.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+/* a0 carries the first string in and the result out.  */
+#define iResult a0
+#define pStr1 a0
+#define pStr2 a1
+#define iLength a2
+
+#define iChunk a3
+#define iStopIdx a4
+#define iByte1 a4
+#define iByte2 a5
+
+#define ELEM_LMUL_SETTING m1
+#define vLhs v0
+#define vRhs v4
+#define vStopMask v8
+#define vDiffMask v9
+
+ENTRY(strncmp)
+    /* Nothing to compare: the strings count as equal.  */
+    beqz iLength, L(equal)
+
+    /* Each pass compares one chunk; vl is capped by what is left of
+       iLength.  */
+L(next_chunk):
+    vsetvli zero, iLength, e8, ELEM_LMUL_SETTING, ta, ma
+
+    vle8ff.v vLhs, (pStr1)
+    vmseq.vx vStopMask, vLhs, zero      /* lhs[i] == 0.  */
+
+    vle8ff.v vRhs, (pStr2)
+    vmsne.vv vDiffMask, vLhs, vRhs      /* lhs[i] != rhs[i].  */
+
+    /* A NUL in lhs and a mismatch both end the comparison, so a single
+       vfirst.m over the OR of the two masks finds the deciding index.  */
+    vmor.mm vStopMask, vStopMask, vDiffMask
+    vfirst.m iStopIdx, vStopMask
+
+    /* Charge the bytes actually loaded (vl after both loads).  */
+    csrr iChunk, vl
+    sub iLength, iLength, iChunk
+    bgez iStopIdx, L(decide)
+
+    add pStr1, pStr1, iChunk
+    add pStr2, pStr2, iChunk
+    bnez iLength, L(next_chunk)
+
+    /* Also reached by fall-through once iLength hits zero.  iStopIdx is
+       then -1 and both pointers already sit past the chunk, so the adds
+       land on the last byte compared, which was equal in both.  */
+L(decide):
+    add pStr1, pStr1, iStopIdx
+    add pStr2, pStr2, iStopIdx
+    lbu iByte1, 0(pStr1)
+    lbu iByte2, 0(pStr2)
+    sub iResult, iByte1, iByte2
+    ret
+
+L(equal):
+    li iResult, 0
+    ret
+
+END(strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/sysdeps/riscv/rvv/strncpy.S b/sysdeps/riscv/rvv/strncpy.S
new file mode 100644
index 0000000000..e8d9450448
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncpy.S
@@ -0,0 +1,86 @@ 
+/* RVV versions strncpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jerry Shih <jerry.shih@sifive.com>.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define pDst a0         /* dst; never written, so still in a0 at return.  */
+#define pSrc a1         /* Read cursor into src.  */
+#define iLength a2      /* Remaining byte count, counted down.  */
+#define pDstPtr a3      /* Write cursor into dst.  */
+
+#define iVL a4          /* vl chosen by vsetvli in the zero-fill loop.  */
+#define iCurrentVL a5   /* vl read back after the fault-only-first load.  */
+#define iActiveElemPos a6 /* vfirst.m result: NUL index, negative if none.  */
+#define iTemp a7        /* Bytes of the last chunk that follow the NUL.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vMask1 v0
+#define vMask2 v1
+#define ZERO_FILL_ELEM_LMUL_SETTING m8
+#define vStr1 v8
+#define vStr2 v16
+
+ENTRY(strncpy)
+
+    mv pDstPtr, pDst
+
+    /* Copy src into dst one chunk at a time; vl is capped by iLength.  */
+L(strcpy_loop):
+    vsetvli zero, iLength, e8, ELEM_LMUL_SETTING, ta, ma
+    vle8ff.v vStr1, (pSrc)
+    vmseq.vx vMask2, vStr1, zero
+    csrr iCurrentVL, vl             /* The load may have shortened vl.  */
+    vfirst.m iActiveElemPos, vMask2
+    vmsif.m vMask1, vMask2          /* Bytes up to and including the NUL.  */
+    add pSrc, pSrc, iCurrentVL
+    sub iLength, iLength, iCurrentVL
+    vse8.v vStr1, (pDstPtr), vMask1.t
+    add pDstPtr, pDstPtr, iCurrentVL
+    bgez iActiveElemPos, L(fill_zero)   /* NUL copied: pad the rest.  */
+    bnez iLength, L(strcpy_loop)
+    ret                             /* Count exhausted, no NUL met.  */
+
+    /* Pad the rest of dst with zero bytes.  */
+L(fill_zero):
+    /* The NUL itself has already been stored, and iLength and pDstPtr
+       were both moved by the whole chunk.  iTemp is the number of chunk
+       bytes after the NUL (vfirst.m gives an index, hence the `-1`).  */
+    sub iTemp, iCurrentVL, iActiveElemPos
+    addi iTemp, iTemp, -1
+    add iLength, iLength, iTemp     /* Bytes of dst still to be zeroed.  */
+    /* Return early when the NUL landed on the last byte of dst.  */
+    bnez iLength, 1f
+    ret
+1:
+    sub pDstPtr, pDstPtr, iTemp     /* Back to the byte after the NUL.  */
+    vsetvli zero, iLength, e8, ZERO_FILL_ELEM_LMUL_SETTING, ta, ma
+    vmv.v.x vStr2, zero             /* Zero source for the stores below.  */
+
+L(fill_zero_loop):
+    vsetvli iVL, iLength, e8, ZERO_FILL_ELEM_LMUL_SETTING, ta, ma
+    vse8.v vStr2, (pDstPtr)
+    sub iLength, iLength, iVL
+    add pDstPtr, pDstPtr, iVL
+    bnez iLength, L(fill_zero_loop)
+
+    ret
+
+END(strncpy)
+libc_hidden_builtin_def (strncpy)