[v3,3/5] riscv: vectorized str* functions

Message ID 20230504074851.38763-4-hau.hsu@sifive.com
State Changes Requested, archived
Delegated to: Palmer Dabbelt
Headers
Series riscv: Vectorized mem*/str* function |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Hau Hsu May 4, 2023, 7:48 a.m. UTC
  From: Jerry Shih <jerry.shih@sifive.com>

This patch proposes implementations of strcat, strcmp, strcpy, strlen,
strncat, strncmp and strncpy that leverage the RISC-V V extension (RVV),
version 1.0. These routines assume VLEN is at least 32 bits, as is
required by all currently defined vector extensions, and they support
arbitrarily large VLEN. All implementations work for both RV32 and RV64
platforms, and make no assumptions about page size.
---
 sysdeps/riscv/rvv/strcat.S  | 71 ++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strcmp.S  | 88 +++++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strcpy.S  | 55 +++++++++++++++++++++++
 sysdeps/riscv/rvv/strlen.S  | 53 ++++++++++++++++++++++
 sysdeps/riscv/rvv/strncat.S | 82 ++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strncmp.S | 84 +++++++++++++++++++++++++++++++++++
 sysdeps/riscv/rvv/strncpy.S | 85 +++++++++++++++++++++++++++++++++++
 7 files changed, 518 insertions(+)
 create mode 100644 sysdeps/riscv/rvv/strcat.S
 create mode 100644 sysdeps/riscv/rvv/strcmp.S
 create mode 100644 sysdeps/riscv/rvv/strcpy.S
 create mode 100644 sysdeps/riscv/rvv/strlen.S
 create mode 100644 sysdeps/riscv/rvv/strncat.S
 create mode 100644 sysdeps/riscv/rvv/strncmp.S
 create mode 100644 sysdeps/riscv/rvv/strncpy.S
  

Patch

diff --git a/sysdeps/riscv/rvv/strcat.S b/sysdeps/riscv/rvv/strcat.S
new file mode 100644
index 0000000000..fb5858fa82
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcat.S
@@ -0,0 +1,71 @@ 
+/* RVV versions strcat.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define dst a0              /* Destination string; never written here, so still in a0 at `ret`.  */
+#define src a1              /* Source string; advanced chunk by chunk while copying.  */
+#define dst_ptr a2          /* Cursor into dst: scan position first, then write position.  */
+
+#define ivl a3              /* Receives vl from vsetvli; not read afterwards.  */
+#define cur_vl a4           /* vl read back after each fault-only-first load.  */
+#define active_elem_pos a5  /* vfirst.m result: index of the first NUL in the chunk, or -1.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vmask1 v0           /* NUL flags while scanning dst; store mask (vmask1.t) while copying.  */
+#define vmask2 v1           /* NUL flags of the current src chunk.  */
+#define vstr1 v8            /* Current chunk of string bytes.  */
+#define vstr2 v16           /* Not referenced in this file.  */
+/* strcat (dst, src): find dst's terminating NUL, then copy src (including its NUL) there.  */
+ENTRY(strcat)
+
+    mv dst_ptr, dst                          /* Work on a copy so a0 keeps dst.  */
+
+    /* Perform `strlen(dst)`: leave dst_ptr on dst's terminating NUL.  */
+L(strlen_loop):
+    vsetvli ivl, zero, e8, ELEM_LMUL_SETTING, ta, ma   /* rs1 = zero: request VLMAX bytes.  */
+
+    vle8ff.v vstr1, (dst_ptr)                /* Fault-only-first load; may shrink vl.  */
+    vmseq.vx vmask1, vstr1, zero             /* vmask1[i] = (chunk[i] == 0).  */
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask1         /* Index of the first NUL, or -1 if none.  */
+    add dst_ptr, dst_ptr, cur_vl             /* Step past this chunk.  */
+    bltz active_elem_pos, L(strlen_loop)     /* No NUL yet: scan the next chunk.  */
+
+    sub dst_ptr, dst_ptr, cur_vl             /* Rewind to the start of the last chunk...  */
+    add dst_ptr, dst_ptr, active_elem_pos    /* ...and land on dst's NUL.  */
+
+    /* Perform `strcpy(dst_ptr, src)`, overwriting dst's old NUL first.  */
+L(strcpy_loop):
+    vsetvli ivl, zero, e8, ELEM_LMUL_SETTING, ta, ma   /* Request VLMAX bytes again.  */
+
+    vle8ff.v vstr1, (src)                    /* Fault-only-first load; may shrink vl.  */
+    vmseq.vx vmask2, vstr1, zero             /* vmask2[i] = (chunk[i] == 0).  */
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask2         /* Index of the first NUL, or -1 if none.  */
+    vmsif.m vmask1, vmask2                   /* Select bytes up to and including the first NUL (all if none).  */
+    add src, src, cur_vl
+    vse8.v vstr1, (dst_ptr), vmask1.t        /* Store only the selected bytes.  */
+    add dst_ptr, dst_ptr, cur_vl
+    bltz active_elem_pos, L(strcpy_loop)     /* NUL not copied yet: next chunk.  */
+
+    ret                                      /* a0 still holds dst.  */
+
+END(strcat)
+libc_hidden_builtin_def (strcat)
diff --git a/sysdeps/riscv/rvv/strcmp.S b/sysdeps/riscv/rvv/strcmp.S
new file mode 100644
index 0000000000..2e60d76dc8
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcmp.S
@@ -0,0 +1,88 @@ 
+/* RVV versions strcmp.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define result a0            /* Return value; shares a0 with str1.  */
+
+#define str1 a0              /* First string; advanced while chunks compare equal.  */
+#define str2 a1              /* Second string; advanced in step with str1.  */
+
+#define ivl a2               /* vl from vsetvli, later the vl read back from the CSR.  */
+#define temp1 a3             /* Index of first NUL in the str1 chunk (-1 if none); later a loaded byte.  */
+#define temp2 a4             /* Index of first differing byte (-1 if none); later a loaded byte.  */
+
+#define vstr1 v0
+#define vstr2 v8
+#define vmask1 v16           /* vmask1[i] = (vstr1[i] == 0).  */
+#define vmask2 v17           /* vmask2[i] = (vstr1[i] != vstr2[i]).  */
+/* strcmp (str1, str2): return the difference of the first deciding pair of bytes.  */
+ENTRY(strcmp)
+    /* lmul=1 */
+
+L(Loop):
+    vsetvli ivl, zero, e8, m1, ta, ma    /* rs1 = zero: request VLMAX bytes.  */
+    vle8ff.v vstr1, (str1)               /* Fault-only-first load; may shrink vl.  */
+    /* check if vstr1[i] == 0 */
+    vmseq.vx vmask1, vstr1, zero
+
+    vle8ff.v vstr2, (str2)               /* Runs with the current vl and may shrink it again.  */
+    /* check if vstr1[i] != vstr2[i] */
+    vmsne.vv vmask2, vstr1, vstr2
+
+    /* find the index x for vstr1[x]==0 (-1 if there is none) */
+    vfirst.m temp1, vmask1
+    /* find the index x for vstr1[x]!=vstr2[x] (-1 if there is none) */
+    vfirst.m temp2, vmask2
+
+    bgez temp1, L(check1)                /* A NUL was seen in str1's chunk.  */
+    bgez temp2, L(check2)                /* No NUL, but a difference was seen.  */
+
+    /* get the current vl updated by vle8ff. */
+    csrr ivl, vl
+    add str1, str1, ivl                  /* Chunk was equal and NUL-free: skip it.  */
+    add str2, str2, ivl
+    j L(Loop)
+
+    /* temp1>=0: decide whether the NUL or a difference comes first.  */
+L(check1):
+    bltz temp2, 1f                       /* No difference in the chunk: the NUL decides.  */
+    blt temp2, temp1, L(check2)          /* A difference before the NUL decides.  */
+1:
+    /* Reached with temp2<0, or with temp2>=0 && temp1<=temp2:  */
+    /* compare the bytes at index temp1, the first NUL of str1.  */
+    add str1, str1, temp1
+    add str2, str2, temp1
+    lbu temp1, 0(str1)
+    lbu temp2, 0(str2)
+    sub result, temp1, temp2             /* lbu zero-extends: unsigned byte difference.  */
+    ret
+
+    /* Reached with temp1<0 && temp2>=0, or from check1 with 0<=temp2<temp1:  */
+    /* compare the bytes at index temp2, the first difference.  */
+L(check2):
+    add str1, str1, temp2
+    add str2, str2, temp2
+    lbu temp1, 0(str1)
+    lbu temp2, 0(str2)
+    sub result, temp1, temp2             /* lbu zero-extends: unsigned byte difference.  */
+    ret
+
+END(strcmp)
+libc_hidden_builtin_def (strcmp)
diff --git a/sysdeps/riscv/rvv/strcpy.S b/sysdeps/riscv/rvv/strcpy.S
new file mode 100644
index 0000000000..1ad433f5f3
--- /dev/null
+++ b/sysdeps/riscv/rvv/strcpy.S
@@ -0,0 +1,55 @@ 
+/* RVV versions strcpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define dst a0              /* Destination buffer; never written here, so still in a0 at `ret`.  */
+#define src a1              /* Source string; advanced chunk by chunk.  */
+#define dst_ptr a2          /* Write cursor into dst.  */
+
+#define ivl a3              /* Receives vl from vsetvli; not read afterwards.  */
+#define cur_vl a4           /* vl read back after the fault-only-first load.  */
+#define active_elem_pos a5  /* vfirst.m result: index of the first NUL in the chunk, or -1.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vmask1 v0           /* Store mask (vmask1.t): bytes up to and including the NUL.  */
+#define vmask2 v1           /* NUL flags of the current src chunk.  */
+#define vstr1 v8            /* Current chunk of string bytes.  */
+#define vstr2 v16           /* Not referenced in this file.  */
+/* strcpy (dst, src): copy src, including its terminating NUL, to dst.  */
+ENTRY(strcpy)
+
+    mv dst_ptr, dst                          /* Work on a copy so a0 keeps dst.  */
+
+L(strcpy_loop):
+    vsetvli ivl, zero, e8, ELEM_LMUL_SETTING, ta, ma   /* rs1 = zero: request VLMAX bytes.  */
+    vle8ff.v vstr1, (src)                    /* Fault-only-first load; may shrink vl.  */
+    vmseq.vx vmask2, vstr1, zero             /* vmask2[i] = (chunk[i] == 0).  */
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask2         /* Index of the first NUL, or -1 if none.  */
+    vmsif.m vmask1, vmask2                   /* Select bytes up to and including the first NUL (all if none).  */
+    add src, src, cur_vl
+    vse8.v vstr1, (dst_ptr), vmask1.t        /* Store only the selected bytes.  */
+    add dst_ptr, dst_ptr, cur_vl
+    bltz active_elem_pos, L(strcpy_loop)     /* NUL not copied yet: next chunk.  */
+
+    ret                                      /* a0 still holds dst.  */
+
+END(strcpy)
+libc_hidden_builtin_def (strcpy)
diff --git a/sysdeps/riscv/rvv/strlen.S b/sysdeps/riscv/rvv/strlen.S
new file mode 100644
index 0000000000..cf3698f52a
--- /dev/null
+++ b/sysdeps/riscv/rvv/strlen.S
@@ -0,0 +1,53 @@ 
+/* RVV versions strlen.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define ret_len a0           /* Return value; same register as s_begin.  */
+#define s_begin a0           /* Incoming string pointer.  */
+#define s_cursor a1          /* Scan position inside the string.  */
+#define vl_req a2            /* vl handed back by vsetvli...  */
+#define vl_got a2            /* ...replaced by the vl left behind by the load.  */
+#define nul_idx a3           /* Index of the first NUL in the chunk, -1 if none.  */
+
+#define SCAN_LMUL m2
+#define v_chunk v0           /* With LMUL=2 this group covers v0-v1.  */
+#define v_nulmask v2         /* First register after the v_chunk group.  */
+/* strlen (s): number of bytes before the first NUL, scanned one vector chunk at a time.  */
+ENTRY(strlen)
+
+    mv s_cursor, s_begin
+L(scan):
+    vsetvli vl_req, zero, e8, SCAN_LMUL, ta, ma   /* rs1 = zero: ask for VLMAX bytes.  */
+    vle8ff.v v_chunk, (s_cursor)                  /* Fault-only-first load; vl may come back smaller.  */
+    csrr vl_got, vl                               /* Bytes really loaded.  */
+    vmseq.vi v_nulmask, v_chunk, 0                /* Flag every zero byte.  */
+    vfirst.m nul_idx, v_nulmask                   /* Position of the first flag, or -1.  */
+    add s_cursor, s_cursor, vl_got                /* Move on to the next chunk.  */
+    bltz nul_idx, L(scan)                         /* Nothing flagged: keep scanning.  */
+
+    add s_begin, s_begin, vl_got                  /* s_cursor is vl_got past the last chunk; bias the base equally.  */
+    add s_cursor, s_cursor, nul_idx               /* Offset by the NUL's index in that chunk.  */
+    sub ret_len, s_cursor, s_begin                /* The two vl_got terms cancel: NUL address minus string start.  */
+
+    ret
+
+END(strlen)
+
+libc_hidden_builtin_def (strlen)
diff --git a/sysdeps/riscv/rvv/strncat.S b/sysdeps/riscv/rvv/strncat.S
new file mode 100644
index 0000000000..d30a6533a3
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncat.S
@@ -0,0 +1,82 @@ 
+/* RVV versions strncat.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define dst a0              /* Destination string; never written here, so still in a0 at `ret`.  */
+#define src a1              /* Source string; advanced chunk by chunk while copying.  */
+#define length a2           /* Remaining number of src bytes that may be appended.  */
+#define dst_ptr a3          /* Cursor into dst: scan position first, then write position.  */
+
+#define ivl a4              /* Receives vl from vsetvli; not read afterwards.  */
+#define cur_vl a5           /* vl read back after each fault-only-first load.  */
+#define active_elem_pos a6  /* vfirst.m result: index of the first NUL in the chunk, or -1.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vmask1 v0           /* NUL flags while scanning dst; store mask (vmask1.t) while copying.  */
+#define vmask2 v1           /* NUL flags of the current src chunk.  */
+#define vstr1 v8            /* Current chunk of string bytes.  */
+#define vstr2 v16           /* Not referenced in this file.  */
+/* strncat (dst, src, length): append at most `length` bytes of src to dst, then a NUL.  */
+ENTRY(strncat)
+
+    mv dst_ptr, dst                          /* Work on a copy so a0 keeps dst.  */
+
+    /* Find the end of dst: leave dst_ptr on its terminating NUL.  */
+L(strlen_loop):
+    vsetvli ivl, zero, e8, ELEM_LMUL_SETTING, ta, ma   /* rs1 = zero: request VLMAX bytes.  */
+
+    vle8ff.v vstr1, (dst_ptr)                /* Fault-only-first load; may shrink vl.  */
+    /* find the '\0'.  */
+    vmseq.vx vmask1, vstr1, zero
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask1         /* Index of the first NUL, or -1 if none.  */
+    add dst_ptr, dst_ptr, cur_vl             /* Step past this chunk.  */
+    bltz active_elem_pos, L(strlen_loop)     /* No NUL yet: scan the next chunk.  */
+
+    sub dst_ptr, dst_ptr, cur_vl             /* Rewind to the start of the last chunk...  */
+    add dst_ptr, dst_ptr, active_elem_pos    /* ...and land on dst's NUL.  */
+
+    /* Copy src to dst_ptr, never touching more than `length` bytes.  */
+L(strcpy_loop):
+    vsetvli zero, length, e8, ELEM_LMUL_SETTING, ta, ma   /* vl is capped by the remaining length.  */
+
+    vle8ff.v vstr1, (src)                    /* Fault-only-first load; may shrink vl.  */
+    vmseq.vx vmask2, vstr1, zero             /* vmask2[i] = (chunk[i] == 0).  */
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask2         /* Index of the first NUL, or -1 if none.  */
+    vmsif.m vmask1, vmask2                   /* Select bytes up to and including the first NUL (all if none).  */
+    add src, src, cur_vl
+    sub length, length, cur_vl               /* Account for the bytes just consumed.  */
+    vse8.v vstr1, (dst_ptr), vmask1.t        /* Store only the selected bytes.  */
+    add dst_ptr, dst_ptr, cur_vl
+    beqz length, L(fill_zero)                /* Budget used up: make sure dst is terminated.  */
+    bltz active_elem_pos, L(strcpy_loop)     /* Budget left and no NUL yet: next chunk.  */
+
+    ret                                      /* src's NUL was copied; a0 still holds dst.  */
+
+L(fill_zero):
+    bgez active_elem_pos, L(fill_zero_end)   /* The last chunk already carried src's NUL.  */
+    sb zero, (dst_ptr)                       /* Otherwise terminate right after the copied bytes.  */
+
+L(fill_zero_end):
+    ret
+
+END(strncat)
+libc_hidden_builtin_def (strncat)
diff --git a/sysdeps/riscv/rvv/strncmp.S b/sysdeps/riscv/rvv/strncmp.S
new file mode 100644
index 0000000000..2b6ab1f233
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncmp.S
@@ -0,0 +1,84 @@ 
+/* RVV versions strncmp.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define result a0            /* Return value; shares a0 with str1.  */
+
+#define str1 a0              /* First string; advanced while chunks compare equal.  */
+#define str2 a1              /* Second string; advanced in step with str1.  */
+#define length a2            /* Remaining number of bytes to compare.  */
+
+#define ivl a3               /* vl read back after both fault-only-first loads.  */
+#define temp1 a4             /* Index of the first NUL-or-difference (-1 if none); later a loaded byte.  */
+#define temp2 a5             /* Byte loaded from str2 for the final subtraction.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vstr1 v0
+#define vstr2 v4
+#define vmask1 v8            /* (vstr1[i] == 0), later OR-ed with vmask2.  */
+#define vmask2 v9            /* (vstr1[i] != vstr2[i]).  */
+/* strncmp (str1, str2, length): compare at most `length` bytes of two strings.  */
+ENTRY(strncmp)
+
+    beqz length, L(zero_length)          /* Nothing to compare: the strings are equal.  */
+
+L(loop):
+    vsetvli zero, length, e8, ELEM_LMUL_SETTING, ta, ma   /* vl is capped by the remaining length.  */
+
+    vle8ff.v vstr1, (str1)               /* Fault-only-first load; may shrink vl.  */
+    /* vstr1[i] == 0.  */
+    vmseq.vx vmask1, vstr1, zero
+
+    vle8ff.v vstr2, (str2)               /* Runs with the current vl and may shrink it again.  */
+    /* vstr1[i] != vstr2[i].  */
+    vmsne.vv vmask2, vstr1, vstr2
+
+    csrr ivl, vl                         /* Bytes covered by both loads.  */
+
+    /* r = mask1 | mask2
+       We could use vfirst.m to get the first zero char or the
+       first different char between str1 and str2.  */
+    vmor.mm vmask1, vmask1, vmask2
+
+    sub length, length, ivl              /* Account for the bytes just examined.  */
+
+    vfirst.m temp1, vmask1               /* Index of the deciding byte, or -1 if none.  */
+
+    bgez temp1, L(end_loop)              /* Deciding byte found; str1/str2 still at chunk start.  */
+
+    add str1, str1, ivl                  /* Chunk was equal and NUL-free: skip it.  */
+    add str2, str2, ivl
+    bnez length, L(loop)                 /* On fall-through temp1 is -1 and the pointers are one past the last byte.  */
+L(end_loop):
+
+    add str1, str1, temp1                /* With temp1 == -1 this steps back to the last compared byte...  */
+    add str2, str2, temp1                /* ...whose pair was equal, so the result below is 0.  */
+    lbu temp1, 0(str1)
+    lbu temp2, 0(str2)
+
+    sub result, temp1, temp2             /* lbu zero-extends: unsigned byte difference.  */
+    ret
+
+L(zero_length):
+    li result, 0
+    ret
+
+END(strncmp)
+libc_hidden_builtin_def (strncmp)
diff --git a/sysdeps/riscv/rvv/strncpy.S b/sysdeps/riscv/rvv/strncpy.S
new file mode 100644
index 0000000000..53fb8cdec7
--- /dev/null
+++ b/sysdeps/riscv/rvv/strncpy.S
@@ -0,0 +1,85 @@ 
+/* RVV versions strncpy.  RISC-V version.
+   Copyright (C) 2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+#define dst a0              /* Destination buffer; never written here, so still in a0 at `ret`.  */
+#define src a1              /* Source string; advanced chunk by chunk.  */
+#define length a2           /* Remaining number of bytes to write into dst.  */
+#define dst_ptr a3          /* Write cursor into dst.  */
+
+#define ivl a4              /* vl granted by vsetvli in the zero-fill loop.  */
+#define cur_vl a5           /* vl read back after the fault-only-first load.  */
+#define active_elem_pos a6  /* vfirst.m result: index of the first NUL in the chunk, or -1.  */
+#define temp a7             /* Bytes of the last chunk that lie after the NUL.  */
+
+#define ELEM_LMUL_SETTING m1
+#define vmask1 v0           /* Store mask (vmask1.t): bytes up to and including the NUL.  */
+#define vmask2 v1           /* NUL flags of the current src chunk.  */
+#define ZERO_FILL_ELEM_LMUL_SETTING m8
+#define vstr1 v8            /* Current chunk of string bytes (LMUL=1).  */
+#define vstr2 v16           /* All-zero register group for padding (LMUL=8: v16-v23).  */
+/* strncpy (dst, src, length): copy up to `length` bytes of src, then zero-pad dst to `length`.  */
+ENTRY(strncpy)
+
+    mv dst_ptr, dst                          /* Work on a copy so a0 keeps dst.  */
+
+    /* Copy src to dst_ptr.  */
+L(strcpy_loop):
+    vsetvli zero, length, e8, ELEM_LMUL_SETTING, ta, ma   /* vl is capped by the remaining length.  */
+    vle8ff.v vstr1, (src)                    /* Fault-only-first load; may shrink vl.  */
+    vmseq.vx vmask2, vstr1, zero             /* vmask2[i] = (chunk[i] == 0).  */
+    csrr cur_vl, vl                          /* Number of bytes actually loaded.  */
+    vfirst.m active_elem_pos, vmask2         /* Index of the first NUL, or -1 if none.  */
+    vmsif.m vmask1, vmask2                   /* Select bytes up to and including the first NUL (all if none).  */
+    add src, src, cur_vl
+    sub length, length, cur_vl               /* Provisionally count the whole chunk as written.  */
+    vse8.v vstr1, (dst_ptr), vmask1.t        /* Store only the selected bytes.  */
+    add dst_ptr, dst_ptr, cur_vl
+    bgez active_elem_pos, L(fill_zero)       /* NUL copied: pad the rest with zeros.  */
+    bnez length, L(strcpy_loop)              /* No NUL yet and room left: next chunk.  */
+    ret                                      /* `length` bytes copied without meeting a NUL.  */
+
+    /* Fill the tail zero.  */
+L(fill_zero):
+    /* The NUL itself has already been stored.  `vfirst.m` gave its
+       index, so the chunk holds cur_vl - index - 1 bytes after it that
+       were masked out of the store and still have to be zero-filled.  */
+    sub temp, cur_vl, active_elem_pos
+    addi temp, temp, -1
+    add length, length, temp                 /* Give back the bytes that were not stored.  */
+    /* Have an early return for `strlen(src) + 1 == count` case.  */
+    bnez length, 1f
+    ret
+1:
+    sub dst_ptr, dst_ptr, temp               /* Point just past the stored NUL.  */
+    vsetvli zero, length, e8, ZERO_FILL_ELEM_LMUL_SETTING, ta, ma
+    vmv.v.x vstr2, zero                      /* Zero the first vl bytes of the padding group.  */
+
+L(fill_zero_loop):
+    vsetvli ivl, length, e8, ZERO_FILL_ELEM_LMUL_SETTING, ta, ma   /* ivl = bytes to pad this round.  */
+    vse8.v vstr2, (dst_ptr)                  /* Unmasked store of ivl zero bytes.  */
+    sub length, length, ivl
+    add dst_ptr, dst_ptr, ivl
+    bnez length, L(fill_zero_loop)
+
+    ret                                      /* a0 still holds dst.  */
+
+END(strncpy)
+libc_hidden_builtin_def (strncpy)