RISC-V: Add vectorized strlen.

Message ID 072e8569-e08b-4a22-adb5-64e888bd471b@gmail.com
State Committed
Headers
Series RISC-V: Add vectorized strlen. |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply
rivoscibot/toolchain-ci-rivos-lint warning Lint failed
rivoscibot/toolchain-ci-rivos-apply-patch fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply

Commit Message

Robin Dapp Dec. 1, 2023, 3:21 p.m. UTC
  Hi,

this patch implements a vectorized strlen by re-using and slightly
adjusting the rawmemchr implementation.  Rawmemchr returns the address
of the needle while strlen returns the difference between needle address
and start address.

As before, strlen expansion is guarded by -minline-strlen.

While testing with -minline-strlen I encountered a vsetvl problem in
memcpy-chk.c where we didn't insert a vsetvl at the proper spot (after
a setjmp).  This needs to be fixed separately and I figured I'd post
this patch as-is.

Regards
 Robin

gcc/ChangeLog:

	* config/riscv/riscv-protos.h (expand_rawmemchr): Add strlen
	parameter.
	* config/riscv/riscv-string.cc (riscv_expand_strlen): Call
	rawmemchr.
	(expand_rawmemchr): Add strlen handling.
	* config/riscv/riscv.md: Add TARGET_VECTOR to strlen expander.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: New test.
	* gcc.target/riscv/rvv/autovec/builtin/strlen.c: New test.
---
 gcc/config/riscv/riscv-protos.h               |  2 +-
 gcc/config/riscv/riscv-string.cc              | 41 ++++++++++++++-----
 gcc/config/riscv/riscv.md                     |  8 +---
 .../riscv/rvv/autovec/builtin/strlen-run.c    | 37 +++++++++++++++++
 .../riscv/rvv/autovec/builtin/strlen.c        | 12 ++++++
 5 files changed, 83 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
  

Comments

juzhe.zhong@rivai.ai Dec. 1, 2023, 10:58 p.m. UTC | #1
LGTM.



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-12-01 23:21
To: gcc-patches; palmer; Kito Cheng; jeffreyalaw; juzhe.zhong@rivai.ai
CC: rdapp.gcc
Subject: [PATCH] RISC-V: Add vectorized strlen.
Hi,
 
this patch implements a vectorized strlen by re-using and slightly
adjusting the rawmemchr implementation.  Rawmemchr returns the address
of the needle while strlen returns the difference between needle address
and start address.
 
As before, strlen expansion is guarded by -minline-strlen.
 
While testing with -minline-strlen I encountered a vsetvl problem in
memcpy-chk.c where we didn't insert a vsetvl at the proper spot (after
a setjmp).  This needs to be fixed separately and I figured I'd post
this patch as-is.
 
Regards
Robin
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (expand_rawmemchr): Add strlen
parameter.
* config/riscv/riscv-string.cc (riscv_expand_strlen): Call
rawmemchr.
(expand_rawmemchr): Add strlen handling.
* config/riscv/riscv.md: Add TARGET_VECTOR to strlen expander.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/builtin/strlen-run.c: New test.
* gcc.target/riscv/rvv/autovec/builtin/strlen.c: New test.
---
gcc/config/riscv/riscv-protos.h               |  2 +-
gcc/config/riscv/riscv-string.cc              | 41 ++++++++++++++-----
gcc/config/riscv/riscv.md                     |  8 +---
.../riscv/rvv/autovec/builtin/strlen-run.c    | 37 +++++++++++++++++
.../riscv/rvv/autovec/builtin/strlen.c        | 12 ++++++
5 files changed, 83 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 695ee24ad6f..c94c82a9973 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -557,7 +557,7 @@ void expand_cond_unop (unsigned, rtx *);
void expand_cond_binop (unsigned, rtx *);
void expand_cond_ternop (unsigned, rtx *);
void expand_popcount (rtx *);
-void expand_rawmemchr (machine_mode, rtx, rtx, rtx);
+void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
void emit_vec_extract (rtx, rtx, poly_int64);
/* Rounding mode bitfield for fixed point VXRM.  */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 594ff49fc5a..6cde1bf89a0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -588,9 +588,16 @@ riscv_expand_strlen_scalar (rtx result, rtx src, rtx align)
bool
riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
{
+  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
+    {
+      riscv_vector::expand_rawmemchr (E_QImode, result, src, search_char,
+       /* strlen */ true);
+      return true;
+    }
+
   gcc_assert (search_char == const0_rtx);
-  if (TARGET_ZBB || TARGET_XTHEADBB)
+  if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
     return riscv_expand_strlen_scalar (result, src, align);
   return false;
@@ -979,12 +986,13 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
}
-/* Implement rawmemchr<mode> using vector instructions.
+/* Implement rawmemchr<mode> and strlen using vector instructions.
    It can be assumed that the needle is in the haystack, otherwise the
    behavior is undefined.  */
void
-expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
+expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
+   bool strlen)
{
   /*
     rawmemchr:
@@ -1005,6 +1013,9 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
   */
   gcc_assert (TARGET_VECTOR);
+  if (strlen)
+    gcc_assert (mode == E_QImode);
+
   unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
   int lmul = TARGET_MAX_LMUL;
   poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
@@ -1028,12 +1039,13 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
      return a pointer to the matching byte.  */
   unsigned int shift = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
-  rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
+  rtx src_addr = copy_addr_to_reg (XEXP (haystack, 0));
+  rtx start_addr = copy_addr_to_reg (XEXP (haystack, 0));
   rtx loop = gen_label_rtx ();
   emit_label (loop);
-  rtx vsrc = change_address (src, vmode, src_addr);
+  rtx vsrc = change_address (haystack, vmode, src_addr);
   /* Bump the pointer.  */
   rtx step = gen_reg_rtx (Pmode);
@@ -1052,8 +1064,8 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
     emit_insn (gen_read_vldi_zero_extend (cnt));
   /* Compare needle with haystack and store in a mask.  */
-  rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, pat), vec);
-  rtx vmsops[] = {mask, eq, vec, pat};
+  rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, needle), vec);
+  rtx vmsops[] = {mask, eq, vec, needle};
   emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
      riscv_vector::COMPARE_OP, vmsops, cnt);
@@ -1066,9 +1078,18 @@ expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
   rtx test = gen_rtx_LT (VOIDmode, end, const0_rtx);
   emit_jump_insn (gen_cbranch4 (Pmode, test, end, const0_rtx, loop));
-  /*  We found something at SRC + END * [1,2,4,8].  */
-  emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift))));
-  emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+  if (strlen)
+    {
+      /* For strlen, return the length.  */
+      emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+      emit_insn (gen_rtx_SET (dst, gen_rtx_MINUS (Pmode, dst, start_addr)));
+    }
+  else
+    {
+      /*  For rawmemchr, return the position at SRC + END * [1,2,4,8].  */
+      emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift))));
+      emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+    }
}
}
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6056391c6dc..54015eed57c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3747,13 +3747,9 @@ (define_expand "strlen<mode>"
     (match_operand:SI 2 "const_int_operand")
     (match_operand:SI 3 "const_int_operand")]
  UNSPEC_STRLEN))]
-  "riscv_inline_strlen && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+  "riscv_inline_strlen && !optimize_size
+    && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
{
-  rtx search_char = operands[2];
-
-  if (search_char != const0_rtx)
-    FAIL;
-
   if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
     DONE;
   else
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
new file mode 100644
index 00000000000..d29297a5f86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strlen" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+int
+__attribute__ ((noipa))
+foo2 (const char *s)
+{
+  int n = 0;
+  while (*s++ != '\0')
+    {
+      asm volatile ("");
+      n++;
+    }
+  return n;
+}
+
+#define SZ 10
+
+int main ()
+{
+  const char *s[SZ]
+    = {"",  "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+       "a", "z",    "1", "9",  "12345678901234567889012345678901234567890"};
+
+  for (int i = 0; i < SZ; i++)
+    {
+      if (foo (s[i]) != foo2 (s[i]))
+        __builtin_abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
new file mode 100644
index 00000000000..0c6cca63ebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strlen" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 1 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */
-- 
2.43.0
  
Robin Dapp Dec. 8, 2023, 1:19 p.m. UTC | #2
After Juzhe's vsetvl fix earlier this week this seems safe to push.
Going to do so later.

I tested on rv64gcv_zvl128b with -minline-strlen and didn't see
regressions apart from zbb-strlen-disabled-2.c which will always
fail with -minline-strlen because it expects -mno-inline-strlen.

Regards
 Robin
  

Patch

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 695ee24ad6f..c94c82a9973 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -557,7 +557,7 @@  void expand_cond_unop (unsigned, rtx *);
 void expand_cond_binop (unsigned, rtx *);
 void expand_cond_ternop (unsigned, rtx *);
 void expand_popcount (rtx *);
-void expand_rawmemchr (machine_mode, rtx, rtx, rtx);
+void expand_rawmemchr (machine_mode, rtx, rtx, rtx, bool = false);
 void emit_vec_extract (rtx, rtx, poly_int64);
 
 /* Rounding mode bitfield for fixed point VXRM.  */
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 594ff49fc5a..6cde1bf89a0 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -588,9 +588,16 @@  riscv_expand_strlen_scalar (rtx result, rtx src, rtx align)
 bool
 riscv_expand_strlen (rtx result, rtx src, rtx search_char, rtx align)
 {
+  if (TARGET_VECTOR && stringop_strategy & STRATEGY_VECTOR)
+    {
+      riscv_vector::expand_rawmemchr (E_QImode, result, src, search_char,
+				      /* strlen */ true);
+      return true;
+    }
+
   gcc_assert (search_char == const0_rtx);
 
-  if (TARGET_ZBB || TARGET_XTHEADBB)
+  if ((TARGET_ZBB || TARGET_XTHEADBB) && stringop_strategy & STRATEGY_SCALAR)
     return riscv_expand_strlen_scalar (result, src, align);
 
   return false;
@@ -979,12 +986,13 @@  expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
 }
 
 
-/* Implement rawmemchr<mode> using vector instructions.
+/* Implement rawmemchr<mode> and strlen using vector instructions.
    It can be assumed that the needle is in the haystack, otherwise the
    behavior is undefined.  */
 
 void
-expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
+expand_rawmemchr (machine_mode mode, rtx dst, rtx haystack, rtx needle,
+		  bool strlen)
 {
   /*
     rawmemchr:
@@ -1005,6 +1013,9 @@  expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
   */
   gcc_assert (TARGET_VECTOR);
 
+  if (strlen)
+    gcc_assert (mode == E_QImode);
+
   unsigned int isize = GET_MODE_SIZE (mode).to_constant ();
   int lmul = TARGET_MAX_LMUL;
   poly_int64 nunits = exact_div (BYTES_PER_RISCV_VECTOR * lmul, isize);
@@ -1028,12 +1039,13 @@  expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
      return a pointer to the matching byte.  */
   unsigned int shift = exact_log2 (GET_MODE_SIZE (mode).to_constant ());
 
-  rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
+  rtx src_addr = copy_addr_to_reg (XEXP (haystack, 0));
+  rtx start_addr = copy_addr_to_reg (XEXP (haystack, 0));
 
   rtx loop = gen_label_rtx ();
   emit_label (loop);
 
-  rtx vsrc = change_address (src, vmode, src_addr);
+  rtx vsrc = change_address (haystack, vmode, src_addr);
 
   /* Bump the pointer.  */
   rtx step = gen_reg_rtx (Pmode);
@@ -1052,8 +1064,8 @@  expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
     emit_insn (gen_read_vldi_zero_extend (cnt));
 
   /* Compare needle with haystack and store in a mask.  */
-  rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, pat), vec);
-  rtx vmsops[] = {mask, eq, vec, pat};
+  rtx eq = gen_rtx_EQ (mask_mode, gen_const_vec_duplicate (vmode, needle), vec);
+  rtx vmsops[] = {mask, eq, vec, needle};
   emit_nonvlmax_insn (code_for_pred_eqne_scalar (vmode),
 		      riscv_vector::COMPARE_OP, vmsops, cnt);
 
@@ -1066,9 +1078,18 @@  expand_rawmemchr (machine_mode mode, rtx dst, rtx src, rtx pat)
   rtx test = gen_rtx_LT (VOIDmode, end, const0_rtx);
   emit_jump_insn (gen_cbranch4 (Pmode, test, end, const0_rtx, loop));
 
-  /*  We found something at SRC + END * [1,2,4,8].  */
-  emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift))));
-  emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+  if (strlen)
+    {
+      /* For strlen, return the length.  */
+      emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+      emit_insn (gen_rtx_SET (dst, gen_rtx_MINUS (Pmode, dst, start_addr)));
+    }
+  else
+    {
+      /*  For rawmemchr, return the position at SRC + END * [1,2,4,8].  */
+      emit_insn (gen_rtx_SET (end, gen_rtx_ASHIFT (Pmode, end, GEN_INT (shift))));
+      emit_insn (gen_rtx_SET (dst, gen_rtx_PLUS (Pmode, src_addr, end)));
+    }
 }
 
 }
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 6056391c6dc..54015eed57c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3747,13 +3747,9 @@  (define_expand "strlen<mode>"
 		     (match_operand:SI 2 "const_int_operand")
 		     (match_operand:SI 3 "const_int_operand")]
 		  UNSPEC_STRLEN))]
-  "riscv_inline_strlen && !optimize_size && (TARGET_ZBB || TARGET_XTHEADBB)"
+  "riscv_inline_strlen && !optimize_size
+    && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_VECTOR)"
 {
-  rtx search_char = operands[2];
-
-  if (search_char != const0_rtx)
-    FAIL;
-
   if (riscv_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
     DONE;
   else
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
new file mode 100644
index 00000000000..d29297a5f86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen-run.c
@@ -0,0 +1,37 @@ 
+/* { dg-do run } */
+/* { dg-additional-options "-O3 -minline-strlen" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+int
+__attribute__ ((noipa))
+foo2 (const char *s)
+{
+  int n = 0;
+  while (*s++ != '\0')
+    {
+      asm volatile ("");
+      n++;
+    }
+  return n;
+}
+
+#define SZ 10
+
+int main ()
+{
+  const char *s[SZ]
+    = {"",  "asdf", "0", "\0", "!@#$%***m1123fdnmoi43",
+       "a", "z",    "1", "9",  "12345678901234567889012345678901234567890"};
+
+  for (int i = 0; i < SZ; i++)
+    {
+      if (foo (s[i]) != foo2 (s[i]))
+        __builtin_abort ();
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
new file mode 100644
index 00000000000..0c6cca63ebf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/builtin/strlen.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile { target { riscv_v } } } */
+/* { dg-additional-options "-O3 -minline-strlen" } */
+
+int
+__attribute__ ((noipa))
+foo (const char *s)
+{
+  return __builtin_strlen (s);
+}
+
+/* { dg-final { scan-assembler-times "vle8ff" 1 } } */
+/* { dg-final { scan-assembler-times "vfirst.m" 1 } } */