[4/6] riscv: add vectorized mempcpy

Message ID 20260513153835.213249-5-pincheng.plct@isrc.iscas.ac.cn
State New
Headers
Series riscv: add vectorized mem* routines |

Commit Message

Pincheng Wang May 13, 2026, 3:38 p.m. UTC
  The vector implementation uses m8 register grouping and processes data in
vector-length chunks, providing significant performance improvements on
RVV-capable hardware.  Use conditional compilation to fall back to the
generic implementation when __riscv_vector is not available, maintaining
compatibility with non-vector RISC-V systems.

Signed-off-by: Pincheng Wang <pincheng.plct@isrc.iscas.ac.cn>
---
 newlib/libc/machine/riscv/Makefile.inc  |  2 +
 newlib/libc/machine/riscv/mempcpy-asm.S | 57 +++++++++++++++++++++++++
 newlib/libc/machine/riscv/mempcpy.c     |  5 +++
 3 files changed, 64 insertions(+)
 create mode 100644 newlib/libc/machine/riscv/mempcpy-asm.S
 create mode 100644 newlib/libc/machine/riscv/mempcpy.c
  

Patch

diff --git a/newlib/libc/machine/riscv/Makefile.inc b/newlib/libc/machine/riscv/Makefile.inc
index fbf87b3db..14134d7ba 100644
--- a/newlib/libc/machine/riscv/Makefile.inc
+++ b/newlib/libc/machine/riscv/Makefile.inc
@@ -11,6 +11,8 @@  libc_a_SOURCES += \
         %D%/memcpy.c \
         %D%/memmove-asm.S \
         %D%/memmove.c \
+        %D%/mempcpy-asm.S \
+        %D%/mempcpy.c \
         %D%/memrchr.c \
         %D%/memset.S \
         %D%/setjmp.S \
diff --git a/newlib/libc/machine/riscv/mempcpy-asm.S b/newlib/libc/machine/riscv/mempcpy-asm.S
new file mode 100644
index 000000000..30916f30b
--- /dev/null
+++ b/newlib/libc/machine/riscv/mempcpy-asm.S
@@ -0,0 +1,57 @@ 
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+.text
+.option push
+.option arch, +zve32x
+.global mempcpy
+.type mempcpy, @function
+mempcpy:
+#if __riscv_landing_pad
+  lpad 0
+#endif
+  mv     t0, a0                   /* t0 = running dst */
+  mv     t1, a1                   /* t1 = running src */
+  beqz   a2, .Ldone               /* if n == 0, return */
+
+  /* Align dst to SZREG: skip when __riscv_misaligned_fast, else align */
+#ifndef __riscv_misaligned_fast
+  /* process small data directly with vectors, no alignment optimization */
+  li     t3, 32
+  bltu   a2, t3, .Lbulk_copy
+#if __riscv_xlen == 64
+  andi   t2, t0, 7                /* t2 = dst & 7 */
+  beqz   t2, .Lbulk_copy       /* already aligned to 8 bytes */
+  li     t4, 8
+  sub    t2, t4, t2               /* pad = 8 - (dst & 7) */
+#else
+  andi   t2, t0, 3                /* t2 = dst & 3 */
+  beqz   t2, .Lbulk_copy       /* already aligned to 4 bytes */
+  li     t4, 4
+  sub    t2, t4, t2               /* pad = 4 - (dst & 3) */
+#endif
+  /* copy prologue using vectors */
+  vsetvli t3, t2, e8, m8, ta, ma
+  vle8.v v0, (t1)
+  vse8.v v0, (t0)
+  add    t0, t0, t3
+  add    t1, t1, t3
+  sub    a2, a2, t3
+  beqz   a2, .Ldone
+#endif
+
+.Lbulk_copy:
+  vsetvli t2, a2, e8, m8, ta, ma
+  vle8.v v0, (t1)
+  vse8.v v0, (t0)
+  add    t0, t0, t2
+  add    t1, t1, t2
+  sub    a2, a2, t2
+  bnez   a2, .Lbulk_copy
+  /* fallthrough */
+
+.Ldone:
+  mv a0, t0                   /* return dst + n */
+  ret
+
+  .size mempcpy, .-mempcpy
+  .option pop
+#endif
diff --git a/newlib/libc/machine/riscv/mempcpy.c b/newlib/libc/machine/riscv/mempcpy.c
new file mode 100644
index 000000000..79b701d94
--- /dev/null
+++ b/newlib/libc/machine/riscv/mempcpy.c
@@ -0,0 +1,5 @@ 
+#if defined(__OPTIMIZE_SIZE__) || defined(PREFER_SIZE_OVER_SPEED) || !defined(__riscv_vector)
+# include "../../string/mempcpy.c"
+#else
+/* mempcpy defined in mempcpy-asm.S */
+#endif