@@ -11,6 +11,8 @@ libc_a_SOURCES += \
%D%/memcpy.c \
%D%/memmove-asm.S \
%D%/memmove.c \
+ %D%/mempcpy-asm.S \
+ %D%/mempcpy.c \
%D%/memrchr.c \
%D%/memset.S \
%D%/setjmp.S \
new file mode 100644
@@ -0,0 +1,57 @@
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+/* void *mempcpy (void *dst, const void *src, size_t n)
+   Copies n bytes from src to dst using RVV byte loads/stores.
+   In:  a0 = dst, a1 = src, a2 = n.   Out: a0 = dst + n.
+   Scratch: t0-t3, a2, v0-v7 (one LMUL=8 register group), vl/vtype.  */
+.text
+.option push
+.option arch, +zve32x
+.global mempcpy
+.type mempcpy, @function
+mempcpy:
+#if __riscv_landing_pad
+ lpad 0
+#endif
+ mv t0, a0 /* t0 = running dst */
+ mv t1, a1 /* t1 = running src */
+ beqz a2, .Ldone /* n == 0: return dst unchanged */
+
+#ifndef __riscv_misaligned_fast
+ /* Without __riscv_misaligned_fast and with n >= 32, first copy the
+    1..7 (RV64) or 1..3 (RV32) bytes that bring dst to an XLEN/8-byte
+    boundary.  Shorter copies go straight to the bulk loop.  */
+ li t3, 32
+ bltu a2, t3, .Lbulk_copy
+ neg t2, t0
+#if __riscv_xlen == 64
+ andi t2, t2, 7 /* t2 = pad = (-dst) & 7 */
+#else
+ andi t2, t2, 3 /* t2 = pad = (-dst) & 3 */
+#endif
+ beqz t2, .Lbulk_copy /* pad == 0: dst already aligned */
+ /* pad <= 7 is below VLMAX for e8/m8 on any Zve32x core, so vl == pad.  */
+ vsetvli t3, t2, e8, m8, ta, ma
+ vle8.v v0, (t1)
+ vse8.v v0, (t0)
+ add t0, t0, t3
+ add t1, t1, t3
+ sub a2, a2, t3 /* n was >= 32 and vl <= 7, so n stays > 0 */
+#endif
+
+.Lbulk_copy:
+ vsetvli t2, a2, e8, m8, ta, ma /* t2 = bytes handled this pass */
+ vle8.v v0, (t1)
+ vse8.v v0, (t0)
+ add t0, t0, t2
+ add t1, t1, t2
+ sub a2, a2, t2
+ bnez a2, .Lbulk_copy
+ /* fallthrough */
+
+.Ldone:
+ mv a0, t0 /* return dst + n */
+ ret
+
+ .size mempcpy, .-mempcpy
+ .option pop
+#endif
new file mode 100644
@@ -0,0 +1,5 @@
+#if defined(__riscv_vector) && !defined(__OPTIMIZE_SIZE__) && !defined(PREFER_SIZE_OVER_SPEED)
+/* Vector speed build: mempcpy is defined in mempcpy-asm.S.  */
+#else
+# include "../../string/mempcpy.c"
+#endif