@@ -20,6 +20,9 @@ sysdep_routines += \
strcpy-unaligned \
strcpy-lsx \
strcpy-lasx \
+ stpcpy-aligned \
+ stpcpy-lsx \
+ stpcpy-lasx \
memcpy-aligned \
memcpy-unaligned \
memmove-unaligned \
@@ -85,6 +85,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_aligned)
)
+ IFUNC_IMPL (i, name, stpcpy,
+#if !defined __loongarch_soft_float
+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LASX, __stpcpy_lasx)
+ IFUNC_IMPL_ADD (array, i, stpcpy, SUPPORT_LSX, __stpcpy_lsx)
+#endif
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_aligned)
+ )
+
IFUNC_IMPL (i, name, memcpy,
#if !defined __loongarch_soft_float
IFUNC_IMPL_ADD (array, i, memcpy, SUPPORT_LASX, __memcpy_lasx)
new file mode 100644
@@ -0,0 +1,40 @@
+/* Common definition for stpcpy ifunc selections.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <ifunc-init.h>
+
+#if !defined __loongarch_soft_float
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lasx) attribute_hidden; /* LASX variant; only declared when not soft-float.  */
+extern __typeof (REDIRECT_NAME) OPTIMIZE (lsx) attribute_hidden; /* LSX variant; only declared when not soft-float.  */
+#endif
+extern __typeof (REDIRECT_NAME) OPTIMIZE (aligned) attribute_hidden; /* Always declared; the selector's final fallback.  */
+
+static inline void *
+IFUNC_SELECTOR (void)
+{
+  /* Try LASX, then LSX; the aligned version is the unconditional fallback.  */
+#ifndef __loongarch_soft_float
+  if (SUPPORT_LASX)
+    return OPTIMIZE (lasx);
+  if (SUPPORT_LSX)
+    return OPTIMIZE (lsx);
+#endif
+  return OPTIMIZE (aligned);
+}
new file mode 100644
@@ -0,0 +1,191 @@
+/* Optimized stpcpy aligned implementation using basic LoongArch instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc)
+# define STPCPY_NAME __stpcpy_aligned /* Name listed for stpcpy in ifunc-impl-list.c.  */
+#else
+# define STPCPY_NAME __stpcpy
+#endif
+
+LEAF(STPCPY_NAME, 6) /* In: a0 = dest, a1 = src.  Out: a0 = address of the NUL stored in dest.  */
+ andi a3, a0, 0x7 /* a3 = dest & 7.  */
+ beqz a3, L(dest_align) /* dest is already 8-byte aligned.  */
+ sub.d a5, a1, a3
+ addi.d a5, a5, 8 /* a5 = src + (8 - a3): value of src at which dest becomes aligned.  */
+
+L(make_dest_align): /* Copy byte by byte until dest is 8-byte aligned.  */
+ ld.b t0, a1, 0
+ addi.d a1, a1, 1
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+
+ beqz t0, L(al_out) /* The NUL was just copied; al_out steps a0 back onto it.  */
+ bne a1, a5, L(make_dest_align)
+
+L(dest_align):
+ andi a4, a1, 7 /* a4 = src & 7.  */
+ bstrins.d a1, zero, 2, 0 /* Round src down to an 8-byte boundary.  */
+
+ lu12i.w t5, 0x1010 /* t5 = 0x01010000.  */
+ ld.d t0, a1, 0 /* Aligned doubleword that contains the next source byte.  */
+ ori t5, t5, 0x101 /* t5 = 0x01010101.  */
+ bstrins.d t5, t5, 63, 32 /* Copy the low word up: t5 = 0x0101010101010101.  */
+
+ slli.d t6, t5, 0x7 /* t6 = 0x8080808080808080.  */
+ bnez a4, L(unalign) /* src is not 8-byte aligned while dest is.  */
+ sub.d t1, t0, t5
+ andn t2, t6, t0 /* t2 = t6 & ~t0.  */
+
+ and t3, t1, t2 /* t3 = (t0 - t5) & ~t0 & t6: non-zero iff t0 contains a zero byte.  */
+ bnez t3, L(al_end)
+
+L(al_loop): /* src and dest both 8-byte aligned: copy one doubleword per round.  */
+ st.d t0, a0, 0
+ ld.d t0, a1, 8
+
+ addi.d a1, a1, 8
+ addi.d a0, a0, 8
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+
+ and t3, t1, t2 /* Same zero-byte test as above on the new doubleword.  */
+ beqz t3, L(al_loop)
+
+L(al_end): /* t0 = doubleword holding the NUL, t3 = its zero-byte flags.  */
+ ctz.d t1, t3 /* Bit position of the lowest flag, i.e. of the first NUL (little-endian).  */
+ srli.d t1, t1, 3 /* Byte index of the NUL inside t0.  */
+ addi.d t1, t1, 1 /* t1 = bytes still to store, NUL included (1..8).  */
+
+ andi a3, t1, 8 /* Split t1 into 8/4/2/1-byte stores.  */
+ andi a4, t1, 4
+ andi a5, t1, 2
+ andi a6, t1, 1
+
+L(al_end_8):
+ beqz a3, L(al_end_4)
+ st.d t0, a0, 0
+ addi.d a0, a0, 7 /* All 8 bytes stored; the NUL is the last of them.  */
+ jr ra
+L(al_end_4):
+ beqz a4, L(al_end_2)
+ st.w t0, a0, 0
+ addi.d a0, a0, 4
+ srli.d t0, t0, 32 /* Drop the 4 bytes just stored.  */
+L(al_end_2):
+ beqz a5, L(al_end_1)
+ st.h t0, a0, 0
+ addi.d a0, a0, 2
+ srli.d t0, t0, 16 /* Drop the 2 bytes just stored.  */
+L(al_end_1):
+ beqz a6, L(al_out)
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+L(al_out):
+ addi.d a0, a0, -1 /* a0 is one past the NUL here; step back onto it.  */
+ jr ra
+
+L(unalign): /* dest aligned, src misaligned by a4 bytes: merge pairs of aligned loads.  */
+ slli.d a5, a4, 3 /* a5 = misalignment in bits.  */
+ li.d t1, -1
+ sub.d a6, zero, a5 /* a6 = -a5; the shifts below use only its low 6 bits, i.e. 64 - a5.  */
+
+ srl.d a7, t0, a5 /* Discard the a4 bytes that precede src.  */
+ sll.d t7, t1, a6 /* t7 = 0xff in each of the top a4 bytes.  */
+
+ or t0, a7, t7 /* First 8 - a4 source bytes; the 0xff padding cannot look like a NUL.  */
+ sub.d t1, t0, t5
+ andn t2, t6, t0
+ and t3, t1, t2 /* Zero-byte test on the padded doubleword.  */
+
+ bnez t3, L(un_end) /* NUL within those first 8 - a4 bytes.  */
+
+ ld.d t4, a1, 8 /* Next aligned source doubleword.  */
+ addi.d a1, a1, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6 /* Low a4 bytes of t4 moved to the top.  */
+ and t3, t1, t2 /* Zero-byte test on t4 itself.  */
+
+ or t0, t0, a7 /* t0 = next 8 dest bytes: 8 - a4 carried bytes plus a4 bytes of t4.  */
+ bnez t3, L(un_end_with_remaining) /* t4 contains the NUL.  */
+
+L(un_loop):
+ srl.d a7, t4, a5 /* Carry the 8 - a4 bytes of t4 that are not merged yet.  */
+
+ ld.d t4, a1, 8
+ addi.d a1, a1, 8
+
+ st.d t0, a0, 0 /* Store the doubleword merged on the previous round.  */
+ addi.d a0, a0, 8
+
+ sub.d t1, t4, t5
+ andn t2, t6, t4
+ sll.d t0, t4, a6
+ and t3, t1, t2
+
+ or t0, t0, a7 /* Merge for the next store.  */
+ beqz t3, L(un_loop)
+
+L(un_end_with_remaining): /* t4 holds the NUL; t0 is the merged, not yet stored doubleword.  */
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1 /* Bytes of t4 up to and including the NUL.  */
+ sub.d t1, t1, a4 /* Minus the a4 bytes of t4 already merged into t0.  */
+
+ blt t1, zero, L(un_end_less_8) /* NUL is inside t0 but not its last byte; the low 3 bits of t1 equal the count 8 + t1.  */
+ st.d t0, a0, 0
+ addi.d a0, a0, 8
+ beqz t1, L(un_out) /* The NUL was the last byte of t0.  */
+ srl.d t0, t4, a5 /* t0 = the remaining t1 (< 8) bytes of t4.  */
+ b L(un_end_less_8)
+
+L(un_end):
+ ctz.d t1, t3
+ srli.d t1, t1, 3
+ addi.d t1, t1, 1 /* Bytes to store, NUL included; below 8 because the top bytes are 0xff padding.  */
+
+L(un_end_less_8):
+ andi a4, t1, 4 /* Only bits 2..0 of t1 are used: 4/2/1-byte stores.  */
+ andi a5, t1, 2
+ andi a6, t1, 1
+L(un_end_4):
+ beqz a4, L(un_end_2)
+ st.w t0, a0, 0
+ addi.d a0, a0, 4
+ srli.d t0, t0, 32 /* Drop the 4 bytes just stored.  */
+L(un_end_2):
+ beqz a5, L(un_end_1)
+ st.h t0, a0, 0
+ addi.d a0, a0, 2
+ srli.d t0, t0, 16 /* Drop the 2 bytes just stored.  */
+L(un_end_1):
+ beqz a6, L(un_out)
+ st.b t0, a0, 0
+ addi.d a0, a0, 1
+L(un_out):
+ addi.d a0, a0, -1 /* a0 is one past the NUL here; step back onto it.  */
+ jr ra
+
+END(STPCPY_NAME)
+
+libc_hidden_builtin_def (STPCPY_NAME)
new file mode 100644
@@ -0,0 +1,208 @@
+/* Optimized stpcpy implementation using LoongArch LASX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STPCPY __stpcpy_lasx /* Name listed for stpcpy in ifunc-impl-list.c.  */
+
+LEAF(STPCPY, 6) /* In: a0 = dest, a1 = src.  Out: a0 = address of the NUL stored in dest.  */
+ ori t8, zero, 0xfe0 /* t8 = 0x1000 - 32: highest 4 KiB-page offset from which a 32-byte load stays in the page.  */
+ andi t0, a1, 0xfff /* Offset of src within its 4 KiB page.  */
+ li.d t7, -1 /* All-ones pattern meaning "no NUL" in the mask compares below.  */
+ move a2, a0 /* a2 = working dest pointer; a0 is only written with the result.  */
+
+ bltu t8, t0, L(page_cross_start) /* A 32-byte load at src would cross the page end.  */
+L(start_entry):
+ xvld xr0, a1, 0 /* First 32 source bytes.  */
+ li.d t0, 32
+ andi t1, a2, 0x1f /* t1 = dest & 31.  */
+
+ xvsetanyeqz.b fcc0, xr0 /* fcc0 set iff some byte of xr0 is zero.  */
+ sub.d t0, t0, t1 /* t0 = 32 - t1: step that makes dest 32-byte aligned (1..32).  */
+ bcnez fcc0, L(end) /* NUL within the first 32 bytes.  */
+ add.d a1, a1, t0
+
+ xvst xr0, a2, 0 /* Store all 32 bytes; the next store starts t0 bytes further and may overlap.  */
+ andi a3, a1, 0x1f /* a3 = misalignment of src now that dest is aligned.  */
+ add.d a2, a2, t0
+ bnez a3, L(unaligned)
+
+
+ xvld xr0, a1, 0
+ xvsetanyeqz.b fcc0, xr0
+ bcnez fcc0, L(al_end)
+L(al_loop): /* src and dest both 32-byte aligned.  */
+ xvst xr0, a2, 0
+
+ xvld xr0, a1, 32
+ addi.d a2, a2, 32
+ addi.d a1, a1, 32
+ xvsetanyeqz.b fcc0, xr0
+
+ bceqz fcc0, L(al_loop)
+L(al_end): /* xr0 = block at a1 that contains the NUL.  */
+ xvmsknz.b xr0, xr0 /* Per 128-bit half: 16-bit mask of the non-zero bytes.  */
+ xvpickve.w xr1, xr0, 4 /* Word 0 of xr1 = mask of the upper half.  */
+ vilvl.h vr0, vr1, vr0 /* Low word of vr0 = upper mask : lower mask, 32 bits in all.  */
+
+ movfr2gr.s t0, fa0 /* t0 = that 32-bit mask.  */
+ cto.w t0, t0 /* Trailing ones = offset of the first NUL in the block.  */
+ add.d a1, a1, t0 /* a1 = address of the NUL in src.  */
+ xvld xr0, a1, -31 /* The 32 source bytes that end with the NUL.  */
+
+
+ add.d a0, a2, t0 /* Return value: address of the NUL in dest.  */
+ xvst xr0, a0, -31 /* Overlapping store of the final 32 bytes.  */
+ jr ra
+ nop /* Unreachable filler after the return; presumably pads the next label.  */
+
+L(page_cross_start): /* src is within 32 bytes of the page end: inspect the aligned block first.  */
+ move a4, a1
+ bstrins.d a4, zero, 4, 0 /* a4 = src rounded down to 32 bytes.  */
+ xvld xr0, a4, 0 /* Aligned load, so it stays inside the page.  */
+ xvmsknz.b xr0, xr0 /* Build the 32-bit non-zero-byte mask as in al_end.  */
+
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a1 /* Shift out the a1 & 31 bytes before src; the arithmetic shift refills with the top mask bit.  */
+
+ beq t0, t7, L(start_entry) /* All ones: no NUL between src and the page end.  */
+ b L(tail) /* Otherwise the trailing ones of t0 give the NUL offset from src.  */
+L(unaligned): /* dest is 32-byte aligned, src is off by a3.  */
+ andi t0, a1, 0xfff
+ bltu t8, t0, L(un_page_cross) /* The next unaligned load would cross a page end.  */
+
+
+L(un_start_entry):
+ xvld xr0, a1, 0
+ xvsetanyeqz.b fcc0, xr0
+ bcnez fcc0, L(un_end)
+ addi.d a1, a1, 32
+
+L(un_loop):
+ xvst xr0, a2, 0
+ andi t0, a1, 0xfff /* Page offset of the upcoming load.  */
+ addi.d a2, a2, 32
+ bltu t8, t0, L(page_cross_loop)
+
+L(un_loop_entry):
+ xvld xr0, a1, 0
+ addi.d a1, a1, 32
+ xvsetanyeqz.b fcc0, xr0
+ bceqz fcc0, L(un_loop)
+
+ addi.d a1, a1, -32 /* Undo the advance: a1 = address of the block holding the NUL.  */
+L(un_end):
+ xvmsknz.b xr0, xr0 /* Build the 32-bit non-zero-byte mask as in al_end.  */
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+
+
+ movfr2gr.s t0, fa0
+L(un_tail): /* t0 = non-zero-byte mask relative to a1.  */
+ cto.w t0, t0 /* Offset of the NUL from a1.  */
+ add.d a1, a1, t0
+ xvld xr0, a1, -31 /* The 32 source bytes that end with the NUL.  */
+
+ add.d a0, a2, t0 /* Return value: address of the NUL in dest.  */
+ xvst xr0, a0, -31
+ jr ra
+L(un_page_cross):
+ sub.d a4, a1, a3 /* a4 = a1 rounded down to 32 bytes (a3 = a1 & 31).  */
+
+ xvld xr0, a4, 0 /* Aligned load, so it stays inside the page.  */
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a1 /* Drop the mask bits of the bytes that precede a1.  */
+ beq t0, t7, L(un_start_entry) /* No NUL before the page end: continue with the unaligned load.  */
+ b L(un_tail)
+
+
+L(page_cross_loop): /* Same check as un_page_cross, but resuming inside the loop.  */
+ sub.d a4, a1, a3
+ xvld xr0, a4, 0
+ xvmsknz.b xr0, xr0
+ xvpickve.w xr1, xr0, 4
+
+ vilvl.h vr0, vr1, vr0
+ movfr2gr.s t0, fa0
+ sra.w t0, t0, a1
+ beq t0, t7, L(un_loop_entry)
+
+ b L(un_tail)
+L(end): /* NUL within the first 32 bytes loaded from src.  */
+ xvmsknz.b xr0, xr0 /* Build the 32-bit non-zero-byte mask as in al_end.  */
+ xvpickve.w xr1, xr0, 4
+ vilvl.h vr0, vr1, vr0
+
+ movfr2gr.s t0, fa0
+L(tail): /* Short copy: a1 = src, a2 = dest, NUL among the next 32 bytes.  */
+ cto.w t0, t0 /* t0 = offset of the NUL from src (0..31).  */
+ add.d a0, a2, t0 /* Return value: address of the NUL in dest.  */
+ add.d a5, a1, t0 /* a5 = address of the NUL in src.  */
+
+L(less_32):
+ srli.d t1, t0, 4
+ beqz t1, L(less_16) /* t0 < 16.  */
+ vld vr0, a1, 0 /* 16..31: first 16 bytes plus the 16 bytes ending with the NUL.  */
+ vld vr1, a5, -15
+
+ vst vr0, a2, 0
+ vst vr1, a0, -15
+ jr ra
+L(less_16):
+ srli.d t1, t0, 3
+
+ beqz t1, L(less_8) /* t0 < 8.  */
+ ld.d t2, a1, 0 /* 8..15: two overlapping 8-byte moves.  */
+ ld.d t3, a5, -7
+ st.d t2, a2, 0
+
+ st.d t3, a0, -7
+ jr ra
+L(less_8):
+ li.d t1, 3
+ bltu t0, t1, L(less_4) /* t0 < 3.  */
+
+ ld.w t2, a1, 0 /* 3..7: two overlapping 4-byte moves.  */
+ ld.w t3, a5, -3
+ st.w t2, a2, 0
+ st.w t3, a0, -3
+
+ jr ra
+L(less_4):
+ srli.d t1, t0, 2 /* t0 <= 2 here, so t1 = 0.  */
+ bgeu t1, t0, L(zero_byte) /* Taken only when t0 == 0: nothing but the NUL is left.  */
+ ld.h t2, a1, 0 /* t0 is 1 or 2: copy two bytes, the NUL follows below.  */
+
+ st.h t2, a2, 0
+L(zero_byte):
+ st.b zero, a0, 0 /* Terminating NUL at the returned address.  */
+ jr ra
+END(STPCPY)
+
+libc_hidden_builtin_def (STPCPY)
+#endif
new file mode 100644
@@ -0,0 +1,206 @@
+/* Optimized stpcpy implementation using LoongArch LSX instructions.
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#include <sys/regdef.h>
+#include <sys/asm.h>
+
+#if IS_IN (libc) && !defined __loongarch_soft_float
+
+# define STPCPY __stpcpy_lsx /* Name listed for stpcpy in ifunc-impl-list.c.  */
+
+LEAF(STPCPY, 6) /* In: a0 = dest, a1 = src.  Out: a0 = address of the NUL stored in dest.  */
+ pcalau12i t0, %pc_hi20(L(INDEX))
+ andi a4, a1, 0xf /* a4 = src & 15.  */
+ vld vr1, t0, %pc_lo12(L(INDEX)) /* vr1 = byte indices {0, 1, ..., 15}.  */
+ beqz a4, L(load_start) /* src is 16-byte aligned.  */
+
+ xor t0, a1, a4 /* t0 = src rounded down to 16 bytes.  */
+ vld vr0, t0, 0 /* Aligned block that contains the first source byte.  */
+ vreplgr2vr.b vr2, a4
+ vadd.b vr2, vr2, vr1 /* vr2 = {a4, a4 + 1, ..., a4 + 15}.  */
+
+ vshuf.b vr0, vr2, vr0, vr2 /* Move the source bytes down by a4; indices >= 16 pick non-zero bytes of vr2 as padding.  */
+ vsetanyeqz.b fcc0, vr0 /* fcc0 set iff some byte of vr0 is zero.  */
+ bcnez fcc0, L(end) /* NUL before the next 16-byte boundary.  */
+L(load_start):
+ vld vr0, a1, 0 /* First 16 source bytes.  */
+
+ li.d t1, 16
+ andi a3, a0, 0xf /* a3 = dest & 15.  */
+ vsetanyeqz.b fcc0, vr0
+ sub.d t0, t1, a3 /* t0 = 16 - a3: step that makes dest 16-byte aligned (1..16).  */
+
+
+ bcnez fcc0, L(end) /* NUL within the first 16 bytes.  */
+ add.d a1, a1, t0
+ vst vr0, a0, 0 /* Store all 16 bytes; the next store starts t0 bytes further and may overlap.  */
+ add.d a0, a0, t0
+
+ bne a3, a4, L(unaligned) /* Old dest & 15 differs from old src & 15, so src is not 16-byte aligned now.  */
+ vld vr0, a1, 0
+ vsetanyeqz.b fcc0, vr0
+ bcnez fcc0, L(al_end)
+
+L(al_loop): /* src and dest both 16-byte aligned.  */
+ vst vr0, a0, 0
+ vld vr0, a1, 16
+ addi.d a0, a0, 16
+ addi.d a1, a1, 16
+
+ vsetanyeqz.b fcc0, vr0
+ bceqz fcc0, L(al_loop)
+L(al_end): /* vr0 = block at a1 that contains the NUL.  */
+ vmsknz.b vr1, vr0 /* 16-bit mask of the non-zero bytes (overwrites the index vector).  */
+ movfr2gr.s t0, fa1
+
+
+ cto.w t0, t0 /* Trailing ones = offset of the first NUL in the block.  */
+ add.d a1, a1, t0 /* a1 = address of the NUL in src.  */
+ vld vr0, a1, -15 /* The 16 source bytes that end with the NUL.  */
+ add.d a0, a0, t0 /* Return value: address of the NUL in dest.  */
+
+ vst vr0, a0, -15 /* Overlapping store of the final 16 bytes.  */
+ jr ra
+ nop /* Unreachable filler after the return; presumably pads the next label.  */
+ nop
+
+L(end): /* vr0 = first bytes of the string with the NUL among them; a0 = dest.  */
+ vseqi.b vr1, vr0, 0 /* 0xff in every byte position where vr0 is zero.  */
+ vfrstpi.b vr1, vr1, 0 /* Byte 0 of vr1 = index of the first 0xff byte, i.e. of the NUL.  */
+ vpickve2gr.bu t0, vr1, 0
+ addi.d t0, t0, 1 /* t0 = bytes to store, NUL included (1..16).  */
+
+L(end_16):
+ andi t1, t0, 16
+ beqz t1, L(end_8)
+ vst vr0, a0, 0
+ addi.d a0, a0, 15 /* All 16 bytes stored; the NUL is the last of them.  */
+
+
+ jr ra
+L(end_8):
+ andi t2, t0, 8 /* Split t0 into 8/4/2/1-byte stores.  */
+ andi t3, t0, 4
+ andi t4, t0, 2
+
+ andi t5, t0, 1
+ beqz t2, L(end_4)
+ vstelm.d vr0, a0, 0, 0 /* Store the low 8 bytes of vr0.  */
+ addi.d a0, a0, 8
+
+ vbsrl.v vr0, vr0, 8 /* Drop the 8 bytes just stored.  */
+L(end_4):
+ beqz t3, L(end_2)
+ vstelm.w vr0, a0, 0, 0
+ addi.d a0, a0, 4
+
+ vbsrl.v vr0, vr0, 4 /* Drop the 4 bytes just stored.  */
+L(end_2):
+ beqz t4, L(end_1)
+ vstelm.h vr0, a0, 0, 0
+ addi.d a0, a0, 2
+
+
+ vbsrl.v vr0, vr0, 2 /* Drop the 2 bytes just stored.  */
+L(end_1):
+ beqz t5, L(out)
+ vstelm.b vr0, a0, 0, 0
+ addi.d a0, a0, 1
+
+L(out):
+ addi.d a0, a0, -1 /* a0 is one past the NUL here; step back onto it.  */
+ jr ra
+ nop /* Unreachable filler after the return; presumably pads the next label.  */
+ nop
+
+L(unaligned): /* dest is 16-byte aligned, src is not: merge pairs of aligned loads.  */
+ andi a3, a1, 0xf /* a3 = current src & 15.  */
+ bstrins.d a1, zero, 3, 0 /* Round src down to 16 bytes.  */
+ vld vr2, a1, 0 /* Aligned block that contains the next source byte.  */
+ vreplgr2vr.b vr3, a3
+
+ vslt.b vr4, vr1, vr3 /* 0xff in the a3 byte positions that precede the current source byte.  */
+ vor.v vr0, vr2, vr4 /* Hide those bytes from the NUL test.  */
+ vsetanyeqz.b fcc0, vr0
+ bcnez fcc0, L(un_first_end) /* NUL in the rest of this first block.  */
+
+
+ vld vr0, a1, 16 /* Next aligned block.  */
+ vadd.b vr3, vr3, vr1 /* vr3 = shuffle indices {a3, ..., a3 + 15}.  */
+ vshuf.b vr4, vr0, vr2, vr3 /* vr4 = 16 bytes for dest taken from vr2 (low) and vr0 (high).  */
+ vsetanyeqz.b fcc0, vr0
+
+ bcnez fcc0, L(un_end)
+ vor.v vr2, vr0, vr0 /* vr2 = vr0.  */
+ addi.d a1, a1, 16
+L(un_loop): /* Two blocks per round; vr0 and vr2 alternate as older/newer block.  */
+ vld vr0, a1, 16
+
+ vst vr4, a0, 0 /* Store the vector merged on the previous step.  */
+ addi.d a0, a0, 16
+ vshuf.b vr4, vr0, vr2, vr3
+ vsetanyeqz.b fcc0, vr0
+
+ bcnez fcc0, L(un_end)
+ vld vr2, a1, 32
+ vst vr4, a0, 0
+ addi.d a1, a1, 32
+
+
+ addi.d a0, a0, 16
+ vshuf.b vr4, vr2, vr0, vr3 /* Roles swapped: vr0 is the older block, vr2 the newer.  */
+ vsetanyeqz.b fcc0, vr2
+ bceqz fcc0, L(un_loop)
+
+ vor.v vr0, vr2, vr2 /* un_end expects the block with the NUL in vr0 ...  */
+ addi.d a1, a1, -16 /* ... and its address to be a1 + 16.  */
+L(un_end): /* vr0 = block at a1 + 16 holding the NUL; vr4 = merged bytes for a0.  */
+ vsetanyeqz.b fcc0, vr4
+ bcnez fcc0, 1f /* vr4 already reaches the NUL: the tail store below covers it.  */
+
+ vst vr4, a0, 0
+1:
+ vmsknz.b vr1, vr0 /* 16-bit mask of the non-zero bytes.  */
+ movfr2gr.s t0, fa1
+ cto.w t0, t0 /* Offset of the NUL inside the block at a1 + 16.  */
+
+ add.d a1, a1, t0
+ vld vr0, a1, 1 /* The 16 source bytes that end with the NUL (the NUL is at a1 + 16).  */
+ add.d a0, a0, t0
+ sub.d a0, a0, a3 /* a0 + 16 is now the address of the NUL in dest.  */
+
+
+ vst vr0, a0, 1 /* Overlapping store of the final 16 bytes.  */
+ addi.d a0, a0, 16 /* Return value: address of the NUL in dest.  */
+ jr ra
+L(un_first_end): /* NUL is in the block at a1 itself: bias both pointers so the code at 1: applies.  */
+ addi.d a0, a0, -16
+
+ addi.d a1, a1, -16
+ b 1b
+END(STPCPY)
+
+ .section .rodata.cst16,"M",@progbits,16
+ .align 4
+L(INDEX): /* Byte indices 0..15, loaded into vr1 at function entry.  */
+ .dword 0x0706050403020100
+ .dword 0x0f0e0d0c0b0a0908
+
+libc_hidden_builtin_def (STPCPY)
+#endif
new file mode 100644
@@ -0,0 +1,42 @@
+/* Multiple versions of stpcpy.
+ All versions must be listed in ifunc-impl-list.c.
+ Copyright (C) 2017-2023 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Define multiple versions only for the definition in libc. */
+#if IS_IN (libc)
+# define stpcpy __redirect_stpcpy /* While <string.h> is read, stpcpy is spelled __redirect_stpcpy.  */
+# define __stpcpy __redirect___stpcpy /* Likewise for __stpcpy.  */
+# define NO_MEMPCPY_STPCPY_REDIRECT /* NOTE(review): presumably turns off a header-level stpcpy redirect - confirm.  */
+# define __NO_STRING_INLINES /* NOTE(review): presumably suppresses inline string definitions - confirm.  */
+# include <string.h>
+# undef stpcpy /* The real names are free again for the definitions below.  */
+# undef __stpcpy
+
+# define SYMBOL_NAME stpcpy /* Set before the selector header is included; presumably consumed by its OPTIMIZE macro.  */
+# include "ifunc-stpcpy.h" /* Expected to supply IFUNC_SELECTOR.  */
+
+libc_ifunc_redirected (__redirect_stpcpy, __stpcpy, IFUNC_SELECTOR ()); /* __stpcpy is resolved through IFUNC_SELECTOR.  */
+
+weak_alias (__stpcpy, stpcpy) /* stpcpy is a weak alias of __stpcpy.  */
+# ifdef SHARED
+__hidden_ver1 (__stpcpy, __GI___stpcpy, __redirect___stpcpy)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); /* Hidden __GI___stpcpy name, shared builds only.  */
+__hidden_ver1 (stpcpy, __GI_stpcpy, __redirect_stpcpy)
+ __attribute__ ((visibility ("hidden"))) __attribute_copy__ (stpcpy); /* Hidden __GI_stpcpy name, shared builds only.  */
+# endif
+#endif