[v7,2/2] RISC-V: add riscv vector support for memcpy
Checks
| Context |
Check |
Description |
| redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
| linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
| redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
| linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
| linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
| linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
| redhat-pt-bot/TryBot-still_applies |
warning
|
Patch no longer applies to master
|
Commit Message
From: daichengrong <daichengrong@iscas.ac.cn>
Changes in v7:
update dl_hwcap support using kernel header
compile rvv memcpy with .option arch, +v
delete optimization for small lengths
Changes in v6:
Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
Changes in v5:
check ifunc-impl-list memcpy vector support by dl_hwcap
Changes in v4:
update rvv memcpy support by compiler
check whether rvv enabled by dl_hwcap
Changes in v2:
delete size-0 branch
---
sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
.../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
.../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
.../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
4 files changed, 65 insertions(+)
create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
Comments
Hi,
On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
> From: daichengrong <daichengrong@iscas.ac.cn>
>
> Change in v7:
> update dl_hwcap support using kernel header
> update rvv memcpy compile with option,+v
> delete optimization for small lengths
I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
before) it's a 3x speedup on long lengths, a clear improvement for
everything 16 bytes onwards and no significant regressions on lengths
less than 16 bytes.
Tested-by: Anton Blanchard <antonb@tenstorrent.com>
Anton
> Change in v6:
> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>
> Changes in v5:
> check ifunc-impl-list memcpy vector support with by dl_hwcap
>
> Changes in v4:
> update rvv memcpy support by compiler
> check whether rvv enabled by dl_hwcap
>
> Changes in v2:
> delete size-0 branch
> ---
> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
> 4 files changed, 65 insertions(+)
> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>
> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
> new file mode 100644
> index 0000000000..eaf28aaf6d
> --- /dev/null
> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
> @@ -0,0 +1,37 @@
> +/* memcpy for RISC-V Vector.
> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +ENTRY (__memcpy_vector)
> +.option push
> +.option arch, +v
> + mv a6, a0
> +L(loop):
> + vsetvli a3,a2,e8,m8,ta,ma
> + vle8.v v8,(a1)
> + vse8.v v8,(a6)
> + add a1,a1,a3
> + sub a2,a2,a3
> + add a6,a6,a3
> + bnez a2,L(loop)
> + ret
> +.option pop
> +END (__memcpy_vector)
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> index fcef5659d4..478338006b 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> @@ -5,5 +5,11 @@ sysdep_routines += \
> memcpy_noalignment \
> # sysdep_routines
>
> +ifeq ($(have-gcc-riscv-rvv),yes)
> +sysdep_routines += \
> + memcpy_vector \
> + # rvv sysdep_routines
> +endif
> +
> CFLAGS-memcpy_noalignment.c += -mno-strict-align
> endif
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> index 1c1deca8f6..26f3376d23 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> @@ -19,6 +19,8 @@
> #include <ifunc-impl-list.h>
> #include <string.h>
> #include <sys/hwprobe.h>
> +#include <ldsodefs.h>
> +#include <asm/hwcap.h>
>
> size_t
> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> size_t i = max;
>
> bool fast_unaligned = false;
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + bool rvv_ext = false;
> +#endif
>
> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> == RISCV_HWPROBE_MISALIGNED_FAST)
> fast_unaligned = true;
>
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
> + rvv_ext = true;
> +#endif
> +
> IFUNC_IMPL (i, name, memcpy,
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
> + __memcpy_vector)
> +#endif
> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> __memcpy_noalignment)
> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> index 8544f5402a..4bedd21866 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> @@ -27,16 +27,24 @@
> # include <ifunc-init.h>
> # include <riscv-ifunc.h>
> # include <sys/hwprobe.h>
> +# include <asm/hwcap.h>
>
> extern __typeof (__redirect_memcpy) __libc_memcpy;
>
> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>
> static inline __typeof (__redirect_memcpy) *
> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> {
> unsigned long long int v;
> +
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
> + return __memcpy_vector;
> +#endif
> +
> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
> return __memcpy_noalignment;
> --
> 2.25.1
>
>
> On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
>> From: daichengrong <daichengrong@iscas.ac.cn>
>>
>> Change in v7:
>> update dl_hwcap support using kernel header
>> update rvv memcpy compile with option,+v
>> delete optimization for small lengths
> I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
> before) it's a 3x speedup on long lengths, a clear improvement for
> everything 16 bytes onwards and no significant regressions on lengths
> less than 16 bytes.
>
> Tested-by: Anton Blanchard <antonb@tenstorrent.com>
>
> Anton
On the Banana Pi F3 (256-bit RVV), it's a 3x speedup on long lengths.
And on the K230 (128-bit RVV), it gets a 2x speedup on long lengths.
Tested-by: daichengrong <daichengrong@iscas.ac.cn>
>> Change in v6:
>> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>>
>> Changes in v5:
>> check ifunc-impl-list memcpy vector support with by dl_hwcap
>>
>> Changes in v4:
>> update rvv memcpy support by compiler
>> check whether rvv enabled by dl_hwcap
>>
>> Changes in v2:
>> delete size-0 branch
>> ---
>> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
>> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
>> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
>> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
>> 4 files changed, 65 insertions(+)
>> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>>
>> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
>> new file mode 100644
>> index 0000000000..eaf28aaf6d
>> --- /dev/null
>> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
>> @@ -0,0 +1,37 @@
>> +/* memcpy for RISC-V Vector.
>> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +
>> +#include <sysdep.h>
>> +#include <sys/asm.h>
>> +
>> +ENTRY (__memcpy_vector)
>> +.option push
>> +.option arch, +v
>> + mv a6, a0
>> +L(loop):
>> + vsetvli a3,a2,e8,m8,ta,ma
>> + vle8.v v8,(a1)
>> + vse8.v v8,(a6)
>> + add a1,a1,a3
>> + sub a2,a2,a3
>> + add a6,a6,a3
>> + bnez a2,L(loop)
>> + ret
>> +.option pop
>> +END (__memcpy_vector)
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> index fcef5659d4..478338006b 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> @@ -5,5 +5,11 @@ sysdep_routines += \
>> memcpy_noalignment \
>> # sysdep_routines
>>
>> +ifeq ($(have-gcc-riscv-rvv),yes)
>> +sysdep_routines += \
>> + memcpy_vector \
>> + # rvv sysdep_routines
>> +endif
>> +
>> CFLAGS-memcpy_noalignment.c += -mno-strict-align
>> endif
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> index 1c1deca8f6..26f3376d23 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> @@ -19,6 +19,8 @@
>> #include <ifunc-impl-list.h>
>> #include <string.h>
>> #include <sys/hwprobe.h>
>> +#include <ldsodefs.h>
>> +#include <asm/hwcap.h>
>>
>> size_t
>> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> size_t i = max;
>>
>> bool fast_unaligned = false;
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + bool rvv_ext = false;
>> +#endif
>>
>> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> == RISCV_HWPROBE_MISALIGNED_FAST)
>> fast_unaligned = true;
>>
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
>> + rvv_ext = true;
>> +#endif
>> +
>> IFUNC_IMPL (i, name, memcpy,
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
>> + __memcpy_vector)
>> +#endif
>> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>> __memcpy_noalignment)
>> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> index 8544f5402a..4bedd21866 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> @@ -27,16 +27,24 @@
>> # include <ifunc-init.h>
>> # include <riscv-ifunc.h>
>> # include <sys/hwprobe.h>
>> +# include <asm/hwcap.h>
>>
>> extern __typeof (__redirect_memcpy) __libc_memcpy;
>>
>> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
>> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>>
>> static inline __typeof (__redirect_memcpy) *
>> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>> {
>> unsigned long long int v;
>> +
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
>> + return __memcpy_vector;
>> +#endif
>> +
>> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>> return __memcpy_noalignment;
>> --
>> 2.25.1
>>
>>
On 2/22/25 01:11, Anton Blanchard wrote:
> Hi,
>
> On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
>> From: daichengrong <daichengrong@iscas.ac.cn>
>>
>> Change in v7:
>> update dl_hwcap support using kernel header
>> update rvv memcpy compile with option,+v
>> delete optimization for small lengths
> I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
> before) it's a 3x speedup on long lengths, a clear improvement for
> everything 16 bytes onwards and no significant regressions on lengths
> less than 16 bytes.
>
> Tested-by: Anton Blanchard <antonb@tenstorrent.com>
Hi Palmer,
Is this patch series blocked by something?
Thx,
-Vineet
>
> Anton
>
>> Change in v6:
>> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>>
>> Changes in v5:
>> check ifunc-impl-list memcpy vector support with by dl_hwcap
>>
>> Changes in v4:
>> update rvv memcpy support by compiler
>> check whether rvv enabled by dl_hwcap
>>
>> Changes in v2:
>> delete size-0 branch
>> ---
>> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
>> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
>> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
>> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
>> 4 files changed, 65 insertions(+)
>> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>>
>> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
>> new file mode 100644
>> index 0000000000..eaf28aaf6d
>> --- /dev/null
>> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
>> @@ -0,0 +1,37 @@
>> +/* memcpy for RISC-V Vector.
>> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +
>> +#include <sysdep.h>
>> +#include <sys/asm.h>
>> +
>> +ENTRY (__memcpy_vector)
>> +.option push
>> +.option arch, +v
>> + mv a6, a0
>> +L(loop):
>> + vsetvli a3,a2,e8,m8,ta,ma
>> + vle8.v v8,(a1)
>> + vse8.v v8,(a6)
>> + add a1,a1,a3
>> + sub a2,a2,a3
>> + add a6,a6,a3
>> + bnez a2,L(loop)
>> + ret
>> +.option pop
>> +END (__memcpy_vector)
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> index fcef5659d4..478338006b 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> @@ -5,5 +5,11 @@ sysdep_routines += \
>> memcpy_noalignment \
>> # sysdep_routines
>>
>> +ifeq ($(have-gcc-riscv-rvv),yes)
>> +sysdep_routines += \
>> + memcpy_vector \
>> + # rvv sysdep_routines
>> +endif
>> +
>> CFLAGS-memcpy_noalignment.c += -mno-strict-align
>> endif
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> index 1c1deca8f6..26f3376d23 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> @@ -19,6 +19,8 @@
>> #include <ifunc-impl-list.h>
>> #include <string.h>
>> #include <sys/hwprobe.h>
>> +#include <ldsodefs.h>
>> +#include <asm/hwcap.h>
>>
>> size_t
>> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> size_t i = max;
>>
>> bool fast_unaligned = false;
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + bool rvv_ext = false;
>> +#endif
>>
>> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> == RISCV_HWPROBE_MISALIGNED_FAST)
>> fast_unaligned = true;
>>
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
>> + rvv_ext = true;
>> +#endif
>> +
>> IFUNC_IMPL (i, name, memcpy,
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
>> + __memcpy_vector)
>> +#endif
>> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>> __memcpy_noalignment)
>> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> index 8544f5402a..4bedd21866 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> @@ -27,16 +27,24 @@
>> # include <ifunc-init.h>
>> # include <riscv-ifunc.h>
>> # include <sys/hwprobe.h>
>> +# include <asm/hwcap.h>
>>
>> extern __typeof (__redirect_memcpy) __libc_memcpy;
>>
>> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
>> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>>
>> static inline __typeof (__redirect_memcpy) *
>> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>> {
>> unsigned long long int v;
>> +
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
>> + return __memcpy_vector;
>> +#endif
>> +
>> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>> return __memcpy_noalignment;
>> --
>> 2.25.1
>>
>>
On Fri, 15 Aug 2025 21:41:13 PDT (-0700), Vineet Gupta wrote:
>
>
> On 2/22/25 01:11, Anton Blanchard wrote:
>> Hi,
>>
>> On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
>>> From: daichengrong <daichengrong@iscas.ac.cn>
>>>
>>> Change in v7:
>>> update dl_hwcap support using kernel header
>>> update rvv memcpy compile with option,+v
>>> delete optimization for small lengths
>> I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
>> before) it's a 3x speedup on long lengths, a clear improvement for
>> everything 16 bytes onwards and no significant regressions on lengths
>> less than 16 bytes.
>>
>> Tested-by: Anton Blanchard <antonb@tenstorrent.com>
>
> Hi Palmer,
>
> Is this patch series blocked by something ?
Jeff said he had a better version of this and was going to send it.
>
> Thx,
> -Vineet
>
>>
>> Anton
>>
>>> Change in v6:
>>> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>>>
>>> Changes in v5:
>>> check ifunc-impl-list memcpy vector support with by dl_hwcap
>>>
>>> Changes in v4:
>>> update rvv memcpy support by compiler
>>> check whether rvv enabled by dl_hwcap
>>>
>>> Changes in v2:
>>> delete size-0 branch
>>> ---
>>> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
>>> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
>>> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
>>> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
>>> 4 files changed, 65 insertions(+)
>>> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>>>
>>> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
>>> new file mode 100644
>>> index 0000000000..eaf28aaf6d
>>> --- /dev/null
>>> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
>>> @@ -0,0 +1,37 @@
>>> +/* memcpy for RISC-V Vector.
>>> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
>>> + This file is part of the GNU C Library.
>>> +
>>> + The GNU C Library is free software; you can redistribute it and/or
>>> + modify it under the terms of the GNU Lesser General Public
>>> + License as published by the Free Software Foundation; either
>>> + version 2.1 of the License, or (at your option) any later version.
>>> +
>>> + The GNU C Library is distributed in the hope that it will be useful,
>>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>>> + Lesser General Public License for more details.
>>> +
>>> + You should have received a copy of the GNU Lesser General Public
>>> + License along with the GNU C Library; if not, see
>>> + <https://www.gnu.org/licenses/>. */
>>> +
>>> +
>>> +#include <sysdep.h>
>>> +#include <sys/asm.h>
>>> +
>>> +ENTRY (__memcpy_vector)
>>> +.option push
>>> +.option arch, +v
>>> + mv a6, a0
>>> +L(loop):
>>> + vsetvli a3,a2,e8,m8,ta,ma
>>> + vle8.v v8,(a1)
>>> + vse8.v v8,(a6)
>>> + add a1,a1,a3
>>> + sub a2,a2,a3
>>> + add a6,a6,a3
>>> + bnez a2,L(loop)
>>> + ret
>>> +.option pop
>>> +END (__memcpy_vector)
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> index fcef5659d4..478338006b 100644
>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>>> @@ -5,5 +5,11 @@ sysdep_routines += \
>>> memcpy_noalignment \
>>> # sysdep_routines
>>>
>>> +ifeq ($(have-gcc-riscv-rvv),yes)
>>> +sysdep_routines += \
>>> + memcpy_vector \
>>> + # rvv sysdep_routines
>>> +endif
>>> +
>>> CFLAGS-memcpy_noalignment.c += -mno-strict-align
>>> endif
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> index 1c1deca8f6..26f3376d23 100644
>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>>> @@ -19,6 +19,8 @@
>>> #include <ifunc-impl-list.h>
>>> #include <string.h>
>>> #include <sys/hwprobe.h>
>>> +#include <ldsodefs.h>
>>> +#include <asm/hwcap.h>
>>>
>>> size_t
>>> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>> size_t i = max;
>>>
>>> bool fast_unaligned = false;
>>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>>> + bool rvv_ext = false;
>>> +#endif
>>>
>>> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>>> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>> == RISCV_HWPROBE_MISALIGNED_FAST)
>>> fast_unaligned = true;
>>>
>>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>>> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
>>> + rvv_ext = true;
>>> +#endif
>>> +
>>> IFUNC_IMPL (i, name, memcpy,
>>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>>> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
>>> + __memcpy_vector)
>>> +#endif
>>> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>> __memcpy_noalignment)
>>> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
>>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>>> index 8544f5402a..4bedd21866 100644
>>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>>> @@ -27,16 +27,24 @@
>>> # include <ifunc-init.h>
>>> # include <riscv-ifunc.h>
>>> # include <sys/hwprobe.h>
>>> +# include <asm/hwcap.h>
>>>
>>> extern __typeof (__redirect_memcpy) __libc_memcpy;
>>>
>>> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>>> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
>>> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>>>
>>> static inline __typeof (__redirect_memcpy) *
>>> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>> {
>>> unsigned long long int v;
>>> +
>>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>>> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
>>> + return __memcpy_vector;
>>> +#endif
>>> +
>>> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>>> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>>> return __memcpy_noalignment;
>>> --
>>> 2.25.1
>>>
>>>
new file mode 100644
@@ -0,0 +1,37 @@
+/* memcpy for RISC-V Vector.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+ENTRY (__memcpy_vector)
+.option push
+.option arch, +v
+ mv a6, a0
+L(loop):
+ vsetvli a3,a2,e8,m8,ta,ma
+ vle8.v v8,(a1)
+ vse8.v v8,(a6)
+ add a1,a1,a3
+ sub a2,a2,a3
+ add a6,a6,a3
+ bnez a2,L(loop)
+ ret
+.option pop
+END (__memcpy_vector)
@@ -5,5 +5,11 @@ sysdep_routines += \
memcpy_noalignment \
# sysdep_routines
+ifeq ($(have-gcc-riscv-rvv),yes)
+sysdep_routines += \
+ memcpy_vector \
+ # rvv sysdep_routines
+endif
+
CFLAGS-memcpy_noalignment.c += -mno-strict-align
endif
@@ -19,6 +19,8 @@
#include <ifunc-impl-list.h>
#include <string.h>
#include <sys/hwprobe.h>
+#include <ldsodefs.h>
+#include <asm/hwcap.h>
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = max;
bool fast_unaligned = false;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ bool rvv_ext = false;
+#endif
struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
@@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
== RISCV_HWPROBE_MISALIGNED_FAST)
fast_unaligned = true;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
+ rvv_ext = true;
+#endif
+
IFUNC_IMPL (i, name, memcpy,
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
+ __memcpy_vector)
+#endif
IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
__memcpy_noalignment)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
@@ -27,16 +27,24 @@
# include <ifunc-init.h>
# include <riscv-ifunc.h>
# include <sys/hwprobe.h>
+# include <asm/hwcap.h>
extern __typeof (__redirect_memcpy) __libc_memcpy;
extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
static inline __typeof (__redirect_memcpy) *
select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
{
unsigned long long int v;
+
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ if (dl_hwcap & COMPAT_HWCAP_ISA_V)
+ return __memcpy_vector;
+#endif
+
if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
&& (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
return __memcpy_noalignment;