[v7,2/2] RISC-V: add riscv vector support for memcpy
Checks
Context |
Check |
Description |
redhat-pt-bot/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
linaro-tcwg-bot/tcwg_glibc_build--master-arm |
success
|
Build passed
|
redhat-pt-bot/TryBot-32bit |
success
|
Build for i686
|
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
From: daichengrong <daichengrong@iscas.ac.cn>
Changes in v7:
update dl_hwcap support to use the kernel header
compile the RVV memcpy with ".option arch, +v"
delete the optimization for small lengths
Changes in v6:
optimize the RVV memcpy for small lengths (less than VLEN/8 bytes)
Changes in v5:
check memcpy vector support in ifunc-impl-list by dl_hwcap
Changes in v4:
update RVV memcpy support by compiler
check whether RVV is enabled by dl_hwcap
Changes in v2:
delete size-0 branch
---
sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
.../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
.../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
.../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
4 files changed, 65 insertions(+)
create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
Comments
Hi,
On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
> From: daichengrong <daichengrong@iscas.ac.cn>
>
> Change in v7:
> update dl_hwcap support using kernel header
> update rvv memcpy compile with option,+v
> delete optimization for small lengths
I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
before) it's a 3x speedup on long lengths, a clear improvement for
everything 16 bytes onwards and no significant regressions on lengths
less than 16 bytes.
Tested-by: Anton Blanchard <antonb@tenstorrent.com>
Anton
> Change in v6:
> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>
> Changes in v5:
> check ifunc-impl-list memcpy vector support with by dl_hwcap
>
> Changes in v4:
> update rvv memcpy support by compiler
> check whether rvv enabled by dl_hwcap
>
> Changes in v2:
> delete size-0 branch
> ---
> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
> 4 files changed, 65 insertions(+)
> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>
> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
> new file mode 100644
> index 0000000000..eaf28aaf6d
> --- /dev/null
> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
> @@ -0,0 +1,37 @@
> +/* memcpy for RISC-V Vector.
> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
> + This file is part of the GNU C Library.
> +
> + The GNU C Library is free software; you can redistribute it and/or
> + modify it under the terms of the GNU Lesser General Public
> + License as published by the Free Software Foundation; either
> + version 2.1 of the License, or (at your option) any later version.
> +
> + The GNU C Library is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + Lesser General Public License for more details.
> +
> + You should have received a copy of the GNU Lesser General Public
> + License along with the GNU C Library; if not, see
> + <https://www.gnu.org/licenses/>. */
> +
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +ENTRY (__memcpy_vector)
> +.option push
> +.option arch, +v
> + mv a6, a0
> +L(loop):
> + vsetvli a3,a2,e8,m8,ta,ma
> + vle8.v v8,(a1)
> + vse8.v v8,(a6)
> + add a1,a1,a3
> + sub a2,a2,a3
> + add a6,a6,a3
> + bnez a2,L(loop)
> + ret
> +.option pop
> +END (__memcpy_vector)
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> index fcef5659d4..478338006b 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> @@ -5,5 +5,11 @@ sysdep_routines += \
> memcpy_noalignment \
> # sysdep_routines
>
> +ifeq ($(have-gcc-riscv-rvv),yes)
> +sysdep_routines += \
> + memcpy_vector \
> + # rvv sysdep_routines
> +endif
> +
> CFLAGS-memcpy_noalignment.c += -mno-strict-align
> endif
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> index 1c1deca8f6..26f3376d23 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> @@ -19,6 +19,8 @@
> #include <ifunc-impl-list.h>
> #include <string.h>
> #include <sys/hwprobe.h>
> +#include <ldsodefs.h>
> +#include <asm/hwcap.h>
>
> size_t
> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> size_t i = max;
>
> bool fast_unaligned = false;
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + bool rvv_ext = false;
> +#endif
>
> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> == RISCV_HWPROBE_MISALIGNED_FAST)
> fast_unaligned = true;
>
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
> + rvv_ext = true;
> +#endif
> +
> IFUNC_IMPL (i, name, memcpy,
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
> + __memcpy_vector)
> +#endif
> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
> __memcpy_noalignment)
> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> index 8544f5402a..4bedd21866 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> @@ -27,16 +27,24 @@
> # include <ifunc-init.h>
> # include <riscv-ifunc.h>
> # include <sys/hwprobe.h>
> +# include <asm/hwcap.h>
>
> extern __typeof (__redirect_memcpy) __libc_memcpy;
>
> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>
> static inline __typeof (__redirect_memcpy) *
> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
> {
> unsigned long long int v;
> +
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
> + return __memcpy_vector;
> +#endif
> +
> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
> return __memcpy_noalignment;
> --
> 2.25.1
>
>
> On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
>> From: daichengrong <daichengrong@iscas.ac.cn>
>>
>> Change in v7:
>> update dl_hwcap support using kernel header
>> update rvv memcpy compile with option,+v
>> delete optimization for small lengths
> I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
> before) it's a 3x speedup on long lengths, a clear improvement for
> everything 16 bytes onwards and no significant regressions on lengths
> less than 16 bytes.
>
> Tested-by: Anton Blanchard <antonb@tenstorrent.com>
>
> Anton
On Banana Pi F3 (256-bit RVV), it's a 3x speedup on long lengths.
And on K230 (128-bit RVV), it gets a 2x speedup on long lengths.
Tested-by: daichengrong <daichengrong@iscas.ac.cn>
>> Change in v6:
>> Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>>
>> Changes in v5:
>> check ifunc-impl-list memcpy vector support with by dl_hwcap
>>
>> Changes in v4:
>> update rvv memcpy support by compiler
>> check whether rvv enabled by dl_hwcap
>>
>> Changes in v2:
>> delete size-0 branch
>> ---
>> sysdeps/riscv/multiarch/memcpy_vector.S | 37 +++++++++++++++++++
>> .../unix/sysv/linux/riscv/multiarch/Makefile | 6 +++
>> .../linux/riscv/multiarch/ifunc-impl-list.c | 14 +++++++
>> .../unix/sysv/linux/riscv/multiarch/memcpy.c | 8 ++++
>> 4 files changed, 65 insertions(+)
>> create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>>
>> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
>> new file mode 100644
>> index 0000000000..eaf28aaf6d
>> --- /dev/null
>> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
>> @@ -0,0 +1,37 @@
>> +/* memcpy for RISC-V Vector.
>> + Copyright (C) 2024-2025 Free Software Foundation, Inc.
>> + This file is part of the GNU C Library.
>> +
>> + The GNU C Library is free software; you can redistribute it and/or
>> + modify it under the terms of the GNU Lesser General Public
>> + License as published by the Free Software Foundation; either
>> + version 2.1 of the License, or (at your option) any later version.
>> +
>> + The GNU C Library is distributed in the hope that it will be useful,
>> + but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + Lesser General Public License for more details.
>> +
>> + You should have received a copy of the GNU Lesser General Public
>> + License along with the GNU C Library; if not, see
>> + <https://www.gnu.org/licenses/>. */
>> +
>> +
>> +#include <sysdep.h>
>> +#include <sys/asm.h>
>> +
>> +ENTRY (__memcpy_vector)
>> +.option push
>> +.option arch, +v
>> + mv a6, a0
>> +L(loop):
>> + vsetvli a3,a2,e8,m8,ta,ma
>> + vle8.v v8,(a1)
>> + vse8.v v8,(a6)
>> + add a1,a1,a3
>> + sub a2,a2,a3
>> + add a6,a6,a3
>> + bnez a2,L(loop)
>> + ret
>> +.option pop
>> +END (__memcpy_vector)
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> index fcef5659d4..478338006b 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> @@ -5,5 +5,11 @@ sysdep_routines += \
>> memcpy_noalignment \
>> # sysdep_routines
>>
>> +ifeq ($(have-gcc-riscv-rvv),yes)
>> +sysdep_routines += \
>> + memcpy_vector \
>> + # rvv sysdep_routines
>> +endif
>> +
>> CFLAGS-memcpy_noalignment.c += -mno-strict-align
>> endif
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> index 1c1deca8f6..26f3376d23 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> @@ -19,6 +19,8 @@
>> #include <ifunc-impl-list.h>
>> #include <string.h>
>> #include <sys/hwprobe.h>
>> +#include <ldsodefs.h>
>> +#include <asm/hwcap.h>
>>
>> size_t
>> __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> size_t i = max;
>>
>> bool fast_unaligned = false;
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + bool rvv_ext = false;
>> +#endif
>>
>> struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>> if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> == RISCV_HWPROBE_MISALIGNED_FAST)
>> fast_unaligned = true;
>>
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
>> + rvv_ext = true;
>> +#endif
>> +
>> IFUNC_IMPL (i, name, memcpy,
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
>> + __memcpy_vector)
>> +#endif
>> IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>> __memcpy_noalignment)
>> IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> index 8544f5402a..4bedd21866 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> @@ -27,16 +27,24 @@
>> # include <ifunc-init.h>
>> # include <riscv-ifunc.h>
>> # include <sys/hwprobe.h>
>> +# include <asm/hwcap.h>
>>
>> extern __typeof (__redirect_memcpy) __libc_memcpy;
>>
>> extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>> extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
>> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>>
>> static inline __typeof (__redirect_memcpy) *
>> select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>> {
>> unsigned long long int v;
>> +
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> + if (dl_hwcap & COMPAT_HWCAP_ISA_V)
>> + return __memcpy_vector;
>> +#endif
>> +
>> if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>> && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>> return __memcpy_noalignment;
>> --
>> 2.25.1
>>
>>
new file mode 100644
@@ -0,0 +1,37 @@
+/* memcpy for RISC-V Vector.
+ Copyright (C) 2024-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+ENTRY (__memcpy_vector)
+.option push
+.option arch, +v
+ mv a6, a0
+L(loop):
+ vsetvli a3,a2,e8,m8,ta,ma
+ vle8.v v8,(a1)
+ vse8.v v8,(a6)
+ add a1,a1,a3
+ sub a2,a2,a3
+ add a6,a6,a3
+ bnez a2,L(loop)
+ ret
+.option pop
+END (__memcpy_vector)
@@ -5,5 +5,11 @@ sysdep_routines += \
memcpy_noalignment \
# sysdep_routines
+ifeq ($(have-gcc-riscv-rvv),yes)
+sysdep_routines += \
+ memcpy_vector \
+ # rvv sysdep_routines
+endif
+
CFLAGS-memcpy_noalignment.c += -mno-strict-align
endif
@@ -19,6 +19,8 @@
#include <ifunc-impl-list.h>
#include <string.h>
#include <sys/hwprobe.h>
+#include <ldsodefs.h>
+#include <asm/hwcap.h>
size_t
__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = max;
bool fast_unaligned = false;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ bool rvv_ext = false;
+#endif
struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
@@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
== RISCV_HWPROBE_MISALIGNED_FAST)
fast_unaligned = true;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
+ rvv_ext = true;
+#endif
+
IFUNC_IMPL (i, name, memcpy,
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
+ __memcpy_vector)
+#endif
IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
__memcpy_noalignment)
IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
@@ -27,16 +27,24 @@
# include <ifunc-init.h>
# include <riscv-ifunc.h>
# include <sys/hwprobe.h>
+# include <asm/hwcap.h>
extern __typeof (__redirect_memcpy) __libc_memcpy;
extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
static inline __typeof (__redirect_memcpy) *
select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
{
unsigned long long int v;
+
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
+ if (dl_hwcap & COMPAT_HWCAP_ISA_V)
+ return __memcpy_vector;
+#endif
+
if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
&& (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
return __memcpy_noalignment;