[v7,2/2] RISC-V: add riscv vector support for memcpy

Message ID 20250221095740.582183-3-daichengrong@iscas.ac.cn (mailing list archive)
State New
Headers
Series RISC-V: add multiarch RVV support for memcpy using FMV IFUNC |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

daichengrong Feb. 21, 2025, 9:57 a.m. UTC
  From: daichengrong <daichengrong@iscas.ac.cn>

Changes in v7:
   use the kernel header for dl_hwcap support
   compile the rvv memcpy with ".option arch, +v"
   delete the optimization for small lengths

Change in v6:
   Optimize the RVV memcpy for small lengths less than VLEN/8 bytes

Changes in v5:
   check ifunc-impl-list memcpy vector support by dl_hwcap

Changes in v4:
   update rvv memcpy support by compiler 
   check whether rvv enabled by dl_hwcap

Changes in v2:
   delete size-0 branch
---
 sysdeps/riscv/multiarch/memcpy_vector.S       | 37 +++++++++++++++++++
 .../unix/sysv/linux/riscv/multiarch/Makefile  |  6 +++
 .../linux/riscv/multiarch/ifunc-impl-list.c   | 14 +++++++
 .../unix/sysv/linux/riscv/multiarch/memcpy.c  |  8 ++++
 4 files changed, 65 insertions(+)
 create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
  

Comments

Anton Blanchard Feb. 22, 2025, 9:11 a.m. UTC | #1
Hi,

On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
> From: daichengrong <daichengrong@iscas.ac.cn>
>
> Change in v7:
>    update dl_hwcap support using kernel header
>    update rvv memcpy compile with option,+v
>    delete optimization for small lengths

I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
before) it's a 3x speedup on long lengths, a clear improvement for
everything 16 bytes onwards and no significant regressions on lengths
less than 16 bytes.

Tested-by: Anton Blanchard <antonb@tenstorrent.com>

Anton

> Change in v6:
>    Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>
> Changes in v5:
>    check ifunc-impl-list memcpy vector support with by dl_hwcap
>
> Changes in v4:
>    update rvv memcpy support by compiler
>    check whether rvv enabled by dl_hwcap
>
> Changes in v2:
>    delete size-0 branch
> ---
>  sysdeps/riscv/multiarch/memcpy_vector.S       | 37 +++++++++++++++++++
>  .../unix/sysv/linux/riscv/multiarch/Makefile  |  6 +++
>  .../linux/riscv/multiarch/ifunc-impl-list.c   | 14 +++++++
>  .../unix/sysv/linux/riscv/multiarch/memcpy.c  |  8 ++++
>  4 files changed, 65 insertions(+)
>  create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>
> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
> new file mode 100644
> index 0000000000..eaf28aaf6d
> --- /dev/null
> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
> @@ -0,0 +1,37 @@
> +/* memcpy for RISC-V Vector.
> +   Copyright (C) 2024-2025 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +
> +#include <sysdep.h>
> +#include <sys/asm.h>
> +
> +ENTRY (__memcpy_vector)
> +.option push
> +.option arch, +v
> +    mv     a6, a0
> +L(loop):
> +    vsetvli a3,a2,e8,m8,ta,ma
> +    vle8.v  v8,(a1)
> +    vse8.v  v8,(a6)
> +    add     a1,a1,a3
> +    sub     a2,a2,a3
> +    add     a6,a6,a3
> +    bnez    a2,L(loop)
> +    ret
> +.option pop
> +END (__memcpy_vector)
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> index fcef5659d4..478338006b 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
> @@ -5,5 +5,11 @@ sysdep_routines += \
>    memcpy_noalignment \
>    # sysdep_routines
>
> +ifeq ($(have-gcc-riscv-rvv),yes)
> +sysdep_routines += \
> +  memcpy_vector \
> +  # rvv sysdep_routines
> +endif
> +
>  CFLAGS-memcpy_noalignment.c += -mno-strict-align
>  endif
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> index 1c1deca8f6..26f3376d23 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
> @@ -19,6 +19,8 @@
>  #include <ifunc-impl-list.h>
>  #include <string.h>
>  #include <sys/hwprobe.h>
> +#include <ldsodefs.h>
> +#include <asm/hwcap.h>
>
>  size_t
>  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>    size_t i = max;
>
>    bool fast_unaligned = false;
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> +  bool rvv_ext = false;
> +#endif
>
>    struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>    if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>            == RISCV_HWPROBE_MISALIGNED_FAST)
>      fast_unaligned = true;
>
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> +  if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
> +    rvv_ext = true;
> +#endif
> +
>    IFUNC_IMPL (i, name, memcpy,
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> +             IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
> +                             __memcpy_vector)
> +#endif
>               IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>                               __memcpy_noalignment)
>               IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> index 8544f5402a..4bedd21866 100644
> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
> @@ -27,16 +27,24 @@
>  # include <ifunc-init.h>
>  # include <riscv-ifunc.h>
>  # include <sys/hwprobe.h>
> +# include <asm/hwcap.h>
>
>  extern __typeof (__redirect_memcpy) __libc_memcpy;
>
>  extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>  extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>
>  static inline __typeof (__redirect_memcpy) *
>  select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>  {
>    unsigned long long int v;
> +
> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
> +  if (dl_hwcap & COMPAT_HWCAP_ISA_V)
> +    return __memcpy_vector;
> +#endif
> +
>    if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>        && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>      return __memcpy_noalignment;
> --
> 2.25.1
>
>
  
daichengrong March 7, 2025, 9:24 a.m. UTC | #2
> On Fri, Feb 21, 2025 at 8:57 PM <daichengrong@iscas.ac.cn> wrote:
>> From: daichengrong <daichengrong@iscas.ac.cn>
>>
>> Change in v7:
>>     update dl_hwcap support using kernel header
>>     update rvv memcpy compile with option,+v
>>     delete optimization for small lengths
> I tested this again on our upcoming Tenstorrent Ascalon CPU and (as
> before) it's a 3x speedup on long lengths, a clear improvement for
> everything 16 bytes onwards and no significant regressions on lengths
> less than 16 bytes.
>
> Tested-by: Anton Blanchard <antonb@tenstorrent.com>
>
> Anton

On Banana Pi F3 (256-bit RVV), it's a 3x speedup on long lengths.
And on K230 (128-bit RVV), it gets a 2x speedup on long lengths.

Tested-by: daichengrong <daichengrong@iscas.ac.cn>

>> Change in v6:
>>     Optimize the RVV memcpy for small lengths less than VLEN/8 bytes
>>
>> Changes in v5:
>>     check ifunc-impl-list memcpy vector support with by dl_hwcap
>>
>> Changes in v4:
>>     update rvv memcpy support by compiler
>>     check whether rvv enabled by dl_hwcap
>>
>> Changes in v2:
>>     delete size-0 branch
>> ---
>>   sysdeps/riscv/multiarch/memcpy_vector.S       | 37 +++++++++++++++++++
>>   .../unix/sysv/linux/riscv/multiarch/Makefile  |  6 +++
>>   .../linux/riscv/multiarch/ifunc-impl-list.c   | 14 +++++++
>>   .../unix/sysv/linux/riscv/multiarch/memcpy.c  |  8 ++++
>>   4 files changed, 65 insertions(+)
>>   create mode 100644 sysdeps/riscv/multiarch/memcpy_vector.S
>>
>> diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
>> new file mode 100644
>> index 0000000000..eaf28aaf6d
>> --- /dev/null
>> +++ b/sysdeps/riscv/multiarch/memcpy_vector.S
>> @@ -0,0 +1,37 @@
>> +/* memcpy for RISC-V Vector.
>> +   Copyright (C) 2024-2025 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public
>> +   License as published by the Free Software Foundation; either
>> +   version 2.1 of the License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; if not, see
>> +   <https://www.gnu.org/licenses/>.  */
>> +
>> +
>> +#include <sysdep.h>
>> +#include <sys/asm.h>
>> +
>> +ENTRY (__memcpy_vector)
>> +.option push
>> +.option arch, +v
>> +    mv     a6, a0
>> +L(loop):
>> +    vsetvli a3,a2,e8,m8,ta,ma
>> +    vle8.v  v8,(a1)
>> +    vse8.v  v8,(a6)
>> +    add     a1,a1,a3
>> +    sub     a2,a2,a3
>> +    add     a6,a6,a3
>> +    bnez    a2,L(loop)
>> +    ret
>> +.option pop
>> +END (__memcpy_vector)
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> index fcef5659d4..478338006b 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
>> @@ -5,5 +5,11 @@ sysdep_routines += \
>>     memcpy_noalignment \
>>     # sysdep_routines
>>
>> +ifeq ($(have-gcc-riscv-rvv),yes)
>> +sysdep_routines += \
>> +  memcpy_vector \
>> +  # rvv sysdep_routines
>> +endif
>> +
>>   CFLAGS-memcpy_noalignment.c += -mno-strict-align
>>   endif
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> index 1c1deca8f6..26f3376d23 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
>> @@ -19,6 +19,8 @@
>>   #include <ifunc-impl-list.h>
>>   #include <string.h>
>>   #include <sys/hwprobe.h>
>> +#include <ldsodefs.h>
>> +#include <asm/hwcap.h>
>>
>>   size_t
>>   __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>> @@ -27,6 +29,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>     size_t i = max;
>>
>>     bool fast_unaligned = false;
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> +  bool rvv_ext = false;
>> +#endif
>>
>>     struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
>>     if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
>> @@ -34,7 +39,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
>>             == RISCV_HWPROBE_MISALIGNED_FAST)
>>       fast_unaligned = true;
>>
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> +  if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V)
>> +    rvv_ext = true;
>> +#endif
>> +
>>     IFUNC_IMPL (i, name, memcpy,
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> +             IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
>> +                             __memcpy_vector)
>> +#endif
>>                IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
>>                                __memcpy_noalignment)
>>                IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
>> diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> index 8544f5402a..4bedd21866 100644
>> --- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> +++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
>> @@ -27,16 +27,24 @@
>>   # include <ifunc-init.h>
>>   # include <riscv-ifunc.h>
>>   # include <sys/hwprobe.h>
>> +# include <asm/hwcap.h>
>>
>>   extern __typeof (__redirect_memcpy) __libc_memcpy;
>>
>>   extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
>>   extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
>> +extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
>>
>>   static inline __typeof (__redirect_memcpy) *
>>   select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
>>   {
>>     unsigned long long int v;
>> +
>> +#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT)
>> +  if (dl_hwcap & COMPAT_HWCAP_ISA_V)
>> +    return __memcpy_vector;
>> +#endif
>> +
>>     if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
>>         && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
>>       return __memcpy_noalignment;
>> --
>> 2.25.1
>>
>>
  

Patch

diff --git a/sysdeps/riscv/multiarch/memcpy_vector.S b/sysdeps/riscv/multiarch/memcpy_vector.S
new file mode 100644
index 0000000000..eaf28aaf6d
--- /dev/null
+++ b/sysdeps/riscv/multiarch/memcpy_vector.S
@@ -0,0 +1,37 @@ 
+/* memcpy for RISC-V Vector.
+   Copyright (C) 2024-2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+
+#include <sysdep.h>
+#include <sys/asm.h>
+
+ENTRY (__memcpy_vector) 
+.option push
+.option arch, +v
+    mv	    a6, a0
+L(loop):
+    vsetvli a3,a2,e8,m8,ta,ma
+    vle8.v  v8,(a1)
+    vse8.v  v8,(a6)
+    add     a1,a1,a3
+    sub     a2,a2,a3 
+    add     a6,a6,a3 
+    bnez    a2,L(loop)  
+    ret
+.option pop
+END (__memcpy_vector)
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
index fcef5659d4..478338006b 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/Makefile
@@ -5,5 +5,11 @@  sysdep_routines += \
   memcpy_noalignment \
   # sysdep_routines
 
+ifeq ($(have-gcc-riscv-rvv),yes)
+sysdep_routines += \
+  memcpy_vector \
+  # rvv sysdep_routines
+endif
+
 CFLAGS-memcpy_noalignment.c += -mno-strict-align
 endif
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
index 1c1deca8f6..26f3376d23 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/ifunc-impl-list.c
@@ -19,6 +19,8 @@ 
 #include <ifunc-impl-list.h>
 #include <string.h>
 #include <sys/hwprobe.h>
+#include <ldsodefs.h>
+#include <asm/hwcap.h>
 
 size_t
 __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
@@ -27,6 +29,9 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   size_t i = max;
 
   bool fast_unaligned = false;
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+  bool rvv_ext = false;
+#endif
 
   struct riscv_hwprobe pair = { .key = RISCV_HWPROBE_KEY_CPUPERF_0 };
   if (__riscv_hwprobe (&pair, 1, 0, NULL, 0) == 0
@@ -34,7 +39,16 @@  __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
           == RISCV_HWPROBE_MISALIGNED_FAST)
     fast_unaligned = true;
 
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+  if (GLRO(dl_hwcap) & COMPAT_HWCAP_ISA_V) 
+    rvv_ext = true;
+#endif
+
   IFUNC_IMPL (i, name, memcpy,
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+	      IFUNC_IMPL_ADD (array, i, memcpy, rvv_ext,
+			      __memcpy_vector)
+#endif
 	      IFUNC_IMPL_ADD (array, i, memcpy, fast_unaligned,
 			      __memcpy_noalignment)
 	      IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_generic))
diff --git a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
index 8544f5402a..4bedd21866 100644
--- a/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
+++ b/sysdeps/unix/sysv/linux/riscv/multiarch/memcpy.c
@@ -27,16 +27,24 @@ 
 # include <ifunc-init.h>
 # include <riscv-ifunc.h>
 # include <sys/hwprobe.h>
+# include <asm/hwcap.h>
 
 extern __typeof (__redirect_memcpy) __libc_memcpy;
 
 extern __typeof (__redirect_memcpy) __memcpy_generic attribute_hidden;
 extern __typeof (__redirect_memcpy) __memcpy_noalignment attribute_hidden;
+extern __typeof (__redirect_memcpy) __memcpy_vector attribute_hidden;
 
 static inline __typeof (__redirect_memcpy) *
 select_memcpy_ifunc (uint64_t dl_hwcap, __riscv_hwprobe_t hwprobe_func)
 {
   unsigned long long int v;
+
+#if defined(HAVE_RISCV_ASM_VECTOR_SUPPORT) 
+  if (dl_hwcap & COMPAT_HWCAP_ISA_V) 
+    return __memcpy_vector;
+#endif
+
   if (__riscv_hwprobe_one (hwprobe_func, RISCV_HWPROBE_KEY_CPUPERF_0, &v) == 0
       && (v & RISCV_HWPROBE_MISALIGNED_MASK) == RISCV_HWPROBE_MISALIGNED_FAST)
     return __memcpy_noalignment;