[6/6,v2] newlib: introduce --enable-newlib-hw-misaligned-access option

Message ID c7e200da6f59b5809bfb7ae0bd15511a487909ad.camel@espressif.com
State New
Headers
Series None |

Commit Message

Alexey Lapshin Jan. 28, 2025, 11:59 a.m. UTC
  Some hardware may perform better when copying unaligned
word-sized memory compared to byte-by-byte copying.

If the setting is not defined explicitly, either by the
--enable-newlib-hw-misaligned-access config option or by the
$default_newlib_hw_misaligned_access variable in the configure.host file,
a compiler check is performed to detect whether __riscv_misaligned_fast or
__riscv_misaligned_slow is defined.

This commit introduces autodetection for RISC-V. Additionally,
__ARM_FEATURE_UNALIGNED could be checked for the ARM architecture.
However, this is not implemented in this commit, as it requires changes
in newlib/libc/machine/[arm|aarch64].
---
 newlib/configure                   | 51 ++++++++++++++++++++++++++++++
 newlib/configure.ac                | 34 ++++++++++++++++++++
 newlib/configure.host              |  6 ++++
 newlib/libc/machine/riscv/memcpy.c |  4 +++
 newlib/libc/machine/riscv/strcmp.S |  5 ++-
 newlib/libc/machine/riscv/strcpy.c |  2 ++
 newlib/libc/string/local.h         | 11 ++++++-
 newlib/newlib.hin                  |  3 ++
 8 files changed, 114 insertions(+), 2 deletions(-)

-- 
2.43.0
  

Comments

Torbjorn SVENSSON Jan. 28, 2025, 3:48 p.m. UTC | #1
On 2025-01-28 12:59, Alexey Lapshin wrote:
> Some hardware may perform better when copying unaligned
> word-sized memory compared to byte-by-byte copying.
> 
> In case not defined explicitly by --enable-newlib-hw-misaligned-access
> config option or variable $default_newlib_hw_misaligned_access in
> configure.host file the compiler check will be performed to detect if
> __riscv_misaligned_fast or __riscv_misaligned_slow is defined.
> 
> This commit introduces autodetection for RISC-V. Additionally, checking
> for __ARM_FEATURE_UNALIGNED could be checked for ARM architecture.
> However, this was not implemented in the commit, as changes in
> newlib/libc/machine/[arm|aarch64] need to be performed.
> ---
>   newlib/configure                   | 51 ++++++++++++++++++++++++++++++
>   newlib/configure.ac                | 34 ++++++++++++++++++++
>   newlib/configure.host              |  6 ++++
>   newlib/libc/machine/riscv/memcpy.c |  4 +++
>   newlib/libc/machine/riscv/strcmp.S |  5 ++-
>   newlib/libc/machine/riscv/strcpy.c |  2 ++
>   newlib/libc/string/local.h         | 11 ++++++-
>   newlib/newlib.hin                  |  3 ++
>   8 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/newlib/configure b/newlib/configure
> index bf8d08100..2336fa68c 100755
> --- a/newlib/configure
> +++ b/newlib/configure
> @@ -999,6 +999,7 @@ enable_newlib_nano_formatted_io
>   enable_newlib_retargetable_locking
>   enable_newlib_long_time_t
>   enable_newlib_use_gdtoa
> +enable_newlib_hw_misaligned_access
>   enable_multilib
>   enable_target_optspace
>   enable_malloc_debugging
> @@ -1668,6 +1669,7 @@ Optional Features:
>     --enable-newlib-retargetable-locking    Allow locking routines to be retargeted at link time
>     --enable-newlib-long-time_t   define time_t to long
>     --enable-newlib-use-gdtoa   Use gdtoa rather than legacy ldtoa
> +  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory
>     --enable-multilib       build many library versions (default)
>     --enable-target-optspace  optimize for space
>     --enable-malloc-debugging indicate malloc debugging requested
> @@ -2594,6 +2596,19 @@ else
>     newlib_use_gdtoa=yes
>   fi
>   
> +# Check whether --enable-newlib-hw-misaligned-access was given.
> +if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
> +  enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
> +  case "${enableval}" in
> +  yes) newlib_hw_misaligned_access=yes;;
> +  no)  newlib_hw_misaligned_access=no ;;
> +  *)   as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
> +  esac
> + fi
> +else
> +  newlib_hw_misaligned_access=
> +fi
> +
>   # Default to --enable-multilib
>   # Check whether --enable-multilib was given.
>   if test "${enable_multilib+set}" = set; then :
> @@ -6674,6 +6689,42 @@ $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
>   
>   fi
>   
> +if test "x${newlib_hw_misaligned_access}" = "x"; then
> +  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
> +$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
> +if ${newlib_cv_hw_misaligned_access+:} false; then :
> +  $as_echo_n "(cached) " >&6
> +else
> +    cat > conftest.c <<EOF
> +#if __riscv_misaligned_fast || __riscv_misaligned_slow
> +void misalign_access_supported(void) {}
> +#else
> +#error "misaligned access is not supported"
> +#endif
> +EOF
> +  newlib_cv_hw_misaligned_access=no
> +  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
> +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> +  (eval $ac_try) 2>&5
> +  ac_status=$?
> +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> +  test $ac_status = 0; }; }
> +  then
> +    newlib_cv_hw_misaligned_access=yes
> +  fi
> +  rm -f conftest*
> +fi
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
> +$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
> +  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
> +fi
> +
> +if test "${newlib_hw_misaligned_access}" = "yes"; then
> +
> +$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
> +
> +fi
> +
>   
>   if test "x${iconv_encodings}" != "x" \
>      || test "x${iconv_to_encodings}" != "x" \
> diff --git a/newlib/configure.ac b/newlib/configure.ac
> index 55e5a9446..3a10b671b 100644
> --- a/newlib/configure.ac
> +++ b/newlib/configure.ac
> @@ -313,6 +313,17 @@ AC_ARG_ENABLE(newlib-use-gdtoa,
>     esac
>    fi], [newlib_use_gdtoa=yes])dnl
>   
> +dnl Support --enable-newlib-hw-misaligned-access
> +AC_ARG_ENABLE(newlib-hw-misaligned-access,
> +[  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory ],
> +[if test "${newlib_hw_misaligned_access+set}" != set; then
> +  case "${enableval}" in
> +  yes) newlib_hw_misaligned_access=yes;;
> +  no)  newlib_hw_misaligned_access=no ;;
> +  *)   AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
> +  esac
> + fi], [newlib_hw_misaligned_access=])dnl
> +
>   AM_ENABLE_MULTILIB(, ..)
>   NEWLIB_CONFIGURE(.)
>   
> @@ -527,6 +538,29 @@ if test "${newlib_use_gdtoa}" = "yes"; then
>     AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
>   fi
>   
> +if test "x${newlib_hw_misaligned_access}" = "x"; then
> +  AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
> +              [newlib_cv_hw_misaligned_access], [dnl
> +  cat > conftest.c <<EOF
> +#if __riscv_misaligned_fast || __riscv_misaligned_slow
> +void misalign_access_supported(void) {}
> +#else
> +#error "misaligned access is not supported"
> +#endif
> +EOF
> +  newlib_cv_hw_misaligned_access=no
> +  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
> +  then
> +    newlib_cv_hw_misaligned_access=yes
> +  fi
> +  rm -f conftest*])
> +  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
> +fi
> +
> +if test "${newlib_hw_misaligned_access}" = "yes"; then
> +  AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
> +fi
> +
>   dnl
>   dnl Parse --enable-newlib-iconv-encodings option argument
>   dnl
> diff --git a/newlib/configure.host b/newlib/configure.host
> index ff2e51275..c43cfcf9a 100644
> --- a/newlib/configure.host
> +++ b/newlib/configure.host
> @@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
>   default_newlib_atexit_dynamic_alloc=yes
>   default_newlib_nano_malloc=no
>   default_newlib_reent_check_verify=yes
> +default_newlib_hw_misaligned_access=
>   lpfx="lib_a-"
>   newlib_msg_warn=
>   
> @@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
>   	fi
>   fi
>   
> +# Set newlib-hw-misaligned-access to default if not defined.
> +if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
> +	newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
> +fi
> +
>   # Remove rpc headers if xdr_dir not specified
>   if [ "x${xdr_dir}" = "x" ]; then
>   	noinclude="${noinclude} rpc/types.h rpc/xdr.h"
> diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
> index 4098f3ab1..e1a34a8c8 100644
> --- a/newlib/libc/machine/riscv/memcpy.c
> +++ b/newlib/libc/machine/riscv/memcpy.c
> @@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
>     const char *b = (const char *)bb;
>     char *end = a + n;
>     uintptr_t msk = sizeof (long) - 1;
> +#if __riscv_misaligned_slow || __riscv_misaligned_fast
> +  if (n < sizeof (long))
> +#else
>     if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
>   	       || n < sizeof (long)))
> +#endif
>       {
>   small:
>         if (__builtin_expect (a < end, 1))
> diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
> index 9af9ca1f3..12c39db94 100644
> --- a/newlib/libc/machine/riscv/strcmp.S
> +++ b/newlib/libc/machine/riscv/strcmp.S
> @@ -30,10 +30,13 @@ strcmp:
>   
>   .size	strcmp, .-strcmp
>   #else
> -  or    a4, a0, a1
>     li    t2, -1
> +
> +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
> +  or    a4, a0, a1
>     and   a4, a4, SZREG-1
>     bnez  a4, .Lmisaligned
> +#endif
>   
>   #if SZREG == 4
>     li a5, 0x7f7f7f7f
> diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
> index 6d802fa8e..08aef64ba 100644
> --- a/newlib/libc/machine/riscv/strcpy.c
> +++ b/newlib/libc/machine/riscv/strcpy.c
> @@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
>     char *dst0 = dst;
>   
>   #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
>     int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
>     if (__builtin_expect(!misaligned, 1))
> +#endif
>       {
>         long *ldst = (long *)dst;
>         const long *lsrc = (const long *)src;
> diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
> index fb8e6c65c..263ca5cbc 100644
> --- a/newlib/libc/string/local.h
> +++ b/newlib/libc/string/local.h
> @@ -17,12 +17,21 @@ int __wcwidth (wint_t);
>   # define __inhibit_loop_to_libcall
>   #endif
>   
> -/* Nonzero if X is not aligned on a "long" boundary.  */
> +/* Nonzero if X is not aligned on a "long" boundary.
> + * This macro is used to skip a few bytes to find an aligned pointer.
> + * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
> + * to avoid small performance penalties (if they are not zero).  */
>   #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
>   
> +#ifdef _HAVE_HW_MISALIGNED_ACCESS
> +/* Hardware performs unaligned operations with little
> + * to no penalty compared to byte-to-byte copy.  */
> +#define UNALIGNED_X_Y(X, Y) (0)
> +#else // _HAVE_HW_MISALIGNED_ACCESS

I think the above comment should be C-style (/* ... */).

>   /* Nonzero if either X or Y is not aligned on a "long" boundary.  */
>   #define UNALIGNED_X_Y(X, Y) \
>     (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
> +#endif // _HAVE_HW_MISALIGNED_ACCESS

Same here.

Kind regards,
Torbjörn

>   
>   /* How many bytes are copied each iteration of the word copy loop.  */
>   #define LITTLE_BLOCK_SIZE (sizeof (long))
> diff --git a/newlib/newlib.hin b/newlib/newlib.hin
> index 831846940..daba1972d 100644
> --- a/newlib/newlib.hin
> +++ b/newlib/newlib.hin
> @@ -425,6 +425,9 @@
>   /* Define if wide char orientation is supported. */
>   #undef _WIDE_ORIENT
>   
> +/* Define if hardware has little to no penalty for misaligned memory access. */
> +#undef _HAVE_HW_MISALIGNED_ACCESS
> +
>   /* The newlib minor version number. */
>   #undef __NEWLIB_MINOR__
>
  
Alexey Lapshin Jan. 29, 2025, 8:27 a.m. UTC | #2
Some hardware may perform better when copying unaligned
word-sized memory compared to byte-by-byte copying.

If the setting is not defined explicitly, either by the
--enable-newlib-hw-misaligned-access config option or by the
$default_newlib_hw_misaligned_access variable in the configure.host file,
a compiler check is performed to detect whether __riscv_misaligned_fast or
__riscv_misaligned_slow is defined.

This commit introduces autodetection for RISC-V. Additionally,
__ARM_FEATURE_UNALIGNED could be checked for the ARM architecture.
However, this is not implemented in this commit, as it requires changes
in newlib/libc/machine/[arm|aarch64].
---
 newlib/configure                   | 51 ++++++++++++++++++++++++++++++
 newlib/configure.ac                | 34 ++++++++++++++++++++
 newlib/configure.host              |  6 ++++
 newlib/libc/machine/riscv/memcpy.c |  4 +++
 newlib/libc/machine/riscv/strcmp.S |  5 ++-
 newlib/libc/machine/riscv/strcpy.c |  2 ++
 newlib/libc/string/local.h         | 11 ++++++-
 newlib/newlib.hin                  |  3 ++
 8 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/newlib/configure b/newlib/configure
index bf8d08100..2336fa68c 100755
--- a/newlib/configure
+++ b/newlib/configure
@@ -999,6 +999,7 @@ enable_newlib_nano_formatted_io
 enable_newlib_retargetable_locking
 enable_newlib_long_time_t
 enable_newlib_use_gdtoa
+enable_newlib_hw_misaligned_access
 enable_multilib
 enable_target_optspace
 enable_malloc_debugging
@@ -1668,6 +1669,7 @@ Optional Features:
   --enable-newlib-retargetable-locking    Allow locking routines to be retargeted at link time
   --enable-newlib-long-time_t   define time_t to long
   --enable-newlib-use-gdtoa   Use gdtoa rather than legacy ldtoa
+  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory
   --enable-multilib       build many library versions (default)
   --enable-target-optspace  optimize for space
   --enable-malloc-debugging indicate malloc debugging requested
@@ -2594,6 +2596,19 @@ else
   newlib_use_gdtoa=yes
 fi
 
+# Check whether --enable-newlib-hw-misaligned-access was given.
+if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
+  enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
+  case "${enableval}" in
+  yes) newlib_hw_misaligned_access=yes;;
+  no)  newlib_hw_misaligned_access=no ;;
+  *)   as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
+  esac
+ fi
+else
+  newlib_hw_misaligned_access=
+fi
+
 # Default to --enable-multilib
 # Check whether --enable-multilib was given.
 if test "${enable_multilib+set}" = set; then :
@@ -6674,6 +6689,42 @@ $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
 
 fi
 
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
+$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
+if ${newlib_cv_hw_misaligned_access+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+    cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+  newlib_cv_hw_misaligned_access=no
+  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+  then
+    newlib_cv_hw_misaligned_access=yes
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
+$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
+  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+
+$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
+
+fi
+
 
 if test "x${iconv_encodings}" != "x" \
    || test "x${iconv_to_encodings}" != "x" \
diff --git a/newlib/configure.ac b/newlib/configure.ac
index 55e5a9446..3a10b671b 100644
--- a/newlib/configure.ac
+++ b/newlib/configure.ac
@@ -313,6 +313,17 @@ AC_ARG_ENABLE(newlib-use-gdtoa,
   esac
  fi], [newlib_use_gdtoa=yes])dnl
 
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+  case "${enableval}" in
+  yes) newlib_hw_misaligned_access=yes;;
+  no)  newlib_hw_misaligned_access=no ;;
+  *)   AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+  esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
 AM_ENABLE_MULTILIB(, ..)
 NEWLIB_CONFIGURE(.)
 
@@ -527,6 +538,29 @@ if test "${newlib_use_gdtoa}" = "yes"; then
   AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
 fi
 
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+  AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+              [newlib_cv_hw_misaligned_access], [dnl
+  cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+  newlib_cv_hw_misaligned_access=no
+  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+  then
+    newlib_cv_hw_misaligned_access=yes
+  fi
+  rm -f conftest*])
+  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+  AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
 dnl
 dnl Parse --enable-newlib-iconv-encodings option argument
 dnl
diff --git a/newlib/configure.host b/newlib/configure.host
index ff2e51275..c43cfcf9a 100644
--- a/newlib/configure.host
+++ b/newlib/configure.host
@@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
 default_newlib_atexit_dynamic_alloc=yes
 default_newlib_nano_malloc=no
 default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
 lpfx="lib_a-"
 newlib_msg_warn=
 
@@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
 	fi
 fi
 
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+	newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
 # Remove rpc headers if xdr_dir not specified
 if [ "x${xdr_dir}" = "x" ]; then
 	noinclude="${noinclude} rpc/types.h rpc/xdr.h"
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index 4098f3ab1..e1a34a8c8 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
   const char *b = (const char *)bb;
   char *end = a + n;
   uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+  if (n < sizeof (long))
+#else
   if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
 	       || n < sizeof (long)))
+#endif
     {
 small:
       if (__builtin_expect (a < end, 1))
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index 9af9ca1f3..12c39db94 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -30,10 +30,13 @@ strcmp:
 
 .size	strcmp, .-strcmp
 #else
-  or    a4, a0, a1
   li    t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+  or    a4, a0, a1
   and   a4, a4, SZREG-1
   bnez  a4, .Lmisaligned
+#endif
 
 #if SZREG == 4
   li a5, 0x7f7f7f7f
diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
index 6d802fa8e..08aef64ba 100644
--- a/newlib/libc/machine/riscv/strcpy.c
+++ b/newlib/libc/machine/riscv/strcpy.c
@@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
   char *dst0 = dst;
 
 #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
   int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
   if (__builtin_expect(!misaligned, 1))
+#endif
     {
       long *ldst = (long *)dst;
       const long *lsrc = (const long *)src;
diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
index fb8e6c65c..8cb43f8e3 100644
--- a/newlib/libc/string/local.h
+++ b/newlib/libc/string/local.h
@@ -17,12 +17,21 @@ int __wcwidth (wint_t);
 # define __inhibit_loop_to_libcall
 #endif
 
-/* Nonzero if X is not aligned on a "long" boundary.  */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero).  */
 #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
 
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-to-byte copy.  */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else /* _HAVE_HW_MISALIGNED_ACCESS */
 /* Nonzero if either X or Y is not aligned on a "long" boundary.  */
 #define UNALIGNED_X_Y(X, Y) \
   (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif /* _HAVE_HW_MISALIGNED_ACCESS */
 
 /* How many bytes are copied each iteration of the word copy loop.  */
 #define LITTLE_BLOCK_SIZE (sizeof (long))
diff --git a/newlib/newlib.hin b/newlib/newlib.hin
index 831846940..daba1972d 100644
--- a/newlib/newlib.hin
+++ b/newlib/newlib.hin
@@ -425,6 +425,9 @@
 /* Define if wide char orientation is supported. */
 #undef _WIDE_ORIENT
 
+/* Define if hardware has little to no penalty for misaligned memory access. */
+#undef _HAVE_HW_MISALIGNED_ACCESS
+
 /* The newlib minor version number. */
 #undef __NEWLIB_MINOR__
 
-- 
2.43.0
  
Corinna Vinschen Jan. 29, 2025, 11:26 a.m. UTC | #3
Hi Alexey,

On Jan 29 08:27, Alexey Lapshin wrote:
>  newlib/configure                   | 51 ++++++++++++++++++++++++++++++
>  newlib/newlib.hin                  |  3 ++

You don't have to add the generated files to your patch, because we'll
regenerate them anyway when committing a patch changing the configury.

However, if you add a configury option, can you please add it to
the README file as well?

Jeff, can you also take a look here, please?


Thanks,
Corinna
  
Corinna Vinschen Jan. 29, 2025, 11:27 a.m. UTC | #4
Hi Alexey,

On Jan 29 08:27, Alexey Lapshin wrote:
>  newlib/configure                   | 51 ++++++++++++++++++++++++++++++
>  newlib/newlib.hin                  |  3 ++

You don't have to add the generated files to your patch, because we'll
regenerate them anyway when committing a patch changing the configury.

However, if you add a configury option, can you please add it to
the README file as well?

Jeff, can you also take a look here, please?


Thanks,
Corinna
  

Patch

diff --git a/newlib/configure b/newlib/configure
index bf8d08100..2336fa68c 100755
--- a/newlib/configure
+++ b/newlib/configure
@@ -999,6 +999,7 @@  enable_newlib_nano_formatted_io
 enable_newlib_retargetable_locking
 enable_newlib_long_time_t
 enable_newlib_use_gdtoa
+enable_newlib_hw_misaligned_access
 enable_multilib
 enable_target_optspace
 enable_malloc_debugging
@@ -1668,6 +1669,7 @@  Optional Features:
   --enable-newlib-retargetable-locking    Allow locking routines to be retargeted at link time
   --enable-newlib-long-time_t   define time_t to long
   --enable-newlib-use-gdtoa   Use gdtoa rather than legacy ldtoa
+  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory
   --enable-multilib       build many library versions (default)
   --enable-target-optspace  optimize for space
   --enable-malloc-debugging indicate malloc debugging requested
@@ -2594,6 +2596,19 @@  else
   newlib_use_gdtoa=yes
 fi
 
+# Check whether --enable-newlib-hw-misaligned-access was given.
+if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
+  enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
+  case "${enableval}" in
+  yes) newlib_hw_misaligned_access=yes;;
+  no)  newlib_hw_misaligned_access=no ;;
+  *)   as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
+  esac
+ fi
+else
+  newlib_hw_misaligned_access=
+fi
+
 # Default to --enable-multilib
 # Check whether --enable-multilib was given.
 if test "${enable_multilib+set}" = set; then :
@@ -6674,6 +6689,42 @@  $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
 
 fi
 
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
+$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
+if ${newlib_cv_hw_misaligned_access+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+    cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+  newlib_cv_hw_misaligned_access=no
+  if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+  then
+    newlib_cv_hw_misaligned_access=yes
+  fi
+  rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
+$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
+  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+
+$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
+
+fi
+
 
 if test "x${iconv_encodings}" != "x" \
    || test "x${iconv_to_encodings}" != "x" \
diff --git a/newlib/configure.ac b/newlib/configure.ac
index 55e5a9446..3a10b671b 100644
--- a/newlib/configure.ac
+++ b/newlib/configure.ac
@@ -313,6 +313,17 @@  AC_ARG_ENABLE(newlib-use-gdtoa,
   esac
  fi], [newlib_use_gdtoa=yes])dnl
 
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[  --enable-newlib-hw-misaligned-access   Use hardware word-to-word insetead byte-to-byte copy on misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+  case "${enableval}" in
+  yes) newlib_hw_misaligned_access=yes;;
+  no)  newlib_hw_misaligned_access=no ;;
+  *)   AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+  esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
 AM_ENABLE_MULTILIB(, ..)
 NEWLIB_CONFIGURE(.)
 
@@ -527,6 +538,29 @@  if test "${newlib_use_gdtoa}" = "yes"; then
   AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
 fi
 
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+  AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+              [newlib_cv_hw_misaligned_access], [dnl
+  cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+  newlib_cv_hw_misaligned_access=no
+  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+  then
+    newlib_cv_hw_misaligned_access=yes
+  fi
+  rm -f conftest*])
+  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+  AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
 dnl
 dnl Parse --enable-newlib-iconv-encodings option argument
 dnl
diff --git a/newlib/configure.host b/newlib/configure.host
index ff2e51275..c43cfcf9a 100644
--- a/newlib/configure.host
+++ b/newlib/configure.host
@@ -76,6 +76,7 @@  default_newlib_io_pos_args=no
 default_newlib_atexit_dynamic_alloc=yes
 default_newlib_nano_malloc=no
 default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
 lpfx="lib_a-"
 newlib_msg_warn=
 
@@ -943,6 +944,11 @@  if [ "x${newlib_reent_check_verify}" = "x" ]; then
 	fi
 fi
 
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+	newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
 # Remove rpc headers if xdr_dir not specified
 if [ "x${xdr_dir}" = "x" ]; then
 	noinclude="${noinclude} rpc/types.h rpc/xdr.h"
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index 4098f3ab1..e1a34a8c8 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -33,8 +33,12 @@  memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
   const char *b = (const char *)bb;
   char *end = a + n;
   uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+  if (n < sizeof (long))
+#else
   if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
 	       || n < sizeof (long)))
+#endif
     {
 small:
       if (__builtin_expect (a < end, 1))
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index 9af9ca1f3..12c39db94 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -30,10 +30,13 @@  strcmp:
 
 .size	strcmp, .-strcmp
 #else
-  or    a4, a0, a1
   li    t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+  or    a4, a0, a1
   and   a4, a4, SZREG-1
   bnez  a4, .Lmisaligned
+#endif
 
 #if SZREG == 4
   li a5, 0x7f7f7f7f
diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
index 6d802fa8e..08aef64ba 100644
--- a/newlib/libc/machine/riscv/strcpy.c
+++ b/newlib/libc/machine/riscv/strcpy.c
@@ -17,8 +17,10 @@  char *strcpy(char *dst, const char *src)
   char *dst0 = dst;
 
 #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
   int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
   if (__builtin_expect(!misaligned, 1))
+#endif
     {
       long *ldst = (long *)dst;
       const long *lsrc = (const long *)src;
diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
index fb8e6c65c..263ca5cbc 100644
--- a/newlib/libc/string/local.h
+++ b/newlib/libc/string/local.h
@@ -17,12 +17,21 @@  int __wcwidth (wint_t);
 # define __inhibit_loop_to_libcall
 #endif
 
-/* Nonzero if X is not aligned on a "long" boundary.  */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero).  */
 #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
 
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-to-byte copy.  */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else // _HAVE_HW_MISALIGNED_ACCESS
 /* Nonzero if either X or Y is not aligned on a "long" boundary.  */
 #define UNALIGNED_X_Y(X, Y) \
   (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif // _HAVE_HW_MISALIGNED_ACCESS
 
 /* How many bytes are copied each iteration of the word copy loop.  */
 #define LITTLE_BLOCK_SIZE (sizeof (long))
diff --git a/newlib/newlib.hin b/newlib/newlib.hin
index 831846940..daba1972d 100644
--- a/newlib/newlib.hin
+++ b/newlib/newlib.hin
@@ -425,6 +425,9 @@ 
 /* Define if wide char orientation is supported. */
 #undef _WIDE_ORIENT
 
+/* Define if hardware has little to no penalty for misaligned memory access. */
+#undef _HAVE_HW_MISALIGNED_ACCESS
+
 /* The newlib minor version number. */
 #undef __NEWLIB_MINOR__