[6/6,v5] newlib: introduce --enable-newlib-hw-misaligned-access option

Message ID 8af2dcc2e26ac16a3f1702b29b9f4d5bf9e4f2d9.camel@espressif.com
State New
Headers
Series None |

Commit Message

Alexey Lapshin Feb. 10, 2025, 1:09 p.m. UTC
  Some hardware may perform better when copying unaligned
word-sized memory compared to byte-by-byte copying.

If neither the --enable-newlib-hw-misaligned-access config option nor the
$default_newlib_hw_misaligned_access variable in configure.host sets the
value explicitly, a compiler check is performed to detect whether
__riscv_misaligned_fast or __riscv_misaligned_slow is defined.

This commit introduces autodetection for RISC-V. Additionally,
__ARM_FEATURE_UNALIGNED could be checked for the ARM architecture.
However, this is not implemented in this commit, as it requires changes
in newlib/libc/machine/[arm|aarch64].
---
 newlib/README                      |  7 ++++++
 newlib/configure.ac                | 34 ++++++++++++++++++++++++++++++
 newlib/configure.host              |  6 ++++++
 newlib/libc/machine/riscv/memcpy.c |  4 ++++
 newlib/libc/machine/riscv/strcmp.S |  5 ++++-
 newlib/libc/machine/riscv/strcpy.c |  2 ++
 newlib/libc/string/local.h         | 11 +++++++++-
 7 files changed, 67 insertions(+), 2 deletions(-)

-- 
2.43.0
  

Patch

diff --git a/newlib/README b/newlib/README
index e652c4a92..ce4149b7a 100644
--- a/newlib/README
+++ b/newlib/README
@@ -477,6 +477,13 @@  One feature can be enabled by specifying `--enable-FEATURE=yes' or
 
      Disabled by default.
 
+`--enable-newlib-hw-misaligned-access'
+	 Use word-by-word access for misaligned memory in string functions
+	 (e.g., memcpy, strcmp, etc.) instead of byte-by-byte access.
+	 This approach may offer better performance on some architectures
+	 that have little to no penalty for unaligned memory access.
+     Disabled by default, unless the compiler reports support (RISC-V).
+
 Running the Testsuite
 =====================
 
diff --git a/newlib/configure.ac b/newlib/configure.ac
index c6833cfb1..a4807830e 100644
--- a/newlib/configure.ac
+++ b/newlib/configure.ac
@@ -322,6 +322,17 @@  AC_ARG_ENABLE(newlib-use-malloc-in-execl,
   *)   AC_MSG_ERROR(bad value ${enableval} for newlib-use-malloc-in-execl option) ;;
  esac], [newlib_use_malloc_in_execl=no])dnl
 
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[  --enable-newlib-hw-misaligned-access   Use hardware word-by-word access instead of byte-by-byte for misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+  case "${enableval}" in
+  yes) newlib_hw_misaligned_access=yes;;
+  no)  newlib_hw_misaligned_access=no ;;
+  *)   AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+  esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
 AM_ENABLE_MULTILIB(, ..)
 NEWLIB_CONFIGURE(.)
 
@@ -540,6 +551,29 @@  if test "${newlib_use_malloc_in_execl}" = "yes"; then
   AC_DEFINE(_EXECL_USE_MALLOC, 1, [Define if using malloc for execl, execle and execlp.])
 fi
 
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+  AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+              [newlib_cv_hw_misaligned_access], [dnl
+  cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+  newlib_cv_hw_misaligned_access=no
+  if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+  then
+    newlib_cv_hw_misaligned_access=yes
+  fi
+  rm -f conftest*])
+  newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+  AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
 dnl
 dnl Parse --enable-newlib-iconv-encodings option argument
 dnl
diff --git a/newlib/configure.host b/newlib/configure.host
index ff2e51275..c43cfcf9a 100644
--- a/newlib/configure.host
+++ b/newlib/configure.host
@@ -76,6 +76,7 @@  default_newlib_io_pos_args=no
 default_newlib_atexit_dynamic_alloc=yes
 default_newlib_nano_malloc=no
 default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
 lpfx="lib_a-"
 newlib_msg_warn=
 
@@ -943,6 +944,11 @@  if [ "x${newlib_reent_check_verify}" = "x" ]; then
 	fi
 fi
 
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+	newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
 # Remove rpc headers if xdr_dir not specified
 if [ "x${xdr_dir}" = "x" ]; then
 	noinclude="${noinclude} rpc/types.h rpc/xdr.h"
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index 4098f3ab1..e1a34a8c8 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -33,8 +33,12 @@  memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
   const char *b = (const char *)bb;
   char *end = a + n;
   uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+  if (n < sizeof (long))
+#else
   if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
 	       || n < sizeof (long)))
+#endif
     {
 small:
       if (__builtin_expect (a < end, 1))
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index 9af9ca1f3..12c39db94 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -30,10 +30,13 @@  strcmp:
 
 .size	strcmp, .-strcmp
 #else
-  or    a4, a0, a1
   li    t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+  or    a4, a0, a1
   and   a4, a4, SZREG-1
   bnez  a4, .Lmisaligned
+#endif
 
 #if SZREG == 4
   li a5, 0x7f7f7f7f
diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
index 6d802fa8e..08aef64ba 100644
--- a/newlib/libc/machine/riscv/strcpy.c
+++ b/newlib/libc/machine/riscv/strcpy.c
@@ -17,8 +17,10 @@  char *strcpy(char *dst, const char *src)
   char *dst0 = dst;
 
 #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
   int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
   if (__builtin_expect(!misaligned, 1))
+#endif
     {
       long *ldst = (long *)dst;
       const long *lsrc = (const long *)src;
diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
index fb8e6c65c..8cb43f8e3 100644
--- a/newlib/libc/string/local.h
+++ b/newlib/libc/string/local.h
@@ -17,12 +17,21 @@  int __wcwidth (wint_t);
 # define __inhibit_loop_to_libcall
 #endif
 
-/* Nonzero if X is not aligned on a "long" boundary.  */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero).  */
 #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
 
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-by-byte copy.  */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else /* _HAVE_HW_MISALIGNED_ACCESS */
 /* Nonzero if either X or Y is not aligned on a "long" boundary.  */
 #define UNALIGNED_X_Y(X, Y) \
   (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif /* _HAVE_HW_MISALIGNED_ACCESS */
 
 /* How many bytes are copied each iteration of the word copy loop.  */
 #define LITTLE_BLOCK_SIZE (sizeof (long))