[6/6,v5] newlib: introduce --enable-newlib-hw-misaligned-access option
Commit Message
Some hardware performs better when misaligned memory is accessed
word by word rather than byte by byte.
If the setting is not given explicitly, either by the
--enable-newlib-hw-misaligned-access configure option or by the
$default_newlib_hw_misaligned_access variable in configure.host, a
compiler check is performed to detect whether __riscv_misaligned_fast
or __riscv_misaligned_slow is defined.
This commit introduces autodetection for RISC-V only. A similar check
for __ARM_FEATURE_UNALIGNED could be added for the ARM architectures.
However, it is not implemented in this commit, because it would also
require changes in newlib/libc/machine/[arm|aarch64].
---
newlib/README | 7 ++++++
newlib/configure.ac | 34 ++++++++++++++++++++++++++++++
newlib/configure.host | 6 ++++++
newlib/libc/machine/riscv/memcpy.c | 4 ++++
newlib/libc/machine/riscv/strcmp.S | 5 ++++-
newlib/libc/machine/riscv/strcpy.c | 2 ++
newlib/libc/string/local.h | 11 +++++++++-
7 files changed, 67 insertions(+), 2 deletions(-)
--
2.43.0
@@ -477,6 +477,13 @@ One feature can be enabled by specifying `--enable-FEATURE=yes' or
Disabled by default.
+`--enable-newlib-hw-misaligned-access'
+ Use word-by-word access for misaligned memory in string functions
+ (e.g., memcpy, strcmp, etc.) instead of byte-by-byte access.
+ This approach may offer better performance on some architectures
+ that have little to no penalty for unaligned memory access.
+ If not specified, autodetected from the compiler (currently RISC-V only); otherwise disabled.
+
Running the Testsuite
=====================
@@ -322,6 +322,17 @@ AC_ARG_ENABLE(newlib-use-malloc-in-execl,
*) AC_MSG_ERROR(bad value ${enableval} for newlib-use-malloc-in-execl option) ;;
esac], [newlib_use_malloc_in_execl=no])dnl
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[ --enable-newlib-hw-misaligned-access Use hardware word-by-word access instead of byte-by-byte for misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+ case "${enableval}" in
+ yes) newlib_hw_misaligned_access=yes;;
+ no) newlib_hw_misaligned_access=no ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+ esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
AM_ENABLE_MULTILIB(, ..)
NEWLIB_CONFIGURE(.)
@@ -540,6 +551,29 @@ if test "${newlib_use_malloc_in_execl}" = "yes"; then
AC_DEFINE(_EXECL_USE_MALLOC, 1, [Define if using malloc for execl, execle and execlp.])
fi
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+ AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+ [newlib_cv_hw_misaligned_access], [dnl
+ cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+ newlib_cv_hw_misaligned_access=no
+ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+ then
+ newlib_cv_hw_misaligned_access=yes
+ fi
+ rm -f conftest*])
+ newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+ AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
dnl
dnl Parse --enable-newlib-iconv-encodings option argument
dnl
@@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
default_newlib_atexit_dynamic_alloc=yes
default_newlib_nano_malloc=no
default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
lpfx="lib_a-"
newlib_msg_warn=
@@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
fi
fi
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+ newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
# Remove rpc headers if xdr_dir not specified
if [ "x${xdr_dir}" = "x" ]; then
noinclude="${noinclude} rpc/types.h rpc/xdr.h"
@@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
const char *b = (const char *)bb;
char *end = a + n;
uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+ if (n < sizeof (long))
+#else
if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
|| n < sizeof (long)))
+#endif
{
small:
if (__builtin_expect (a < end, 1))
@@ -30,10 +30,13 @@ strcmp:
.size strcmp, .-strcmp
#else
- or a4, a0, a1
li t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+ or a4, a0, a1
and a4, a4, SZREG-1
bnez a4, .Lmisaligned
+#endif
#if SZREG == 4
li a5, 0x7f7f7f7f
@@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
char *dst0 = dst;
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
if (__builtin_expect(!misaligned, 1))
+#endif
{
long *ldst = (long *)dst;
const long *lsrc = (const long *)src;
@@ -17,12 +17,21 @@ int __wcwidth (wint_t);
# define __inhibit_loop_to_libcall
#endif
-/* Nonzero if X is not aligned on a "long" boundary. */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero). */
#define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-by-byte copy. */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else /* _HAVE_HW_MISALIGNED_ACCESS */
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif /* _HAVE_HW_MISALIGNED_ACCESS */
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))