[6/6,v2] newlib: introduce --enable-newlib-hw-misaligned-access option
Commit Message
Some hardware may perform better when copying unaligned memory
word by word rather than byte by byte.
If the setting is not given explicitly, either by the
--enable-newlib-hw-misaligned-access configure option or by the
$default_newlib_hw_misaligned_access variable in configure.host, a
compiler check is performed to detect whether
__riscv_misaligned_fast or __riscv_misaligned_slow is defined.
This commit introduces autodetection for RISC-V. Additionally,
__ARM_FEATURE_UNALIGNED could be checked for the ARM architecture.
However, this was not implemented in this commit, as it requires
changes in newlib/libc/machine/[arm|aarch64].
---
newlib/configure | 51 ++++++++++++++++++++++++++++++
newlib/configure.ac | 34 ++++++++++++++++++++
newlib/configure.host | 6 ++++
newlib/libc/machine/riscv/memcpy.c | 4 +++
newlib/libc/machine/riscv/strcmp.S | 5 ++-
newlib/libc/machine/riscv/strcpy.c | 2 ++
newlib/libc/string/local.h | 11 ++++++-
newlib/newlib.hin | 3 ++
8 files changed, 114 insertions(+), 2 deletions(-)
--
2.43.0
Comments
On 2025-01-28 12:59, Alexey Lapshin wrote:
> Some hardware may perform better when copying unaligned
> word-sized memory compared to byte-by-byte copying.
>
> In case not defined explicitly by --enable-newlib-hw-misaligned-access
> config option or variable $default_newlib_hw_misaligned_access in
> configure.host file the compiler check will be performed to detect if
> __riscv_misaligned_fast or __riscv_misaligned_slow is defined.
>
> This commit introduces autodetection for RISC-V. Additionally, checking
> for __ARM_FEATURE_UNALIGNED could be checked for ARM architecture.
> However, this was not implemented in the commit, as changes in
> newlib/libc/machine/[arm|aarch64] need to be performed.
> ---
> newlib/configure | 51 ++++++++++++++++++++++++++++++
> newlib/configure.ac | 34 ++++++++++++++++++++
> newlib/configure.host | 6 ++++
> newlib/libc/machine/riscv/memcpy.c | 4 +++
> newlib/libc/machine/riscv/strcmp.S | 5 ++-
> newlib/libc/machine/riscv/strcpy.c | 2 ++
> newlib/libc/string/local.h | 11 ++++++-
> newlib/newlib.hin | 3 ++
> 8 files changed, 114 insertions(+), 2 deletions(-)
>
> diff --git a/newlib/configure b/newlib/configure
> index bf8d08100..2336fa68c 100755
> --- a/newlib/configure
> +++ b/newlib/configure
> @@ -999,6 +999,7 @@ enable_newlib_nano_formatted_io
> enable_newlib_retargetable_locking
> enable_newlib_long_time_t
> enable_newlib_use_gdtoa
> +enable_newlib_hw_misaligned_access
> enable_multilib
> enable_target_optspace
> enable_malloc_debugging
> @@ -1668,6 +1669,7 @@ Optional Features:
> --enable-newlib-retargetable-locking Allow locking routines to be retargeted at link time
> --enable-newlib-long-time_t define time_t to long
> --enable-newlib-use-gdtoa Use gdtoa rather than legacy ldtoa
> + --enable-newlib-hw-misaligned-access Use hardware word-to-word insetead byte-to-byte copy on misaligned memory
> --enable-multilib build many library versions (default)
> --enable-target-optspace optimize for space
> --enable-malloc-debugging indicate malloc debugging requested
> @@ -2594,6 +2596,19 @@ else
> newlib_use_gdtoa=yes
> fi
>
> +# Check whether --enable-newlib-hw-misaligned-access was given.
> +if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
> + enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
> + case "${enableval}" in
> + yes) newlib_hw_misaligned_access=yes;;
> + no) newlib_hw_misaligned_access=no ;;
> + *) as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
> + esac
> + fi
> +else
> + newlib_hw_misaligned_access=
> +fi
> +
> # Default to --enable-multilib
> # Check whether --enable-multilib was given.
> if test "${enable_multilib+set}" = set; then :
> @@ -6674,6 +6689,42 @@ $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
>
> fi
>
> +if test "x${newlib_hw_misaligned_access}" = "x"; then
> + { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
> +$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
> +if ${newlib_cv_hw_misaligned_access+:} false; then :
> + $as_echo_n "(cached) " >&6
> +else
> + cat > conftest.c <<EOF
> +#if __riscv_misaligned_fast || __riscv_misaligned_slow
> +void misalign_access_supported(void) {}
> +#else
> +#error "misaligned access is not supported"
> +#endif
> +EOF
> + newlib_cv_hw_misaligned_access=no
> + if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
> + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
> + (eval $ac_try) 2>&5
> + ac_status=$?
> + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
> + test $ac_status = 0; }; }
> + then
> + newlib_cv_hw_misaligned_access=yes
> + fi
> + rm -f conftest*
> +fi
> +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
> +$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
> + newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
> +fi
> +
> +if test "${newlib_hw_misaligned_access}" = "yes"; then
> +
> +$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
> +
> +fi
> +
>
> if test "x${iconv_encodings}" != "x" \
> || test "x${iconv_to_encodings}" != "x" \
> diff --git a/newlib/configure.ac b/newlib/configure.ac
> index 55e5a9446..3a10b671b 100644
> --- a/newlib/configure.ac
> +++ b/newlib/configure.ac
> @@ -313,6 +313,17 @@ AC_ARG_ENABLE(newlib-use-gdtoa,
> esac
> fi], [newlib_use_gdtoa=yes])dnl
>
> +dnl Support --enable-newlib-hw-misaligned-access
> +AC_ARG_ENABLE(newlib-hw-misaligned-access,
> +[ --enable-newlib-hw-misaligned-access Use hardware word-to-word insetead byte-to-byte copy on misaligned memory ],
> +[if test "${newlib_hw_misaligned_access+set}" != set; then
> + case "${enableval}" in
> + yes) newlib_hw_misaligned_access=yes;;
> + no) newlib_hw_misaligned_access=no ;;
> + *) AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
> + esac
> + fi], [newlib_hw_misaligned_access=])dnl
> +
> AM_ENABLE_MULTILIB(, ..)
> NEWLIB_CONFIGURE(.)
>
> @@ -527,6 +538,29 @@ if test "${newlib_use_gdtoa}" = "yes"; then
> AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
> fi
>
> +if test "x${newlib_hw_misaligned_access}" = "x"; then
> + AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
> + [newlib_cv_hw_misaligned_access], [dnl
> + cat > conftest.c <<EOF
> +#if __riscv_misaligned_fast || __riscv_misaligned_slow
> +void misalign_access_supported(void) {}
> +#else
> +#error "misaligned access is not supported"
> +#endif
> +EOF
> + newlib_cv_hw_misaligned_access=no
> + if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
> + then
> + newlib_cv_hw_misaligned_access=yes
> + fi
> + rm -f conftest*])
> + newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
> +fi
> +
> +if test "${newlib_hw_misaligned_access}" = "yes"; then
> + AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
> +fi
> +
> dnl
> dnl Parse --enable-newlib-iconv-encodings option argument
> dnl
> diff --git a/newlib/configure.host b/newlib/configure.host
> index ff2e51275..c43cfcf9a 100644
> --- a/newlib/configure.host
> +++ b/newlib/configure.host
> @@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
> default_newlib_atexit_dynamic_alloc=yes
> default_newlib_nano_malloc=no
> default_newlib_reent_check_verify=yes
> +default_newlib_hw_misaligned_access=
> lpfx="lib_a-"
> newlib_msg_warn=
>
> @@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
> fi
> fi
>
> +# Set newlib-hw-misaligned-access to default if not defined.
> +if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
> + newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
> +fi
> +
> # Remove rpc headers if xdr_dir not specified
> if [ "x${xdr_dir}" = "x" ]; then
> noinclude="${noinclude} rpc/types.h rpc/xdr.h"
> diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
> index 4098f3ab1..e1a34a8c8 100644
> --- a/newlib/libc/machine/riscv/memcpy.c
> +++ b/newlib/libc/machine/riscv/memcpy.c
> @@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
> const char *b = (const char *)bb;
> char *end = a + n;
> uintptr_t msk = sizeof (long) - 1;
> +#if __riscv_misaligned_slow || __riscv_misaligned_fast
> + if (n < sizeof (long))
> +#else
> if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
> || n < sizeof (long)))
> +#endif
> {
> small:
> if (__builtin_expect (a < end, 1))
> diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
> index 9af9ca1f3..12c39db94 100644
> --- a/newlib/libc/machine/riscv/strcmp.S
> +++ b/newlib/libc/machine/riscv/strcmp.S
> @@ -30,10 +30,13 @@ strcmp:
>
> .size strcmp, .-strcmp
> #else
> - or a4, a0, a1
> li t2, -1
> +
> +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
> + or a4, a0, a1
> and a4, a4, SZREG-1
> bnez a4, .Lmisaligned
> +#endif
>
> #if SZREG == 4
> li a5, 0x7f7f7f7f
> diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
> index 6d802fa8e..08aef64ba 100644
> --- a/newlib/libc/machine/riscv/strcpy.c
> +++ b/newlib/libc/machine/riscv/strcpy.c
> @@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
> char *dst0 = dst;
>
> #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> +#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
> int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
> if (__builtin_expect(!misaligned, 1))
> +#endif
> {
> long *ldst = (long *)dst;
> const long *lsrc = (const long *)src;
> diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
> index fb8e6c65c..263ca5cbc 100644
> --- a/newlib/libc/string/local.h
> +++ b/newlib/libc/string/local.h
> @@ -17,12 +17,21 @@ int __wcwidth (wint_t);
> # define __inhibit_loop_to_libcall
> #endif
>
> -/* Nonzero if X is not aligned on a "long" boundary. */
> +/* Nonzero if X is not aligned on a "long" boundary.
> + * This macro is used to skip a few bytes to find an aligned pointer.
> + * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
> + * to avoid small performance penalties (if they are not zero). */
> #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
>
> +#ifdef _HAVE_HW_MISALIGNED_ACCESS
> +/* Hardware performs unaligned operations with little
> + * to no penalty compared to byte-to-byte copy. */
> +#define UNALIGNED_X_Y(X, Y) (0)
> +#else // _HAVE_HW_MISALIGNED_ACCESS
I think the above comment should be C-style (/* ... */).
> /* Nonzero if either X or Y is not aligned on a "long" boundary. */
> #define UNALIGNED_X_Y(X, Y) \
> (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
> +#endif // _HAVE_HW_MISALIGNED_ACCESS
Same here.
Kind regards,
Torbjörn
>
> /* How many bytes are copied each iteration of the word copy loop. */
> #define LITTLE_BLOCK_SIZE (sizeof (long))
> diff --git a/newlib/newlib.hin b/newlib/newlib.hin
> index 831846940..daba1972d 100644
> --- a/newlib/newlib.hin
> +++ b/newlib/newlib.hin
> @@ -425,6 +425,9 @@
> /* Define if wide char orientation is supported. */
> #undef _WIDE_ORIENT
>
> +/* Define if hardware has little to no penalty for misaligned memory access. */
> +#undef _HAVE_HW_MISALIGNED_ACCESS
> +
> /* The newlib minor version number. */
> #undef __NEWLIB_MINOR__
>
Some hardware may perform better when copying unaligned memory
word by word rather than byte by byte.
If the setting is not given explicitly, either by the
--enable-newlib-hw-misaligned-access configure option or by the
$default_newlib_hw_misaligned_access variable in configure.host, a
compiler check is performed to detect whether
__riscv_misaligned_fast or __riscv_misaligned_slow is defined.
This commit introduces autodetection for RISC-V. Additionally,
__ARM_FEATURE_UNALIGNED could be checked for the ARM architecture.
However, this was not implemented in this commit, as it requires
changes in newlib/libc/machine/[arm|aarch64].
---
newlib/configure | 51 ++++++++++++++++++++++++++++++
newlib/configure.ac | 34 ++++++++++++++++++++
newlib/configure.host | 6 ++++
newlib/libc/machine/riscv/memcpy.c | 4 +++
newlib/libc/machine/riscv/strcmp.S | 5 ++-
newlib/libc/machine/riscv/strcpy.c | 2 ++
newlib/libc/string/local.h | 11 ++++++-
newlib/newlib.hin | 3 ++
8 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/newlib/configure b/newlib/configure
index bf8d08100..2336fa68c 100755
--- a/newlib/configure
+++ b/newlib/configure
@@ -999,6 +999,7 @@ enable_newlib_nano_formatted_io
enable_newlib_retargetable_locking
enable_newlib_long_time_t
enable_newlib_use_gdtoa
+enable_newlib_hw_misaligned_access
enable_multilib
enable_target_optspace
enable_malloc_debugging
@@ -1668,6 +1669,7 @@ Optional Features:
--enable-newlib-retargetable-locking Allow locking routines to be retargeted at link time
--enable-newlib-long-time_t define time_t to long
--enable-newlib-use-gdtoa Use gdtoa rather than legacy ldtoa
+ --enable-newlib-hw-misaligned-access Use hardware word-to-word instead of byte-to-byte copy on misaligned memory
--enable-multilib build many library versions (default)
--enable-target-optspace optimize for space
--enable-malloc-debugging indicate malloc debugging requested
@@ -2594,6 +2596,19 @@ else
newlib_use_gdtoa=yes
fi
+# Check whether --enable-newlib-hw-misaligned-access was given.
+if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
+ enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
+ case "${enableval}" in
+ yes) newlib_hw_misaligned_access=yes;;
+ no) newlib_hw_misaligned_access=no ;;
+ *) as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
+ esac
+ fi
+else
+ newlib_hw_misaligned_access=
+fi
+
# Default to --enable-multilib
# Check whether --enable-multilib was given.
if test "${enable_multilib+set}" = set; then :
@@ -6674,6 +6689,42 @@ $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
fi
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
+$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
+if ${newlib_cv_hw_misaligned_access+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+ newlib_cv_hw_misaligned_access=no
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ newlib_cv_hw_misaligned_access=yes
+ fi
+ rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
+$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
+ newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+
+$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
+
+fi
+
if test "x${iconv_encodings}" != "x" \
|| test "x${iconv_to_encodings}" != "x" \
diff --git a/newlib/configure.ac b/newlib/configure.ac
index 55e5a9446..3a10b671b 100644
--- a/newlib/configure.ac
+++ b/newlib/configure.ac
@@ -313,6 +313,17 @@ AC_ARG_ENABLE(newlib-use-gdtoa,
esac
fi], [newlib_use_gdtoa=yes])dnl
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[ --enable-newlib-hw-misaligned-access Use hardware word-to-word instead of byte-to-byte copy on misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+ case "${enableval}" in
+ yes) newlib_hw_misaligned_access=yes;;
+ no) newlib_hw_misaligned_access=no ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+ esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
AM_ENABLE_MULTILIB(, ..)
NEWLIB_CONFIGURE(.)
@@ -527,6 +538,29 @@ if test "${newlib_use_gdtoa}" = "yes"; then
AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
fi
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+ AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+ [newlib_cv_hw_misaligned_access], [dnl
+ cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+ newlib_cv_hw_misaligned_access=no
+ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+ then
+ newlib_cv_hw_misaligned_access=yes
+ fi
+ rm -f conftest*])
+ newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+ AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
dnl
dnl Parse --enable-newlib-iconv-encodings option argument
dnl
diff --git a/newlib/configure.host b/newlib/configure.host
index ff2e51275..c43cfcf9a 100644
--- a/newlib/configure.host
+++ b/newlib/configure.host
@@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
default_newlib_atexit_dynamic_alloc=yes
default_newlib_nano_malloc=no
default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
lpfx="lib_a-"
newlib_msg_warn=
@@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
fi
fi
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+ newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
# Remove rpc headers if xdr_dir not specified
if [ "x${xdr_dir}" = "x" ]; then
noinclude="${noinclude} rpc/types.h rpc/xdr.h"
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index 4098f3ab1..e1a34a8c8 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
const char *b = (const char *)bb;
char *end = a + n;
uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+ if (n < sizeof (long))
+#else
if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
|| n < sizeof (long)))
+#endif
{
small:
if (__builtin_expect (a < end, 1))
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index 9af9ca1f3..12c39db94 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -30,10 +30,13 @@ strcmp:
.size strcmp, .-strcmp
#else
- or a4, a0, a1
li t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+ or a4, a0, a1
and a4, a4, SZREG-1
bnez a4, .Lmisaligned
+#endif
#if SZREG == 4
li a5, 0x7f7f7f7f
diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
index 6d802fa8e..08aef64ba 100644
--- a/newlib/libc/machine/riscv/strcpy.c
+++ b/newlib/libc/machine/riscv/strcpy.c
@@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
char *dst0 = dst;
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
if (__builtin_expect(!misaligned, 1))
+#endif
{
long *ldst = (long *)dst;
const long *lsrc = (const long *)src;
diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
index fb8e6c65c..8cb43f8e3 100644
--- a/newlib/libc/string/local.h
+++ b/newlib/libc/string/local.h
@@ -17,12 +17,21 @@ int __wcwidth (wint_t);
# define __inhibit_loop_to_libcall
#endif
-/* Nonzero if X is not aligned on a "long" boundary. */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero). */
#define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-to-byte copy. */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else /* _HAVE_HW_MISALIGNED_ACCESS */
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif /* _HAVE_HW_MISALIGNED_ACCESS */
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))
diff --git a/newlib/newlib.hin b/newlib/newlib.hin
index 831846940..daba1972d 100644
--- a/newlib/newlib.hin
+++ b/newlib/newlib.hin
@@ -425,6 +425,9 @@
/* Define if wide char orientation is supported. */
#undef _WIDE_ORIENT
+/* Define if hardware has little to no penalty for misaligned memory access. */
+#undef _HAVE_HW_MISALIGNED_ACCESS
+
/* The newlib minor version number. */
#undef __NEWLIB_MINOR__
--
2.43.0
Hi Alexey,
On Jan 29 08:27, Alexey Lapshin wrote:
> newlib/configure | 51 ++++++++++++++++++++++++++++++
> newlib/newlib.hin | 3 ++
You don't have to add the generated files to your patch, because we'll
regenerate them anyway when committing a patch changing the configury.
However, if you add a configury option, can you please add it to
the README file as well?
Jeff, can you also take a look here, please?
Thanks,
Corinna
Hi Alexey,
On Jan 29 08:27, Alexey Lapshin wrote:
> newlib/configure | 51 ++++++++++++++++++++++++++++++
> newlib/newlib.hin | 3 ++
You don't have to add the generated files to your patch, because we'll
regenerate them anyway when committing a patch changing the configury.
However, if you add a configury option, can you please add it to
the README file as well?
Jeff, can you also take a look here, please?
Thanks,
Corinna
@@ -999,6 +999,7 @@ enable_newlib_nano_formatted_io
enable_newlib_retargetable_locking
enable_newlib_long_time_t
enable_newlib_use_gdtoa
+enable_newlib_hw_misaligned_access
enable_multilib
enable_target_optspace
enable_malloc_debugging
@@ -1668,6 +1669,7 @@ Optional Features:
--enable-newlib-retargetable-locking Allow locking routines to be retargeted at link time
--enable-newlib-long-time_t define time_t to long
--enable-newlib-use-gdtoa Use gdtoa rather than legacy ldtoa
+ --enable-newlib-hw-misaligned-access Use hardware word-to-word instead of byte-to-byte copy on misaligned memory
--enable-multilib build many library versions (default)
--enable-target-optspace optimize for space
--enable-malloc-debugging indicate malloc debugging requested
@@ -2594,6 +2596,19 @@ else
newlib_use_gdtoa=yes
fi
+# Check whether --enable-newlib-hw-misaligned-access was given.
+if test "${enable_newlib_hw_misaligned_access+set}" = set; then :
+ enableval=$enable_newlib_hw_misaligned_access; if test "${newlib_hw_misaligned_access+set}" != set; then
+ case "${enableval}" in
+ yes) newlib_hw_misaligned_access=yes;;
+ no) newlib_hw_misaligned_access=no ;;
+ *) as_fn_error $? "bad value ${enableval} for newlib-hw-misaligned-access option" "$LINENO" 5 ;;
+ esac
+ fi
+else
+ newlib_hw_misaligned_access=
+fi
+
# Default to --enable-multilib
# Check whether --enable-multilib was given.
if test "${enable_multilib+set}" = set; then :
@@ -6674,6 +6689,42 @@ $as_echo "#define _WANT_USE_GDTOA 1" >>confdefs.h
fi
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC has enabled misaligned hardware access" >&5
+$as_echo_n "checking if $CC has enabled misaligned hardware access... " >&6; }
+if ${newlib_cv_hw_misaligned_access+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+ newlib_cv_hw_misaligned_access=no
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ newlib_cv_hw_misaligned_access=yes
+ fi
+ rm -f conftest*
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $newlib_cv_hw_misaligned_access" >&5
+$as_echo "$newlib_cv_hw_misaligned_access" >&6; }
+ newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+
+$as_echo "#define _HAVE_HW_MISALIGNED_ACCESS 1" >>confdefs.h
+
+fi
+
if test "x${iconv_encodings}" != "x" \
|| test "x${iconv_to_encodings}" != "x" \
@@ -313,6 +313,17 @@ AC_ARG_ENABLE(newlib-use-gdtoa,
esac
fi], [newlib_use_gdtoa=yes])dnl
+dnl Support --enable-newlib-hw-misaligned-access
+AC_ARG_ENABLE(newlib-hw-misaligned-access,
+[ --enable-newlib-hw-misaligned-access Use hardware word-to-word instead of byte-to-byte copy on misaligned memory ],
+[if test "${newlib_hw_misaligned_access+set}" != set; then
+ case "${enableval}" in
+ yes) newlib_hw_misaligned_access=yes;;
+ no) newlib_hw_misaligned_access=no ;;
+ *) AC_MSG_ERROR(bad value ${enableval} for newlib-hw-misaligned-access option) ;;
+ esac
+ fi], [newlib_hw_misaligned_access=])dnl
+
AM_ENABLE_MULTILIB(, ..)
NEWLIB_CONFIGURE(.)
@@ -527,6 +538,29 @@ if test "${newlib_use_gdtoa}" = "yes"; then
AC_DEFINE(_WANT_USE_GDTOA, 1, [Define if using gdtoa rather than legacy ldtoa.])
fi
+if test "x${newlib_hw_misaligned_access}" = "x"; then
+ AC_CACHE_CHECK([if $CC has enabled misaligned hardware access],
+ [newlib_cv_hw_misaligned_access], [dnl
+ cat > conftest.c <<EOF
+#if __riscv_misaligned_fast || __riscv_misaligned_slow
+void misalign_access_supported(void) {}
+#else
+#error "misaligned access is not supported"
+#endif
+EOF
+ newlib_cv_hw_misaligned_access=no
+ if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -c conftest.c])
+ then
+ newlib_cv_hw_misaligned_access=yes
+ fi
+ rm -f conftest*])
+ newlib_hw_misaligned_access=$newlib_cv_hw_misaligned_access
+fi
+
+if test "${newlib_hw_misaligned_access}" = "yes"; then
+ AC_DEFINE(_HAVE_HW_MISALIGNED_ACCESS, 1, [Define if hardware has little to no penalty for misaligned memory access.])
+fi
+
dnl
dnl Parse --enable-newlib-iconv-encodings option argument
dnl
@@ -76,6 +76,7 @@ default_newlib_io_pos_args=no
default_newlib_atexit_dynamic_alloc=yes
default_newlib_nano_malloc=no
default_newlib_reent_check_verify=yes
+default_newlib_hw_misaligned_access=
lpfx="lib_a-"
newlib_msg_warn=
@@ -943,6 +944,11 @@ if [ "x${newlib_reent_check_verify}" = "x" ]; then
fi
fi
+# Set newlib-hw-misaligned-access to default if not defined.
+if [ "x${newlib_hw_misaligned_access}" = "x" ]; then
+ newlib_hw_misaligned_access="${default_newlib_hw_misaligned_access}";
+fi
+
# Remove rpc headers if xdr_dir not specified
if [ "x${xdr_dir}" = "x" ]; then
noinclude="${noinclude} rpc/types.h rpc/xdr.h"
@@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
const char *b = (const char *)bb;
char *end = a + n;
uintptr_t msk = sizeof (long) - 1;
+#if __riscv_misaligned_slow || __riscv_misaligned_fast
+ if (n < sizeof (long))
+#else
if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
|| n < sizeof (long)))
+#endif
{
small:
if (__builtin_expect (a < end, 1))
@@ -30,10 +30,13 @@ strcmp:
.size strcmp, .-strcmp
#else
- or a4, a0, a1
li t2, -1
+
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
+ or a4, a0, a1
and a4, a4, SZREG-1
bnez a4, .Lmisaligned
+#endif
#if SZREG == 4
li a5, 0x7f7f7f7f
@@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
char *dst0 = dst;
#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#if !(__riscv_misaligned_slow || __riscv_misaligned_fast)
int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
if (__builtin_expect(!misaligned, 1))
+#endif
{
long *ldst = (long *)dst;
const long *lsrc = (const long *)src;
@@ -17,12 +17,21 @@ int __wcwidth (wint_t);
# define __inhibit_loop_to_libcall
#endif
-/* Nonzero if X is not aligned on a "long" boundary. */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if _HAVE_HW_MISALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero). */
#define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
+#ifdef _HAVE_HW_MISALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-to-byte copy. */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else // _HAVE_HW_MISALIGNED_ACCESS
/* Nonzero if either X or Y is not aligned on a "long" boundary. */
#define UNALIGNED_X_Y(X, Y) \
(((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif // _HAVE_HW_MISALIGNED_ACCESS
/* How many bytes are copied each iteration of the word copy loop. */
#define LITTLE_BLOCK_SIZE (sizeof (long))
@@ -425,6 +425,9 @@
/* Define if wide char orientation is supported. */
#undef _WIDE_ORIENT
+/* Define if hardware has little to no penalty for misaligned memory access. */
+#undef _HAVE_HW_MISALIGNED_ACCESS
+
/* The newlib minor version number. */
#undef __NEWLIB_MINOR__