[6/6] newlib: introduce HAVE_HW_UNALIGNED_ACCESS option

Message ID d8e79e3448b5abff472d45bbea6110ac051a56b0.camel@espressif.com
State New
Headers
Series Refactor and optimize string/memory functions |

Commit Message

Alexey Lapshin Jan. 27, 2025, 10:51 a.m. UTC
  Some hardware may perform better when copying unaligned
word-sized memory compared to byte-by-byte copying.
---
 newlib/configure.host              |  6 ++++++
 newlib/libc/machine/riscv/memcpy.c |  4 ++++
 newlib/libc/machine/riscv/strcmp.S |  5 ++++-
 newlib/libc/machine/riscv/strcpy.c |  2 ++
 newlib/libc/string/local.h         | 11 ++++++++++-
 5 files changed, 26 insertions(+), 2 deletions(-)

-- 
2.43.0
  

Comments

Christian Herber (OSS) Jan. 27, 2025, 10:58 a.m. UTC | #1
Shouldn't this be controlled by __riscv_misaligned_fast according to https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#preprocessor-definitions

> -----Original Message-----
> From: Alexey Lapshin <alexey.lapshin@espressif.com>
> Sent: Monday, 27 January 2025 11:52
> To: newlib@sourceware.org
> Cc: Alexey Gerenkov <alexey.gerenkov@espressif.com>; Ivan Grokhotkov
> <ivan@espressif.com>
> Subject: [PATCH 6/6] newlib: introduce HAVE_HW_UNALIGNED_ACCESS
> option
> 
> Some hardware may perform better when copying unaligned word-sized
> memory compared to byte-by-byte copying.
> ---
>  newlib/configure.host              |  6 ++++++
>  newlib/libc/machine/riscv/memcpy.c |  4 ++++
> newlib/libc/machine/riscv/strcmp.S |  5 ++++-
> newlib/libc/machine/riscv/strcpy.c |  2 ++
>  newlib/libc/string/local.h         | 11 ++++++++++-
>  5 files changed, 26 insertions(+), 2 deletions(-)
> 
> diff --git a/newlib/configure.host b/newlib/configure.host index
> ff2e51275..0adb1aced 100644
> --- a/newlib/configure.host
> +++ b/newlib/configure.host
> @@ -454,6 +454,12 @@ case "${host}" in
>  	newlib_cflags="${newlib_cflags} -D_NO_GETPASS -D_NO_SIGSET -
> D_NO_WORDEXP -D_NO_POPEN -D_NO_POSIX_SPAWN"
>  	newlib_cflags="${newlib_cflags} -DHAVE_FCNTL -DHAVE_BLKSIZE -
> DHAVE_OPENDIR -DHAVE_RENAME"
>  	newlib_cflags="${newlib_cflags} -DGETREENT_PROVIDED -
> DSIGNAL_PROVIDED"
> +	newlib_cflags="${newlib_cflags} -D__ESP__"
> +	case "${host_cpu}" in
> +	  riscv*)
> +	    newlib_cflags="${newlib_cflags} -
> DHAVE_HW_UNALIGNED_ACCESS"
> +	    ;;
> +	esac
>  	;;
>    a29k-*-*)
>  	sys_dir=a29khif
> diff --git a/newlib/libc/machine/riscv/memcpy.c
> b/newlib/libc/machine/riscv/memcpy.c
> index 4098f3ab1..f2d335b33 100644
> --- a/newlib/libc/machine/riscv/memcpy.c
> +++ b/newlib/libc/machine/riscv/memcpy.c
> @@ -33,8 +33,12 @@ memcpy(void *__restrict aa, const void *__restrict bb,
> size_t n)
>    const char *b = (const char *)bb;
>    char *end = a + n;
>    uintptr_t msk = sizeof (long) - 1;
> +#ifdef HAVE_HW_UNALIGNED_ACCESS
> +  if (n < sizeof (long))
> +#else
>    if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
>  	       || n < sizeof (long)))
> +#endif // HAVE_HW_UNALIGNED_ACCESS
>      {
>  small:
>        if (__builtin_expect (a < end, 1)) diff --git
> a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
> index 9af9ca1f3..bb0be9afd 100644
> --- a/newlib/libc/machine/riscv/strcmp.S
> +++ b/newlib/libc/machine/riscv/strcmp.S
> @@ -30,10 +30,13 @@ strcmp:
> 
>  .size	strcmp, .-strcmp
>  #else
> -  or    a4, a0, a1
>    li    t2, -1
> +
> +#ifndef HAVE_HW_UNALIGNED_ACCESS
> +  or    a4, a0, a1
>    and   a4, a4, SZREG-1
>    bnez  a4, .Lmisaligned
> +#endif
> 
>  #if SZREG == 4
>    li a5, 0x7f7f7f7f
> diff --git a/newlib/libc/machine/riscv/strcpy.c
> b/newlib/libc/machine/riscv/strcpy.c
> index 6d802fa8e..7d7554615 100644
> --- a/newlib/libc/machine/riscv/strcpy.c
> +++ b/newlib/libc/machine/riscv/strcpy.c
> @@ -17,8 +17,10 @@ char *strcpy(char *dst, const char *src)
>    char *dst0 = dst;
> 
>  #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> +#ifndef HAVE_HW_UNALIGNED_ACCESS
>    int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
>    if (__builtin_expect(!misaligned, 1))
> +#endif
>      {
>        long *ldst = (long *)dst;
>        const long *lsrc = (const long *)src; diff --git a/newlib/libc/string/local.h
> b/newlib/libc/string/local.h index fb8e6c65c..5eb34319a 100644
> --- a/newlib/libc/string/local.h
> +++ b/newlib/libc/string/local.h
> @@ -17,12 +17,21 @@ int __wcwidth (wint_t);  # define
> __inhibit_loop_to_libcall  #endif
> 
> -/* Nonzero if X is not aligned on a "long" boundary.  */
> +/* Nonzero if X is not aligned on a "long" boundary.
> + * This macro is used to skip a few bytes to find an aligned pointer.
> + * It's better to keep it as is even if HAVE_HW_UNALIGNED_ACCESS is
> +enabled,
> + * to avoid small performance penalties (if they are not zero).  */
>  #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
> 
> +#ifdef HAVE_HW_UNALIGNED_ACCESS
> +/* Hardware performs unaligned operations with little
> + * to no penalty compared to byte-to-byte copy.  */ #define
> +UNALIGNED_X_Y(X, Y) (0) #else // HAVE_HW_UNALIGNED_ACCESS
>  /* Nonzero if either X or Y is not aligned on a "long" boundary.  */  #define
> UNALIGNED_X_Y(X, Y) \
>    (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
> +#endif // HAVE_HW_UNALIGNED_ACCESS
> 
>  /* How many bytes are copied each iteration of the word copy loop.  */
> #define LITTLE_BLOCK_SIZE (sizeof (long))
> --
> 2.43.0
  
Alexey Lapshin Jan. 27, 2025, 11:27 a.m. UTC | #2
Thanks for highlighting that! But unfortunately this was added too recently in GCC (14.1.0).
Moving to approach with __riscv_misaligned_fast will not work for older GCC.
Also, I'm not sure if this will be applicable to other compilers as well

https://github.com/gcc-mirror/gcc/commit/6e23440b5df4011bbe1dbee74d47641125dd7d16


On Mon, 2025-01-27 at 10:58 +0000, Christian Herber (OSS) wrote:
> Shouldn't this be controlled by __riscv_misaligned_fast according to https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc#preprocessor-definitions
> 
>
  
Christian Herber (OSS) Jan. 27, 2025, 11:36 a.m. UTC | #3
Clang has also adopted this: https://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20240610/587759.html

I would suggest to work out a solution that works for older and newer versions of compilers.

IMHO, for older versions, this macro could be simply defined with -D.
  

Patch

diff --git a/newlib/configure.host b/newlib/configure.host
index ff2e51275..0adb1aced 100644
--- a/newlib/configure.host
+++ b/newlib/configure.host
@@ -454,6 +454,12 @@  case "${host}" in
 	newlib_cflags="${newlib_cflags} -D_NO_GETPASS -D_NO_SIGSET -D_NO_WORDEXP -D_NO_POPEN -D_NO_POSIX_SPAWN"
 	newlib_cflags="${newlib_cflags} -DHAVE_FCNTL -DHAVE_BLKSIZE -DHAVE_OPENDIR -DHAVE_RENAME"
 	newlib_cflags="${newlib_cflags} -DGETREENT_PROVIDED -DSIGNAL_PROVIDED"
+	newlib_cflags="${newlib_cflags} -D__ESP__"
+	case "${host_cpu}" in
+	  riscv*)
+	    newlib_cflags="${newlib_cflags} -DHAVE_HW_UNALIGNED_ACCESS"
+	    ;;
+	esac
 	;;
   a29k-*-*)
 	sys_dir=a29khif
diff --git a/newlib/libc/machine/riscv/memcpy.c b/newlib/libc/machine/riscv/memcpy.c
index 4098f3ab1..f2d335b33 100644
--- a/newlib/libc/machine/riscv/memcpy.c
+++ b/newlib/libc/machine/riscv/memcpy.c
@@ -33,8 +33,12 @@  memcpy(void *__restrict aa, const void *__restrict bb, size_t n)
   const char *b = (const char *)bb;
   char *end = a + n;
   uintptr_t msk = sizeof (long) - 1;
+#ifdef HAVE_HW_UNALIGNED_ACCESS
+  if (n < sizeof (long))
+#else
   if (unlikely ((((uintptr_t)a & msk) != ((uintptr_t)b & msk))
 	       || n < sizeof (long)))
+#endif // HAVE_HW_UNALIGNED_ACCESS
     {
 small:
       if (__builtin_expect (a < end, 1))
diff --git a/newlib/libc/machine/riscv/strcmp.S b/newlib/libc/machine/riscv/strcmp.S
index 9af9ca1f3..bb0be9afd 100644
--- a/newlib/libc/machine/riscv/strcmp.S
+++ b/newlib/libc/machine/riscv/strcmp.S
@@ -30,10 +30,13 @@  strcmp:
 
 .size	strcmp, .-strcmp
 #else
-  or    a4, a0, a1
   li    t2, -1
+
+#ifndef HAVE_HW_UNALIGNED_ACCESS
+  or    a4, a0, a1
   and   a4, a4, SZREG-1
   bnez  a4, .Lmisaligned
+#endif
 
 #if SZREG == 4
   li a5, 0x7f7f7f7f
diff --git a/newlib/libc/machine/riscv/strcpy.c b/newlib/libc/machine/riscv/strcpy.c
index 6d802fa8e..7d7554615 100644
--- a/newlib/libc/machine/riscv/strcpy.c
+++ b/newlib/libc/machine/riscv/strcpy.c
@@ -17,8 +17,10 @@  char *strcpy(char *dst, const char *src)
   char *dst0 = dst;
 
 #if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+#ifndef HAVE_HW_UNALIGNED_ACCESS
   int misaligned = ((uintptr_t)dst | (uintptr_t)src) & (sizeof (long) - 1);
   if (__builtin_expect(!misaligned, 1))
+#endif
     {
       long *ldst = (long *)dst;
       const long *lsrc = (const long *)src;
diff --git a/newlib/libc/string/local.h b/newlib/libc/string/local.h
index fb8e6c65c..5eb34319a 100644
--- a/newlib/libc/string/local.h
+++ b/newlib/libc/string/local.h
@@ -17,12 +17,21 @@  int __wcwidth (wint_t);
 # define __inhibit_loop_to_libcall
 #endif
 
-/* Nonzero if X is not aligned on a "long" boundary.  */
+/* Nonzero if X is not aligned on a "long" boundary.
+ * This macro is used to skip a few bytes to find an aligned pointer.
+ * It's better to keep it as is even if HAVE_HW_UNALIGNED_ACCESS is enabled,
+ * to avoid small performance penalties (if they are not zero).  */
 #define UNALIGNED_X(X) ((long)X & (sizeof (long) - 1))
 
+#ifdef HAVE_HW_UNALIGNED_ACCESS
+/* Hardware performs unaligned operations with little
+ * to no penalty compared to byte-to-byte copy.  */
+#define UNALIGNED_X_Y(X, Y) (0)
+#else // HAVE_HW_UNALIGNED_ACCESS
 /* Nonzero if either X or Y is not aligned on a "long" boundary.  */
 #define UNALIGNED_X_Y(X, Y) \
   (((long)X & (sizeof (long) - 1)) | ((long)Y & (sizeof (long) - 1)))
+#endif // HAVE_HW_UNALIGNED_ACCESS
 
 /* How many bytes are copied each iteration of the word copy loop.  */
 #define LITTLE_BLOCK_SIZE (sizeof (long))