strchr, strchrnul: implement strchr() as strchrnul()
Commit Message
---
newlib/libc/string/strchr.c | 81 +---------------------------------
newlib/libc/string/strchrnul.c | 77 +++++++++++++++++++++++++++++++-
2 files changed, 77 insertions(+), 81 deletions(-)
Comments
Why?
Please add a descriptive commit message with the reasoning and the
desired result for this change.
Thanks,
Corinna
On Dec 5 21:58, James Tirta Halim wrote:
> ---
> newlib/libc/string/strchr.c | 81 +---------------------------------
> newlib/libc/string/strchrnul.c | 77 +++++++++++++++++++++++++++++++-
> 2 files changed, 77 insertions(+), 81 deletions(-)
>
> diff --git a/newlib/libc/string/strchr.c b/newlib/libc/string/strchr.c
> index 96f30be04..382275b1d 100644
> --- a/newlib/libc/string/strchr.c
> +++ b/newlib/libc/string/strchr.c
> @@ -30,87 +30,10 @@ QUICKREF
> #include <string.h>
> #include <limits.h>
>
> -/* Nonzero if X is not aligned on a "long" boundary. */
> -#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
> -
> -/* How many bytes are loaded each iteration of the word copy loop. */
> -#define LBLOCKSIZE (sizeof (long))
> -
> -#if LONG_MAX == 2147483647L
> -#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
> -#else
> -#if LONG_MAX == 9223372036854775807L
> -/* Nonzero if X (a long int) contains a NULL byte. */
> -#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
> -#else
> -#error long int is not a 32bit or 64bit type.
> -#endif
> -#endif
> -
> -/* DETECTCHAR returns nonzero if (long)X contains the byte used
> - to fill (long)MASK. */
> -#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
> -
> char *
> strchr (const char *s1,
> int i)
> {
> - const unsigned char *s = (const unsigned char *)s1;
> - unsigned char c = i;
> -
> -#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> - unsigned long mask,j;
> - unsigned long *aligned_addr;
> -
> - /* Special case for finding 0. */
> - if (!c)
> - {
> - while (UNALIGNED (s))
> - {
> - if (!*s)
> - return (char *) s;
> - s++;
> - }
> - /* Operate a word at a time. */
> - aligned_addr = (unsigned long *) s;
> - while (!DETECTNULL (*aligned_addr))
> - aligned_addr++;
> - /* Found the end of string. */
> - s = (const unsigned char *) aligned_addr;
> - while (*s)
> - s++;
> - return (char *) s;
> - }
> -
> - /* All other bytes. Align the pointer, then search a long at a time. */
> - while (UNALIGNED (s))
> - {
> - if (!*s)
> - return NULL;
> - if (*s == c)
> - return (char *) s;
> - s++;
> - }
> -
> - mask = c;
> - for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
> - mask = (mask << j) | mask;
> -
> - aligned_addr = (unsigned long *) s;
> - while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
> - aligned_addr++;
> -
> - /* The block of bytes currently pointed to by aligned_addr
> - contains either a null or the target char, or both. We
> - catch it using the bytewise search. */
> -
> - s = (unsigned char *) aligned_addr;
> -
> -#endif /* not PREFER_SIZE_OVER_SPEED */
> -
> - while (*s && *s != c)
> - s++;
> - if (*s == c)
> - return (char *)s;
> - return NULL;
> + s1 = strchrnul(s1, i);
> + return s1 && *s1 ? (char *) s1 : NULL;
> }
> diff --git a/newlib/libc/string/strchrnul.c b/newlib/libc/string/strchrnul.c
> index f5c3eb25d..69f66db63 100644
> --- a/newlib/libc/string/strchrnul.c
> +++ b/newlib/libc/string/strchrnul.c
> @@ -29,12 +29,85 @@ QUICKREF
> */
>
> #include <string.h>
> +#include <limits.h>
> +
> +/* Nonzero if X is not aligned on a "long" boundary. */
> +#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
> +
> +/* How many bytes are loaded each iteration of the word copy loop. */
> +#define LBLOCKSIZE (sizeof (long))
> +
> +#if LONG_MAX == 2147483647L
> +#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
> +#else
> +#if LONG_MAX == 9223372036854775807L
> +/* Nonzero if X (a long int) contains a NULL byte. */
> +#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
> +#else
> +#error long int is not a 32bit or 64bit type.
> +#endif
> +#endif
> +
> +/* DETECTCHAR returns nonzero if (long)X contains the byte used
> + to fill (long)MASK. */
> +#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
>
> char *
> strchrnul (const char *s1,
> int i)
> {
> - char *s = strchr(s1, i);
> + const unsigned char *s = (const unsigned char *)s1;
> + unsigned char c = i;
> +
> +#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
> + unsigned long mask,j;
> + unsigned long *aligned_addr;
> +
> + /* Special case for finding 0. */
> + if (!c)
> + {
> + while (UNALIGNED (s))
> + {
> + if (!*s)
> + return (char *) s;
> + s++;
> + }
> + /* Operate a word at a time. */
> + aligned_addr = (unsigned long *) s;
> + while (!DETECTNULL (*aligned_addr))
> + aligned_addr++;
> + /* Found the end of string. */
> + s = (const unsigned char *) aligned_addr;
> + while (*s)
> + s++;
> + return (char *) s;
> + }
> +
> + /* All other bytes. Align the pointer, then search a long at a time. */
> + while (UNALIGNED (s))
> + {
> + if (!*s || *s == c)
> + return (char *) s;
> + s++;
> + }
> +
> + mask = c;
> + for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
> + mask = (mask << j) | mask;
> +
> + aligned_addr = (unsigned long *) s;
> + while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
> + aligned_addr++;
> +
> + /* The block of bytes currently pointed to by aligned_addr
> + contains either a null or the target char, or both. We
> + catch it using the bytewise search. */
> +
> + s = (unsigned char *) aligned_addr;
> +
> +#endif /* not PREFER_SIZE_OVER_SPEED */
>
> - return s ? s : (char *)s1 + strlen(s1);
> + while (*s && *s != c)
> + s++;
> + return (char *) s;
> }
> --
> 2.43.0
Currently, strchrnul() is implemented as strchr() and falls back to a
strlen() to find the null-terminator if the character is not
found, scanning the string twice. However, strchr() is going to scan the
whole string anyway and discard the pointer to the null-terminator if the
character is not found, returning NULL.
Instead, we can just implement strchr() with discarding strchrnul()'s
pointer to null-terminator returning NULL and by that avoid calling
strlen() in strchrnul() if a character is not found.
I made a typo in the strchr(), it should be:
return s1 && *s1 ? (char *) s1 : NULL;
which should be:
return *s1 ? (char *) s1 : NULL;
since strchrnul will never return NULL.
We can avoid the strchrnul() function call in strchr() by implementing it
in a separate header like str-two-way.h and including them in both files.
The same could be done with the strlen() part in strchr() since the
implementations look to be the same.
On 05/12/2023 15:52, James wrote:
> Currently, strchrnul() is implemented as strchr() and falls back to a
> strlen() to find the null-terminator if the character is not
> found, scanning the string twice. However, strchr() is going to scan the
> whole string anyway and discard the pointer to the null-terminator if
> the character is not found, returning NULL.
>
> Instead, we can just implement strchr() with discarding strchrnul()'s
> pointer to null-terminator returning NULL and by that avoid calling
> strlen() in strchrnul() if a character is not found.
>
> I made a typo in the strchr(), it should be:
> return s1 && *s1 ? (char *) s1 : NULL;
> which should be:
> return *s1 ? (char *) s1 : NULL;
> since strchrnul will never return NULL.
>
> We can avoid the strchrnul() function call in strchr() by implementing
> it in a separate header like str-two-way.h and including them in both
> files. The same could be done with the strlen() part in strchr() since
> the implementations look to be the same.
strchrnul is not standard, it's a GNU extension. So technically, it
should be possible to redefine strchrnul to something else and strchr
should still work as expected.
The other issue is that many ports implement strchr in assembly for
performance and rely on strchrnul calling that to avoid having too many
functions implemented in assembly.
So I'm not convinced this is a good idea.
R.
@@ -30,87 +30,10 @@ QUICKREF
#include <string.h>
#include <limits.h>
-/* Nonzero if X is not aligned on a "long" boundary. */
-#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
-
-/* How many bytes are loaded each iteration of the word copy loop. */
-#define LBLOCKSIZE (sizeof (long))
-
-#if LONG_MAX == 2147483647L
-#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
-#else
-#if LONG_MAX == 9223372036854775807L
-/* Nonzero if X (a long int) contains a NULL byte. */
-#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
-#else
-#error long int is not a 32bit or 64bit type.
-#endif
-#endif
-
-/* DETECTCHAR returns nonzero if (long)X contains the byte used
- to fill (long)MASK. */
-#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
-
char *
strchr (const char *s1,
int i)
{
- const unsigned char *s = (const unsigned char *)s1;
- unsigned char c = i;
-
-#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
- unsigned long mask,j;
- unsigned long *aligned_addr;
-
- /* Special case for finding 0. */
- if (!c)
- {
- while (UNALIGNED (s))
- {
- if (!*s)
- return (char *) s;
- s++;
- }
- /* Operate a word at a time. */
- aligned_addr = (unsigned long *) s;
- while (!DETECTNULL (*aligned_addr))
- aligned_addr++;
- /* Found the end of string. */
- s = (const unsigned char *) aligned_addr;
- while (*s)
- s++;
- return (char *) s;
- }
-
- /* All other bytes. Align the pointer, then search a long at a time. */
- while (UNALIGNED (s))
- {
- if (!*s)
- return NULL;
- if (*s == c)
- return (char *) s;
- s++;
- }
-
- mask = c;
- for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
- mask = (mask << j) | mask;
-
- aligned_addr = (unsigned long *) s;
- while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
- aligned_addr++;
-
- /* The block of bytes currently pointed to by aligned_addr
- contains either a null or the target char, or both. We
- catch it using the bytewise search. */
-
- s = (unsigned char *) aligned_addr;
-
-#endif /* not PREFER_SIZE_OVER_SPEED */
-
- while (*s && *s != c)
- s++;
- if (*s == c)
- return (char *)s;
- return NULL;
+ s1 = strchrnul(s1, i);
+ return s1 && *s1 ? (char *) s1 : NULL;
}
@@ -29,12 +29,85 @@ QUICKREF
*/
#include <string.h>
+#include <limits.h>
+
+/* Nonzero if X is not aligned on a "long" boundary. */
+#define UNALIGNED(X) ((long)X & (sizeof (long) - 1))
+
+/* How many bytes are loaded each iteration of the word copy loop. */
+#define LBLOCKSIZE (sizeof (long))
+
+#if LONG_MAX == 2147483647L
+#define DETECTNULL(X) (((X) - 0x01010101) & ~(X) & 0x80808080)
+#else
+#if LONG_MAX == 9223372036854775807L
+/* Nonzero if X (a long int) contains a NULL byte. */
+#define DETECTNULL(X) (((X) - 0x0101010101010101) & ~(X) & 0x8080808080808080)
+#else
+#error long int is not a 32bit or 64bit type.
+#endif
+#endif
+
+/* DETECTCHAR returns nonzero if (long)X contains the byte used
+ to fill (long)MASK. */
+#define DETECTCHAR(X,MASK) (DETECTNULL(X ^ MASK))
char *
strchrnul (const char *s1,
int i)
{
- char *s = strchr(s1, i);
+ const unsigned char *s = (const unsigned char *)s1;
+ unsigned char c = i;
+
+#if !defined(PREFER_SIZE_OVER_SPEED) && !defined(__OPTIMIZE_SIZE__)
+ unsigned long mask,j;
+ unsigned long *aligned_addr;
+
+ /* Special case for finding 0. */
+ if (!c)
+ {
+ while (UNALIGNED (s))
+ {
+ if (!*s)
+ return (char *) s;
+ s++;
+ }
+ /* Operate a word at a time. */
+ aligned_addr = (unsigned long *) s;
+ while (!DETECTNULL (*aligned_addr))
+ aligned_addr++;
+ /* Found the end of string. */
+ s = (const unsigned char *) aligned_addr;
+ while (*s)
+ s++;
+ return (char *) s;
+ }
+
+ /* All other bytes. Align the pointer, then search a long at a time. */
+ while (UNALIGNED (s))
+ {
+ if (!*s || *s == c)
+ return (char *) s;
+ s++;
+ }
+
+ mask = c;
+ for (j = 8; j < LBLOCKSIZE * 8; j <<= 1)
+ mask = (mask << j) | mask;
+
+ aligned_addr = (unsigned long *) s;
+ while (!DETECTNULL (*aligned_addr) && !DETECTCHAR (*aligned_addr, mask))
+ aligned_addr++;
+
+ /* The block of bytes currently pointed to by aligned_addr
+ contains either a null or the target char, or both. We
+ catch it using the bytewise search. */
+
+ s = (unsigned char *) aligned_addr;
+
+#endif /* not PREFER_SIZE_OVER_SPEED */
- return s ? s : (char *)s1 + strlen(s1);
+ while (*s && *s != c)
+ s++;
+ return (char *) s;
}