strcasestr: try to find non-alpha char in NEEDLE
Checks
| Context | Check | Description |
|---|---|---|
| redhat-pt-bot/TryBot-apply_patch | success | Patch applied to master at the time it was sent |
| linaro-tcwg-bot/tcwg_glibc_build--master-arm | success | Testing passed |
| redhat-pt-bot/TryBot-32bit | success | Build for i686 |
| linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 | success | Testing passed |
| linaro-tcwg-bot/tcwg_glibc_check--master-arm | success | Testing passed |
| linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 | success | Testing passed |
Commit Message
---
string/strcasestr.c | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
Comments
On 11/28/23 09:01, James Tirta Halim wrote:
> ---
James,
Could you please clarify your copyright assignment status?
Please review "2.1. Copyright FSF or disclaimer" in the contribution checklist:
https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_FSF_or_disclaimer
> string/strcasestr.c | 37 ++++++++++++++++++++++++++++++-------
> 1 file changed, 30 insertions(+), 7 deletions(-)
>
> diff --git a/string/strcasestr.c b/string/strcasestr.c
> index 2f6b4f8641..65eae2f047 100644
> --- a/string/strcasestr.c
> +++ b/string/strcasestr.c
> @@ -54,7 +54,6 @@
> #define STRCASESTR __strcasestr
> #endif
>
> -
> /* Find the first occurrence of NEEDLE in HAYSTACK, using
> case-insensitive comparison. This function gives unspecified
> results in multibyte locales. */
> @@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle)
> {
> size_t needle_len; /* Length of NEEDLE. */
> size_t haystack_len; /* Known minimum length of HAYSTACK. */
> + const char *h, *n;
>
> /* Handle empty NEEDLE special case. */
> if (needle[0] == '\0')
> return (char *) haystack;
>
> - /* Ensure HAYSTACK length is at least as long as NEEDLE length.
> - Since a match may occur early on in a huge HAYSTACK, use strnlen
> - and read ahead a few cachelines for improved performance. */
> - needle_len = strlen (needle);
> - haystack_len = __strnlen (haystack, needle_len + 256);
> - if (haystack_len < needle_len)
> + /* Try to find a non-alphabetic character in NEEDLE to pass to
> + strchr() while checking if HAYSTACK is as long as NEEDLE. */
> + for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n);
> + if (__glibc_unlikely (*h == '\0'))
> return NULL;
> + if (*n) {
> + size_t shift;
> + shift = n - needle;
> + haystack = strchr (h + shift, *n);
> + if (__glibc_unlikely (haystack == NULL))
> + return NULL;
> + haystack -= shift;
> + /* Check if we have an early match. */
> + for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n);
> + if (*n == '\0')
> + return (char *)haystack;
> + if (__glibc_unlikely (*h == '\0'))
> + return NULL;
> + if ((size_t) (n - needle) > shift)
> + shift = n - needle;
> + /* Since a match may occur early on in a huge HAYSTACK, use strnlen
> + and read ahead a few cachelines for improved performance. */
> + needle_len = shift + strlen (needle + shift);
> + haystack_len = shift + __strnlen (h + shift, 256);
> + if (__glibc_unlikely (haystack_len < needle_len))
> + return NULL;
> + } else {
> + needle_len = n - needle;
> + haystack_len = needle_len + __strnlen (haystack + needle_len, 256);
> + }
>
> /* Perform the search. Abstract memory is considered to be an array
> of 'unsigned char' values, not an array of 'char' values. See
@@ -54,7 +54,6 @@
#define STRCASESTR __strcasestr
#endif
-
/* Find the first occurrence of NEEDLE in HAYSTACK, using
case-insensitive comparison. This function gives unspecified
results in multibyte locales. */
@@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle)
{
size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */
+ const char *h, *n;
/* Handle empty NEEDLE special case. */
if (needle[0] == '\0')
return (char *) haystack;
- /* Ensure HAYSTACK length is at least as long as NEEDLE length.
- Since a match may occur early on in a huge HAYSTACK, use strnlen
- and read ahead a few cachelines for improved performance. */
- needle_len = strlen (needle);
- haystack_len = __strnlen (haystack, needle_len + 256);
- if (haystack_len < needle_len)
+ /* Try to find a non-alphabetic character in NEEDLE to pass to
+ strchr() while checking if HAYSTACK is as long as NEEDLE. */
+ for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n);
+ if (__glibc_unlikely (*h == '\0'))
return NULL;
+ if (*n) {
+ size_t shift;
+ shift = n - needle;
+ haystack = strchr (h + shift, *n);
+ if (__glibc_unlikely (haystack == NULL))
+ return NULL;
+ haystack -= shift;
+ /* Check if we have an early match. */
+ for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n);
+ if (*n == '\0')
+ return (char *)haystack;
+ if (__glibc_unlikely (*h == '\0'))
+ return NULL;
+ if ((size_t) (n - needle) > shift)
+ shift = n - needle;
+ /* Since a match may occur early on in a huge HAYSTACK, use strnlen
+ and read ahead a few cachelines for improved performance. */
+ needle_len = shift + strlen (needle + shift);
+ haystack_len = shift + __strnlen (h + shift, 256);
+ if (__glibc_unlikely (haystack_len < needle_len))
+ return NULL;
+ } else {
+ needle_len = n - needle;
+ haystack_len = needle_len + __strnlen (haystack + needle_len, 256);
+ }
/* Perform the search. Abstract memory is considered to be an array
of 'unsigned char' values, not an array of 'char' values. See