strcasestr: try to find non-alpha char in NEEDLE

Message ID 20231128140137.81955-1-tirtajames45@gmail.com
State Superseded
Headers
Series strcasestr: try to find non-alpha char in NEEDLE |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed

Commit Message

James Tirta Halim Nov. 28, 2023, 2:01 p.m. UTC
  ---
 string/strcasestr.c | 37 ++++++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)
  

Comments

Carlos O'Donell Dec. 4, 2023, 2:44 p.m. UTC | #1
On 11/28/23 09:01, James Tirta Halim wrote:
> ---

James,

Could you please clarify your copyright assignment status?

Please review "2.1. Copyright FSF or disclaimer" in the contribution checklist:
https://sourceware.org/glibc/wiki/Contribution%20checklist#Copyright_FSF_or_disclaimer


>  string/strcasestr.c | 37 ++++++++++++++++++++++++++++++-------
>  1 file changed, 30 insertions(+), 7 deletions(-)
> 
> diff --git a/string/strcasestr.c b/string/strcasestr.c
> index 2f6b4f8641..65eae2f047 100644
> --- a/string/strcasestr.c
> +++ b/string/strcasestr.c
> @@ -54,7 +54,6 @@
>  #define STRCASESTR __strcasestr
>  #endif
>  
> -
>  /* Find the first occurrence of NEEDLE in HAYSTACK, using
>     case-insensitive comparison.  This function gives unspecified
>     results in multibyte locales.  */
> @@ -63,18 +62,42 @@ STRCASESTR (const char *haystack, const char *needle)
>  {
>    size_t needle_len; /* Length of NEEDLE.  */
>    size_t haystack_len; /* Known minimum length of HAYSTACK.  */
> +  const char *h, *n;
>  
>    /* Handle empty NEEDLE special case.  */
>    if (needle[0] == '\0')
>      return (char *) haystack;
>  
> -  /* Ensure HAYSTACK length is at least as long as NEEDLE length.
> -     Since a match may occur early on in a huge HAYSTACK, use strnlen
> -     and read ahead a few cachelines for improved performance.  */
> -  needle_len = strlen (needle);
> -  haystack_len = __strnlen (haystack, needle_len + 256);
> -  if (haystack_len < needle_len)
> +  /* Try to find a non-alphabetic character in NEEDLE to pass to
> +     strchr() while checking if HAYSTACK is as long as NEEDLE.  */
> +  for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n);
> +  if (__glibc_unlikely (*h == '\0'))
>      return NULL;
> +  if (*n) {
> +    size_t shift;
> +    shift = n - needle;
> +    haystack = strchr (h + shift, *n);
> +    if (__glibc_unlikely (haystack == NULL))
> +      return NULL;
> +    haystack -= shift;
> +    /* Check if we have an early match. */
> +    for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n);
> +    if (*n == '\0')
> +      return (char *)haystack;
> +    if (__glibc_unlikely (*h == '\0'))
> +      return NULL;
> +    if ((size_t) (n - needle) > shift)
> +      shift = n - needle;
> +  /* Since a match may occur early on in a huge HAYSTACK, use strnlen
> +     and read ahead a few cachelines for improved performance.  */
> +    needle_len = shift + strlen (needle + shift);
> +    haystack_len = shift + __strnlen (h + shift, 256);
> +    if (__glibc_unlikely (haystack_len < needle_len))
> +      return NULL;
> +  } else {
> +    needle_len = n - needle;
> +    haystack_len = needle_len + __strnlen (haystack + needle_len, 256);
> +  }
>  
>    /* Perform the search.  Abstract memory is considered to be an array
>       of 'unsigned char' values, not an array of 'char' values.  See
  

Patch

diff --git a/string/strcasestr.c b/string/strcasestr.c
index 2f6b4f8641..65eae2f047 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -54,7 +54,6 @@ 
 #define STRCASESTR __strcasestr
 #endif
 
-
 /* Find the first occurrence of NEEDLE in HAYSTACK, using
    case-insensitive comparison.  This function gives unspecified
    results in multibyte locales.  */
@@ -63,18 +62,42 @@  STRCASESTR (const char *haystack, const char *needle)
 {
   size_t needle_len; /* Length of NEEDLE.  */
   size_t haystack_len; /* Known minimum length of HAYSTACK.  */
+  const char *h, *n;
 
   /* Handle empty NEEDLE special case.  */
   if (needle[0] == '\0')
     return (char *) haystack;
 
-  /* Ensure HAYSTACK length is at least as long as NEEDLE length.
-     Since a match may occur early on in a huge HAYSTACK, use strnlen
-     and read ahead a few cachelines for improved performance.  */
-  needle_len = strlen (needle);
-  haystack_len = __strnlen (haystack, needle_len + 256);
-  if (haystack_len < needle_len)
+  /* Try to find a non-alphabetic character in NEEDLE to pass to
+     strchr() while checking if HAYSTACK is as long as NEEDLE.  */
+  for (h = haystack, n = needle; *h && isalpha (*n); ++h, ++n);
+  if (__glibc_unlikely (*h == '\0'))
     return NULL;
+  if (*n) {
+    size_t shift;
+    shift = n - needle;
+    haystack = strchr (h + shift, *n);
+    if (__glibc_unlikely (haystack == NULL))
+      return NULL;
+    haystack -= shift;
+    /* Check if we have an early match. */
+    for (h = haystack, n = needle; TOLOWER (*h) == TOLOWER (*n) && *h; ++h, ++n);
+    if (*n == '\0')
+      return (char *)haystack;
+    if (__glibc_unlikely (*h == '\0'))
+      return NULL;
+    if ((size_t) (n - needle) > shift)
+      shift = n - needle;
+  /* Since a match may occur early on in a huge HAYSTACK, use strnlen
+     and read ahead a few cachelines for improved performance.  */
+    needle_len = shift + strlen (needle + shift);
+    haystack_len = shift + __strnlen (h + shift, 256);
+    if (__glibc_unlikely (haystack_len < needle_len))
+      return NULL;
+  } else {
+    needle_len = n - needle;
+    haystack_len = needle_len + __strnlen (haystack + needle_len, 256);
+  }
 
   /* Perform the search.  Abstract memory is considered to be an array
      of 'unsigned char' values, not an array of 'char' values.  See