[1/2] posix: Falling back to non wide mode in case of encoding error [BZ #14185]

Message ID 20210202130804.1920933-1-adhemerval.zanella@linaro.org
State Committed
Commit a79328c745219dcb395070cdcd3be065a8347f24
Delegated to: Adhemerval Zanella Netto
Headers
Series [1/2] posix: Falling back to non wide mode in case of encoding error [BZ #14185] |

Commit Message

Adhemerval Zanella Feb. 2, 2021, 1:08 p.m. UTC
  Gnulib has added the proposed fix with aed23714d60 (done in 2005), but
recently with a glibc merge with 67306f6 (done in 2020 with sync back)
it has fallback to old semantic to return -1 on in case of failure.

From gnulib developer feedback it was an oversight.  Although the full
fix for BZ #14185 would require to rewrite fnmatch implementation to use
mbrtowc instead of mbsrtowcs on the full input, this mitigate the issue
and it has been used by gnulib for a long time.

This patch also removes the alloca usage on the string convertion to
wide characters before calling the internal function.

Checked on x86_64-linux-gnu.
---
 posix/fnmatch.c         | 160 ++++++++++++++--------------------------
 posix/tst-fnmatch.input |   2 +
 2 files changed, 58 insertions(+), 104 deletions(-)
  

Comments

Florian Weimer Feb. 22, 2021, 10:54 a.m. UTC | #1
* Adhemerval Zanella via Libc-alpha:

> +static int
> +fnmatch_convert_to_wide (const char *str, struct scratch_buffer *buf,
> +                         size_t *n)
> +{
> +  mbstate_t ps;
> +  memset (&ps, '\0', sizeof (ps));
> +
> +  size_t nw = buf->length / sizeof (wchar_t);
> +  *n = strnlen (str, nw - 1);
> +  if (__glibc_likely (*n < nw))
> +    {
> +      const char *p = str;
> +      *n = mbsrtowcs (buf->data, &p, *n + 1, &ps);
> +      if (__glibc_unlikely (*n == (size_t) -1))
> +        /* Something wrong.
> +           XXX Do we have to set 'errno' to something which mbsrtows hasn't
> +           already done?  */
> +        return -1;
> +      if (p == NULL)
> +        return 0;
> +      memset (&ps, '\0', sizeof (ps));
> +    }
> +
> +  *n = mbsrtowcs (NULL, &str, 0, &ps);
> +  if (__glibc_unlikely (*n == (size_t) -1))
> +    return -1;
> +  if (!scratch_buffer_set_array_size (buf, *n + 1, sizeof (wchar_t)))
> +    {
> +      __set_errno (ENOMEM);
> +      return -2;
> +    }
> +  assert (mbsinit (&ps));
> +  mbsrtowcs (buf->data, &str, *n + 1, &ps);
> +  return 0;
> +}
>

This, along with

> +      if (r == -2 || r == 0)
> +        return r;

below causes fnmatch to return the undocumented -2 error value, I think.
Shouldn't we keep failing with -1 on error?

Thanks,
Florian
  
Florian Weimer Feb. 22, 2021, 10:56 a.m. UTC | #2
* Florian Weimer via Libc-alpha:

> * Adhemerval Zanella via Libc-alpha:
>
>> +static int
>> +fnmatch_convert_to_wide (const char *str, struct scratch_buffer *buf,
>> +                         size_t *n)
>> +{
>> +  mbstate_t ps;
>> +  memset (&ps, '\0', sizeof (ps));
>> +
>> +  size_t nw = buf->length / sizeof (wchar_t);
>> +  *n = strnlen (str, nw - 1);
>> +  if (__glibc_likely (*n < nw))
>> +    {
>> +      const char *p = str;
>> +      *n = mbsrtowcs (buf->data, &p, *n + 1, &ps);
>> +      if (__glibc_unlikely (*n == (size_t) -1))
>> +        /* Something wrong.
>> +           XXX Do we have to set 'errno' to something which mbsrtows hasn't
>> +           already done?  */
>> +        return -1;
>> +      if (p == NULL)
>> +        return 0;
>> +      memset (&ps, '\0', sizeof (ps));
>> +    }
>> +
>> +  *n = mbsrtowcs (NULL, &str, 0, &ps);
>> +  if (__glibc_unlikely (*n == (size_t) -1))
>> +    return -1;
>> +  if (!scratch_buffer_set_array_size (buf, *n + 1, sizeof (wchar_t)))
>> +    {
>> +      __set_errno (ENOMEM);
>> +      return -2;
>> +    }
>> +  assert (mbsinit (&ps));
>> +  mbsrtowcs (buf->data, &str, *n + 1, &ps);
>> +  return 0;
>> +}
>>
>
> This, along with
>
>> +      if (r == -2 || r == 0)
>> +        return r;
>
> below causes fnmatch to return the undocumented -2 error value, I think.
> Shouldn't we keep failing with -1 on error?

I see that this is what the existing code does.  So the patch looks okay
to me after all.

Thanks,
Florian
  

Patch

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index b8a71f164d..a66c9196c7 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -75,6 +75,7 @@  extern int fnmatch (const char *pattern, const char *string, int flags);
 
 #include <intprops.h>
 #include <flexmember.h>
+#include <scratch_buffer.h>
 
 #ifdef _LIBC
 typedef ptrdiff_t idx_t;
@@ -231,121 +232,72 @@  is_char_class (const wchar_t *wcs)
 
 #include "fnmatch_loop.c"
 
+static int
+fnmatch_convert_to_wide (const char *str, struct scratch_buffer *buf,
+                         size_t *n)
+{
+  mbstate_t ps;
+  memset (&ps, '\0', sizeof (ps));
+
+  size_t nw = buf->length / sizeof (wchar_t);
+  *n = strnlen (str, nw - 1);
+  if (__glibc_likely (*n < nw))
+    {
+      const char *p = str;
+      *n = mbsrtowcs (buf->data, &p, *n + 1, &ps);
+      if (__glibc_unlikely (*n == (size_t) -1))
+        /* Something wrong.
+           XXX Do we have to set 'errno' to something which mbsrtows hasn't
+           already done?  */
+        return -1;
+      if (p == NULL)
+        return 0;
+      memset (&ps, '\0', sizeof (ps));
+    }
+
+  *n = mbsrtowcs (NULL, &str, 0, &ps);
+  if (__glibc_unlikely (*n == (size_t) -1))
+    return -1;
+  if (!scratch_buffer_set_array_size (buf, *n + 1, sizeof (wchar_t)))
+    {
+      __set_errno (ENOMEM);
+      return -2;
+    }
+  assert (mbsinit (&ps));
+  mbsrtowcs (buf->data, &str, *n + 1, &ps);
+  return 0;
+}
 
 int
 fnmatch (const char *pattern, const char *string, int flags)
 {
   if (__glibc_unlikely (MB_CUR_MAX != 1))
     {
-      mbstate_t ps;
       size_t n;
-      const char *p;
-      wchar_t *wpattern_malloc = NULL;
-      wchar_t *wpattern;
-      wchar_t *wstring_malloc = NULL;
-      wchar_t *wstring;
-      size_t alloca_used = 0;
-
-      /* Convert the strings into wide characters.  */
-      memset (&ps, '\0', sizeof (ps));
-      p = pattern;
-      n = strnlen (pattern, 1024);
-      if (__glibc_likely (n < 1024))
+      struct scratch_buffer wpattern;
+      scratch_buffer_init (&wpattern);
+      struct scratch_buffer wstring;
+      scratch_buffer_init (&wstring);
+      int r;
+
+      /* Convert the strings into wide characters.  Any conversion issue
+         fallback to the ascii version.  */
+      r = fnmatch_convert_to_wide (pattern, &wpattern, &n);
+      if (r == 0)
         {
-          wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-                                                 alloca_used);
-          n = mbsrtowcs (wpattern, &p, n + 1, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            return -1;
-          if (p)
-            {
-              memset (&ps, '\0', sizeof (ps));
-              goto prepare_wpattern;
-            }
-        }
-      else
-        {
-        prepare_wpattern:
-          n = mbsrtowcs (NULL, &pattern, 0, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            return -1;
-          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-            {
-              __set_errno (ENOMEM);
-              return -2;
-            }
-          wpattern_malloc = wpattern
-            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-          assert (mbsinit (&ps));
-          if (wpattern == NULL)
-            return -2;
-          (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
-        }
-
-      assert (mbsinit (&ps));
-      n = strnlen (string, 1024);
-      p = string;
-      if (__glibc_likely (n < 1024))
-        {
-          wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-                                                alloca_used);
-          n = mbsrtowcs (wstring, &p, n + 1, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            {
-              /* Something wrong.
-                 XXX Do we have to set 'errno' to something which
-                 mbsrtows hasn't already done?  */
-            free_return:
-              free (wpattern_malloc);
-              return -1;
-            }
-          if (p)
-            {
-              memset (&ps, '\0', sizeof (ps));
-              goto prepare_wstring;
-            }
-        }
-      else
-        {
-        prepare_wstring:
-          n = mbsrtowcs (NULL, &string, 0, &ps);
-          if (__glibc_unlikely (n == (size_t) -1))
-            /* Something wrong.
-               XXX Do we have to set 'errno' to something which mbsrtows hasn't
-               already done?  */
-            goto free_return;
-          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-            {
-              free (wpattern_malloc);
-              __set_errno (ENOMEM);
-              return -2;
-            }
-
-          wstring_malloc = wstring
-            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-          if (wstring == NULL)
-            {
-              free (wpattern_malloc);
-              return -2;
-            }
-          assert (mbsinit (&ps));
-          (void) mbsrtowcs (wstring, &string, n + 1, &ps);
-        }
-
-      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
+          r = fnmatch_convert_to_wide (string, &wstring, &n);
+          if (r == 0)
+            r = internal_fnwmatch (wpattern.data, wstring.data,
+                                   (wchar_t *) wstring.data + n,
                                    flags & FNM_PERIOD, flags, NULL,
-                                   alloca_used);
+                                   false);
+        }
 
-      free (wstring_malloc);
-      free (wpattern_malloc);
+      scratch_buffer_free (&wstring);
+      scratch_buffer_free (&wpattern);
 
-      return res;
+      if (r == -2 || r == 0)
+        return r;
     }
 
   return internal_fnmatch (pattern, string, string + strlen (string),
diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input
index 4fef4ee829..67aac5aada 100644
--- a/posix/tst-fnmatch.input
+++ b/posix/tst-fnmatch.input
@@ -676,6 +676,8 @@  C		 "x"			"*"		       0       PATHNAME|LEADING_DIR
 C		 "x/y"			"*"		       0       PATHNAME|LEADING_DIR
 C		 "x/y/z"		"*"		       0       PATHNAME|LEADING_DIR
 C		 "x"			"*x"		       0       PATHNAME|LEADING_DIR
+
+en_US.UTF-8	 "\366.csv"		"*.csv"                0
 C		 "x/y"			"*x"		       0       PATHNAME|LEADING_DIR
 C		 "x/y/z"		"*x"		       0       PATHNAME|LEADING_DIR
 C		 "x"			"x*"		       0       PATHNAME|LEADING_DIR