fnmatch: Use __mbstowcs_alloc [BZ #23519]

Message ID 42836912-fedd-3a3a-97b9-6db10ca39519@redhat.com
State New, archived
Headers

Commit Message

Florian Weimer Aug. 14, 2018, 3:53 p.m. UTC
  Sorry, I posted the wrong version of the patch.  Try this one.
  

Comments

Adhemerval Zanella Aug. 17, 2018, 12:36 p.m. UTC | #1
On 14/08/2018 12:53, Florian Weimer wrote:
> Sorry, I posted the wrong version of the patch.  Try this one.

I think we should first address which should the desirable solution for
BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
strings or 2. fallback on single-bye matching for these invalid entries.
The second option is already on gnulib, so we can either first sync with 
it work to get it sync back (with the __mbstowcs_alloc). It might not be
the best option though, as noted by Rich in comment #4 (it result false
positives).

In any case, I do think a better alternative is in fact avoid make fnmatch
to go temporary wchar_t string for MB_CUR_MAX != 1 (and avoid the mbsrtowcs
call altogether). I started to work on it and it should be doable, the
main time consuming think is to adapt the algorithms to access the
collation tables.
  
Florian Weimer Aug. 17, 2018, 12:48 p.m. UTC | #2
On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
> 
> 
> On 14/08/2018 12:53, Florian Weimer wrote:
>> Sorry, I posted the wrong version of the patch.  Try this one.
> 
> I think we should first address which should the desirable solution for
> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
> strings or 2. fallback on single-bye matching for these invalid entries.
> The second option is already on gnulib, so we can either first sync with
> it work to get it sync back (with the __mbstowcs_alloc). It might not be
> the best option though, as noted by Rich in comment #4 (it result false
> positives).

I don't think my proposed cleanup will interfere with these fixes at all 
(less code would have to be deleted), so I don't see the reason why to 
block it.

Thanks,
Florian
  
Adhemerval Zanella Aug. 17, 2018, 12:52 p.m. UTC | #3
On 17/08/2018 09:48, Florian Weimer wrote:
> On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
>>
>>
>> On 14/08/2018 12:53, Florian Weimer wrote:
>>> Sorry, I posted the wrong version of the patch.  Try this one.
>>
>> I think we should first address which should the desirable solution for
>> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
>> strings or 2. fallback on single-bye matching for these invalid entries.
>> The second option is already on gnulib, so we can either first sync with
>> it work to get it sync back (with the __mbstowcs_alloc). It might not be
>> the best option though, as noted by Rich in comment #4 (it result false
>> positives).
> 
> I don't think my proposed cleanup will interfere with these fixes at all (less code would have to be deleted), so I don't see the reason why to block it.

I am trying to avoid make it harder to sync back with gnulib, however if
the idea is indeed deviate from it (with a possible different solution
for BZ#14185) further cleanups might be possible on fnmatch implementation.
  

Patch

Subject: [PATCH] fnmatch: Use __mbstowcs_alloc [BZ #23519]
To: libc-alpha@sourceware.org

2018-08-14  Florian Weimer  <fweimer@redhat.com>

	[BZ #23519]
	* posix/fnmatch.c [HANDLE_MULTIBYTE] (fnmatch): Use
	__mbstowcs_alloc.

diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index a9b762624f..1ce6c88d6a 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -35,6 +35,7 @@ 
 #endif
 
 #ifdef _LIBC
+# include <array_length.h>
 # include <alloca.h>
 #else
 # define alloca_account(size., var) alloca (size)
@@ -322,122 +323,28 @@  fnmatch (const char *pattern, const char *string, int flags)
 # if HANDLE_MULTIBYTE
   if (__builtin_expect (MB_CUR_MAX, 1) != 1)
     {
-      mbstate_t ps;
-      size_t n;
-      const char *p;
-      wchar_t *wpattern_malloc = NULL;
-      wchar_t *wpattern;
-      wchar_t *wstring_malloc = NULL;
-      wchar_t *wstring;
-      size_t alloca_used = 0;
+      void *to_free1;
+      wchar_t wpattern_scratch[256];
+      wchar_t *wpattern = __mbstowcs_alloc
+	(pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1);
+      if (wpattern == NULL)
+	return -1;
 
-      /* Convert the strings into wide characters.  */
-      memset (&ps, '\0', sizeof (ps));
-      p = pattern;
-#ifdef _LIBC
-      n = __strnlen (pattern, 1024);
-#else
-      n = strlen (pattern);
-#endif
-      if (__glibc_likely (n < 1024))
+      void *to_free2;
+      wchar_t wstring_scratch[256];
+      wchar_t *wstring = __mbstowcs_alloc
+	(string, wstring_scratch, array_length (wstring_scratch), &to_free2);
+      if (wstring == NULL)
 	{
-	  wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						 alloca_used);
-	  n = mbsrtowcs (wpattern, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wpattern;
-	    }
+	  free (to_free1);
+	  return -1;
 	}
-      else
-	{
-	prepare_wpattern:
-	  n = mbsrtowcs (NULL, &pattern, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    return -1;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-	  wpattern_malloc = wpattern
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  assert (mbsinit (&ps));
-	  if (wpattern == NULL)
-	    return -2;
-	  (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
-	}
-
-      assert (mbsinit (&ps));
-#ifdef _LIBC
-      n = __strnlen (string, 1024);
-#else
-      n = strlen (string);
-#endif
-      p = string;
-      if (__glibc_likely (n < 1024))
-	{
-	  wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
-						alloca_used);
-	  n = mbsrtowcs (wstring, &p, n + 1, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    {
-	      /* Something wrong.
-		 XXX Do we have to set `errno' to something which
-		 mbsrtows hasn't already done?  */
-	    free_return:
-	      free (wpattern_malloc);
-	      return -1;
-	    }
-	  if (p)
-	    {
-	      memset (&ps, '\0', sizeof (ps));
-	      goto prepare_wstring;
-	    }
-	}
-      else
-	{
-	prepare_wstring:
-	  n = mbsrtowcs (NULL, &string, 0, &ps);
-	  if (__glibc_unlikely (n == (size_t) -1))
-	    /* Something wrong.
-	       XXX Do we have to set `errno' to something which mbsrtows hasn't
-	       already done?  */
-	    goto free_return;
-	  if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
-	    {
-	      free (wpattern_malloc);
-	      __set_errno (ENOMEM);
-	      return -2;
-	    }
-
-	  wstring_malloc = wstring
-	    = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
-	  if (wstring == NULL)
-	    {
-	      free (wpattern_malloc);
-	      return -2;
-	    }
-	  assert (mbsinit (&ps));
-	  (void) mbsrtowcs (wstring, &string, n + 1, &ps);
-	}
-
-      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
-				   flags & FNM_PERIOD, flags, NULL,
-				   alloca_used);
-
-      free (wstring_malloc);
-      free (wpattern_malloc);
 
+      int res = internal_fnwmatch (wpattern,
+				   wstring, wstring + __wcslen (wstring),
+				   flags & FNM_PERIOD, flags, NULL, 0);
+      free (to_free2);
+      free (to_free1);
       return res;
     }
 # endif  /* mbstate_t and mbsrtowcs or _LIBC.  */