fnmatch: Use __mbstowcs_alloc [BZ #23519]
Commit Message
Sorry, I posted the wrong version of the patch. Try this one.
Comments
On 14/08/2018 12:53, Florian Weimer wrote:
> Sorry, I posted the wrong version of the patch. Try this one.
I think we should first address which should the desirable solution for
BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
strings or 2. fallback on single-bye matching for these invalid entries.
The second option is already on gnulib, so we can either first sync with
it work to get it sync back (with the __mbstowcs_alloc). It might not be
the best option though, as noted by Rich in comment #4 (it result false
positives).
In any case, I do think a better alternative is in fact avoid make fnmatch
to go temporary wchar_t string for MB_CUR_MAX != 1 (and avoid the mbsrtowcs
call altogether). I started to work on it and it should be doable, the
main time consuming think is to adapt the algorithms to access the
collation tables.
On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
>
>
> On 14/08/2018 12:53, Florian Weimer wrote:
>> Sorry, I posted the wrong version of the patch. Try this one.
>
> I think we should first address which should the desirable solution for
> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
> strings or 2. fallback on single-bye matching for these invalid entries.
> The second option is already on gnulib, so we can either first sync with
> it work to get it sync back (with the __mbstowcs_alloc). It might not be
> the best option though, as noted by Rich in comment #4 (it result false
> positives).
I don't think my proposed cleanup will interfere with these fixes at all
(less code would have to be deleted), so I don't see the reason why to
block it.
Thanks,
Florian
On 17/08/2018 09:48, Florian Weimer wrote:
> On 08/17/2018 02:36 PM, Adhemerval Zanella wrote:
>>
>>
>> On 14/08/2018 12:53, Florian Weimer wrote:
>>> Sorry, I posted the wrong version of the patch. Try this one.
>>
>> I think we should first address which should the desirable solution for
>> BZ#14185: 1. reject as non-matching or give an error on invalid multibyte
>> strings or 2. fallback on single-bye matching for these invalid entries.
>> The second option is already on gnulib, so we can either first sync with
>> it work to get it sync back (with the __mbstowcs_alloc). It might not be
>> the best option though, as noted by Rich in comment #4 (it result false
>> positives).
>
> I don't think my proposed cleanup will interfere with these fixes at all (less code would have to be deleted), so I don't see the reason why to block it.
I am trying to avoid make it harder to sync back with gnulib, however if
the idea is indeed deviate from it (with a possible different solution
for BZ#14185) further cleanups might be possible on fnmatch implementation.
Subject: [PATCH] fnmatch: Use __mbstowcs_alloc [BZ #23519]
To: libc-alpha@sourceware.org
2018-08-14 Florian Weimer <fweimer@redhat.com>
[BZ #23519]
* posix/fnmatch.c [HANDLE_MULTIBYTE] (fnmatch): Use
__mbstowcs_alloc.
@@ -35,6 +35,7 @@
#endif
#ifdef _LIBC
+# include <array_length.h>
# include <alloca.h>
#else
# define alloca_account(size., var) alloca (size)
@@ -322,122 +323,28 @@ fnmatch (const char *pattern, const char *string, int flags)
# if HANDLE_MULTIBYTE
if (__builtin_expect (MB_CUR_MAX, 1) != 1)
{
- mbstate_t ps;
- size_t n;
- const char *p;
- wchar_t *wpattern_malloc = NULL;
- wchar_t *wpattern;
- wchar_t *wstring_malloc = NULL;
- wchar_t *wstring;
- size_t alloca_used = 0;
+ void *to_free1;
+ wchar_t wpattern_scratch[256];
+ wchar_t *wpattern = __mbstowcs_alloc
+ (pattern, wpattern_scratch, array_length (wpattern_scratch), &to_free1);
+ if (wpattern == NULL)
+ return -1;
- /* Convert the strings into wide characters. */
- memset (&ps, '\0', sizeof (ps));
- p = pattern;
-#ifdef _LIBC
- n = __strnlen (pattern, 1024);
-#else
- n = strlen (pattern);
-#endif
- if (__glibc_likely (n < 1024))
+ void *to_free2;
+ wchar_t wstring_scratch[256];
+ wchar_t *wstring = __mbstowcs_alloc
+ (string, wstring_scratch, array_length (wstring_scratch), &to_free2);
+ if (wstring == NULL)
{
- wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
- alloca_used);
- n = mbsrtowcs (wpattern, &p, n + 1, &ps);
- if (__glibc_unlikely (n == (size_t) -1))
- /* Something wrong.
- XXX Do we have to set `errno' to something which mbsrtows hasn't
- already done? */
- return -1;
- if (p)
- {
- memset (&ps, '\0', sizeof (ps));
- goto prepare_wpattern;
- }
+ free (to_free1);
+ return -1;
}
- else
- {
- prepare_wpattern:
- n = mbsrtowcs (NULL, &pattern, 0, &ps);
- if (__glibc_unlikely (n == (size_t) -1))
- /* Something wrong.
- XXX Do we have to set `errno' to something which mbsrtows hasn't
- already done? */
- return -1;
- if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
- {
- __set_errno (ENOMEM);
- return -2;
- }
- wpattern_malloc = wpattern
- = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
- assert (mbsinit (&ps));
- if (wpattern == NULL)
- return -2;
- (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
- }
-
- assert (mbsinit (&ps));
-#ifdef _LIBC
- n = __strnlen (string, 1024);
-#else
- n = strlen (string);
-#endif
- p = string;
- if (__glibc_likely (n < 1024))
- {
- wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
- alloca_used);
- n = mbsrtowcs (wstring, &p, n + 1, &ps);
- if (__glibc_unlikely (n == (size_t) -1))
- {
- /* Something wrong.
- XXX Do we have to set `errno' to something which
- mbsrtows hasn't already done? */
- free_return:
- free (wpattern_malloc);
- return -1;
- }
- if (p)
- {
- memset (&ps, '\0', sizeof (ps));
- goto prepare_wstring;
- }
- }
- else
- {
- prepare_wstring:
- n = mbsrtowcs (NULL, &string, 0, &ps);
- if (__glibc_unlikely (n == (size_t) -1))
- /* Something wrong.
- XXX Do we have to set `errno' to something which mbsrtows hasn't
- already done? */
- goto free_return;
- if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
- {
- free (wpattern_malloc);
- __set_errno (ENOMEM);
- return -2;
- }
-
- wstring_malloc = wstring
- = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
- if (wstring == NULL)
- {
- free (wpattern_malloc);
- return -2;
- }
- assert (mbsinit (&ps));
- (void) mbsrtowcs (wstring, &string, n + 1, &ps);
- }
-
- int res = internal_fnwmatch (wpattern, wstring, wstring + n,
- flags & FNM_PERIOD, flags, NULL,
- alloca_used);
-
- free (wstring_malloc);
- free (wpattern_malloc);
+ int res = internal_fnwmatch (wpattern,
+ wstring, wstring + __wcslen (wstring),
+ flags & FNM_PERIOD, flags, NULL, 0);
+ free (to_free2);
+ free (to_free1);
return res;
}
# endif /* mbstate_t and mbsrtowcs or _LIBC. */