From patchwork Wed Jan 9 14:31:50 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andreas Schwab X-Patchwork-Id: 31016 Received: (qmail 113201 invoked by alias); 9 Jan 2019 14:32:04 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 112791 invoked by uid 89); 9 Jan 2019 14:32:01 -0000 Authentication-Results: sourceware.org; auth=none X-Spam-SWARE-Status: No, score=-25.4 required=5.0 tests=BAYES_00, GIT_PATCH_0, GIT_PATCH_1, GIT_PATCH_2, GIT_PATCH_3, KAM_SHORT, KAM_STOCKGEN, SPF_PASS autolearn=ham version=3.3.2 spammy= X-HELO: mx1.suse.de From: Andreas Schwab To: Adhemerval Zanella Cc: libc-alpha@sourceware.org Subject: Re: [PATCH] Fix handling of collating elements in fnmatch (bug 17396, bug 16976) References: <87zhscfq0h.fsf@oldenburg2.str.redhat.com> X-Yow: ..Everything is....FLIPPING AROUND!! Date: Wed, 09 Jan 2019 15:31:50 +0100 In-Reply-To: (Adhemerval Zanella's message of "Wed, 9 Jan 2019 11:35:18 -0200") Message-ID: User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/26.1.91 (gnu/linux) MIME-Version: 1.0 On Jan 09 2019, Adhemerval Zanella wrote: > Maybe TEST_COMPARE on both check? Rather TEST_VERIFY(_EXIT). Andreas. [BZ #16976] [BZ #17396] * posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When looking up collating elements match against (wide) character sequence instead of name. Correct alignment adjustment. * posix/fnmatch.c: Don't include "../locale/elem-hash.h". (WMEMCMP) [HANDLE_MULTIBYTE]: Define. * posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5. (LOCALES): Add cs_CZ.ISO-8859-2. * posix/tst-fnmatch4.c: New file. * posix/tst-fnmatch5.c: New file. * include/wchar.h (__wmemcmp): Declare. * wcsmbs/wmemcmp.c: Define __wmemcmp and add wmemcmp as weak alias. * sysdeps/i386/i686/multiarch/wmemcmp.c: Likewise. * sysdeps/x86_64/multiarch/wmemcmp.c: Likewise. * sysdeps/s390/wmemcmp.c: Likewise. --- include/wchar.h | 2 + posix/Makefile | 4 +- posix/fnmatch.c | 6 +- posix/fnmatch_loop.c | 228 +++++++----------- .../wmemcmp.c => posix/tst-fnmatch4.c | 35 ++- .../s390/wmemcmp.c => posix/tst-fnmatch5.c | 44 ++-- sysdeps/i386/i686/multiarch/wmemcmp.c | 3 +- sysdeps/s390/wmemcmp.c | 7 +- sysdeps/x86_64/multiarch/wmemcmp.c | 3 +- wcsmbs/wmemcmp.c | 9 +- 10 files changed, 160 insertions(+), 181 deletions(-) copy sysdeps/i386/i686/multiarch/wmemcmp.c => posix/tst-fnmatch4.c (59%) copy sysdeps/s390/wmemcmp.c => posix/tst-fnmatch5.c (53%) diff --git a/include/wchar.h b/include/wchar.h index 86506d28e9..614073bcb3 100644 --- a/include/wchar.h +++ b/include/wchar.h @@ -143,6 +143,8 @@ libc_hidden_proto (wmemchr) libc_hidden_proto (__wmemchr) libc_hidden_proto (wmemset) libc_hidden_proto (__wmemset) +extern int __wmemcmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) + __THROW __attribute_pure__; /* Now define the internal interfaces. */ extern int __wcscasecmp (const wchar_t *__s1, const wchar_t *__s2) diff --git a/posix/Makefile b/posix/Makefile index cfd914ff21..873947f72e 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -93,6 +93,7 @@ tests := test-errno tstgetopt testfnm runtests runptests \ bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \ tst-pathconf tst-rxspencer-no-utf8 \ tst-fnmatch3 bug-regex36 \ + tst-fnmatch4 tst-fnmatch5 \ tst-posix_spawn-fd tst-posix_spawn-setsid \ tst-posix_fadvise tst-posix_fadvise64 \ tst-sysconf-empty-chroot tst-glob_symlinks tst-fexecve \ @@ -168,7 +169,8 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test endif LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \ - en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 + en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \ + cs_CZ.ISO-8859-2 include ../gen-locales.mk $(objpfx)bug-regex1.out: $(gen-locales) diff --git a/posix/fnmatch.c b/posix/fnmatch.c index 7b225cf2ba..a58e1743ce 100644 --- a/posix/fnmatch.c +++ b/posix/fnmatch.c @@ -53,7 +53,6 @@ we support a correct implementation only in glibc. */ #ifdef _LIBC # include "../locale/localeinfo.h" -# include "../locale/elem-hash.h" # include "../locale/coll-lookup.h" # include @@ -237,6 +236,11 @@ __wcschrnul (const wchar_t *s, wint_t c) # define MEMPCPY(D, S, N) __wmempcpy (D, S, N) # define MEMCHR(S, C, N) __wmemchr (S, C, N) # define STRCOLL(S1, S2) wcscoll (S1, S2) +# ifdef _LIBC +# define WMEMCMP(S1, S2, N) __wmemcmp (S1, S2, N) +# else +# define WMEMCMP(S1, S2, N) wmemcmp (S1, S2, N) +# endif # define WIDE_CHAR_VERSION 1 /* Change the name the header defines so it doesn't conflict with the version included above. */ diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index f888c66dfb..fa39b21395 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -494,26 +494,12 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; -#endif + CHAR *wextra; +# endif table_size = _NL_CURRENT_WORD (LC_COLLATE, @@ -525,71 +511,54 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem - + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating element. */ + idx += 1 + extra[idx]; +# if WIDE_CHAR_VERSION + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + + wextra = (CHAR *) &extra[idx + 4]; + + if (/* Compare the length of the sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && WMEMCMP (startp + 1, &wextra[1], + c1) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. */ + break; +# endif + } - if (symb_table[2 * elem] != 0) + if (elem < table_size) { /* Compare the byte sequence but only if this is not part of a range. */ -# if WIDE_CHAR_VERSION - int32_t *wextra; + if (! is_range - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - - wextra = (int32_t *) &extra[idx + 4]; -# endif - - if (! is_range) - { # if WIDE_CHAR_VERSION - for (c1 = 0; - (int32_t) c1 < wextra[idx]; - ++c1) - if (n[c1] != wextra[1 + c1]) - break; - - if ((int32_t) c1 == wextra[idx]) - goto matched; + && WMEMCMP (n, &wextra[1], c1) == 0 # else - for (c1 = 0; c1 < extra[idx]; ++c1) - if (n[c1] != extra[1 + c1]) - break; - - if (c1 == extra[idx]) - goto matched; + && memcmp (n, &extra[idx + 1], c1) == 0 # endif + ) + { + n += c1 - 1; + goto matched; } /* Get the collation sequence value. */ @@ -597,9 +566,9 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, # if WIDE_CHAR_VERSION cold = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cold = *((int32_t *) &extra[idx]); # endif @@ -609,10 +578,10 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { /* No valid character. Match it as a single byte. */ - if (!is_range && *n == str[0]) + if (!is_range && *n == startp[1]) goto matched; - cold = str[0]; + cold = startp[1]; c = *p++; } else @@ -620,7 +589,6 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, } } else -# undef str #endif { c = FOLD (c); @@ -712,25 +680,11 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, { int32_t table_size; const int32_t *symb_table; -# if WIDE_CHAR_VERSION - char str[c1]; - unsigned int strcnt; -# else -# define str (startp + 1) -# endif const unsigned char *extra; int32_t idx; int32_t elem; - int32_t second; - int32_t hash; - # if WIDE_CHAR_VERSION - /* We have to convert the name to a single-byte - string. This is possible since the names - consist of ASCII characters and the internal - representation is UCS4. */ - for (strcnt = 0; strcnt < c1; ++strcnt) - str[strcnt] = startp[1 + strcnt]; + CHAR *wextra; # endif table_size = @@ -743,71 +697,63 @@ FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - /* Locate the character in the hashing - table. */ - hash = elem_hash (str, c1); - - idx = 0; - elem = hash % table_size; - if (symb_table[2 * elem] != 0) - { - second = hash % (table_size - 2) + 1; - - do - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - && (c1 - == extra[symb_table[2 * elem + 1]]) - && memcmp (str, - &extra[symb_table[2 * elem + 1] - + 1], c1) == 0) - { - /* Yep, this is the entry. */ - idx = symb_table[2 * elem + 1]; - idx += 1 + extra[idx]; - break; - } - - /* Next entry. */ - elem += second; - } - while (symb_table[2 * elem] != 0); - } - - if (symb_table[2 * elem] != 0) - { - /* Compare the byte sequence but only if - this is not part of a range. */ + for (elem = 0; elem < table_size; elem++) + if (symb_table[2 * elem] != 0) + { + idx = symb_table[2 * elem + 1]; + /* Skip the name of collating + element. */ + idx += 1 + extra[idx]; # if WIDE_CHAR_VERSION - int32_t *wextra; - - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~4; - - wextra = (int32_t *) &extra[idx + 4]; + /* Skip the byte sequence of the + collating element. */ + idx += 1 + extra[idx]; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; + + wextra = (CHAR *) &extra[idx + 4]; + + if (/* Compare the length of the + sequence. */ + c1 == wextra[0] + /* Compare the wide char sequence. */ + && WMEMCMP (startp + 1, &wextra[1], + c1) == 0) + /* Yep, this is the entry. */ + break; +# else + if (/* Compare the length of the + sequence. */ + c1 == extra[idx] + /* Compare the byte sequence. */ + && memcmp (startp + 1, + &extra[idx + 1], c1) == 0) + /* Yep, this is the entry. */ + break; # endif + } + + if (elem < table_size) + { /* Get the collation sequence value. */ is_seqval = 1; # if WIDE_CHAR_VERSION cend = wextra[1 + wextra[idx]]; # else - /* Adjust for the alignment. */ idx += 1 + extra[idx]; - idx = (idx + 3) & ~4; + /* Adjust for the alignment. */ + idx = (idx + 3) & ~3; cend = *((int32_t *) &extra[idx]); # endif } - else if (symb_table[2 * elem] != 0 && c1 == 1) + else if (c1 == 1) { - cend = str[0]; + cend = startp[1]; c = *p++; } else return FNM_NOMATCH; } -# undef str } else { diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/posix/tst-fnmatch4.c similarity index 59% copy from sysdeps/i386/i686/multiarch/wmemcmp.c copy to posix/tst-fnmatch4.c index ce25991352..370265ddf0 100644 --- a/sysdeps/i386/i686/multiarch/wmemcmp.c +++ b/posix/tst-fnmatch4.c @@ -1,6 +1,5 @@ -/* Multiple versions of wmemcmp. - All versions must be listed in ifunc-impl-list.c. - Copyright (C) 2017-2019 Free Software Foundation, Inc. +/* Test for fnmatch handling of collating elements + Copyright (C) 2019 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -17,14 +16,26 @@ License along with the GNU C Library; if not, see . */ -/* Define multiple versions only for the definition in libc. */ -#if IS_IN (libc) -# define wmemcmp __redirect_wmemcmp -# include -# undef wmemcmp +#include +#include +#include +#include -# define SYMBOL_NAME wmemcmp -# include "ifunc-ssse3-sse4_2.h" +static void +do_test_locale (const char *locale) +{ + TEST_VERIFY_EXIT (setlocale (LC_ALL, locale) != NULL); -libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ()); -#endif + TEST_VERIFY (fnmatch ("[[.ch.]]", "ch", 0) == 0); +} + +static int +do_test (void) +{ + do_test_locale ("cs_CZ.ISO-8859-2"); + do_test_locale ("cs_CZ.UTF-8"); + + return 0; +} + +#include diff --git a/sysdeps/s390/wmemcmp.c b/posix/tst-fnmatch5.c similarity index 53% copy from sysdeps/s390/wmemcmp.c copy to posix/tst-fnmatch5.c index ec0b4027f8..241371c752 100644 --- a/sysdeps/s390/wmemcmp.c +++ b/posix/tst-fnmatch5.c @@ -1,5 +1,5 @@ -/* Multiple versions of wmemcmp. - Copyright (C) 2015-2019 Free Software Foundation, Inc. +/* Test for fnmatch handling of collating elements + Copyright (C) 2019 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -16,23 +16,31 @@ License along with the GNU C Library; if not, see . */ -#include +#include +#include +#include +#include +#include -#if HAVE_WMEMCMP_IFUNC -# include -# include +#define LENGTH 20000000 -# if HAVE_WMEMCMP_C -extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden; -# endif +static char pattern[LENGTH + 7]; -# if HAVE_WMEMCMP_Z13 -extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden; -# endif +static int +do_test (void) +{ + TEST_VERIFY_EXIT (setlocale (LC_ALL, "en_US.UTF-8") != NULL); -s390_libc_ifunc_expr (wmemcmp, wmemcmp, - (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX)) - ? WMEMCMP_Z13 - : WMEMCMP_DEFAULT - ) -#endif + pattern[0] = '['; + pattern[1] = '['; + pattern[2] = '.'; + memset (pattern + 3, 'a', LENGTH); + pattern[LENGTH + 3] = '.'; + pattern[LENGTH + 4] = ']'; + pattern[LENGTH + 5] = ']'; + TEST_VERIFY (fnmatch (pattern, "a", 0) != 0); + + return 0; +} + +#include diff --git a/sysdeps/i386/i686/multiarch/wmemcmp.c b/sysdeps/i386/i686/multiarch/wmemcmp.c index ce25991352..7674530c68 100644 --- a/sysdeps/i386/i686/multiarch/wmemcmp.c +++ b/sysdeps/i386/i686/multiarch/wmemcmp.c @@ -26,5 +26,6 @@ # define SYMBOL_NAME wmemcmp # include "ifunc-ssse3-sse4_2.h" -libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ()); +libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ()); +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/sysdeps/s390/wmemcmp.c b/sysdeps/s390/wmemcmp.c index ec0b4027f8..2f619550a6 100644 --- a/sysdeps/s390/wmemcmp.c +++ b/sysdeps/s390/wmemcmp.c @@ -23,16 +23,17 @@ # include # if HAVE_WMEMCMP_C -extern __typeof (wmemcmp) WMEMCMP_C attribute_hidden; +extern __typeof (__wmemcmp) WMEMCMP_C attribute_hidden; # endif # if HAVE_WMEMCMP_Z13 -extern __typeof (wmemcmp) WMEMCMP_Z13 attribute_hidden; +extern __typeof (__wmemcmp) WMEMCMP_Z13 attribute_hidden; # endif -s390_libc_ifunc_expr (wmemcmp, wmemcmp, +s390_libc_ifunc_expr (__wmemcmp, __wmemcmp, (HAVE_WMEMCMP_Z13 && (hwcap & HWCAP_S390_VX)) ? WMEMCMP_Z13 : WMEMCMP_DEFAULT ) +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/sysdeps/x86_64/multiarch/wmemcmp.c b/sysdeps/x86_64/multiarch/wmemcmp.c index 136a7b05e1..826c90bb77 100644 --- a/sysdeps/x86_64/multiarch/wmemcmp.c +++ b/sysdeps/x86_64/multiarch/wmemcmp.c @@ -26,5 +26,6 @@ # define SYMBOL_NAME wmemcmp # include "ifunc-memcmp.h" -libc_ifunc_redirected (__redirect_wmemcmp, wmemcmp, IFUNC_SELECTOR ()); +libc_ifunc_redirected (__redirect_wmemcmp, __wmemcmp, IFUNC_SELECTOR ()); +weak_alias (__wmemcmp, wmemcmp) #endif diff --git a/wcsmbs/wmemcmp.c b/wcsmbs/wmemcmp.c index 5b243bab8f..5e137fd3dd 100644 --- a/wcsmbs/wmemcmp.c +++ b/wcsmbs/wmemcmp.c @@ -18,12 +18,12 @@ #include -#ifndef WMEMCMP -# define WMEMCMP wmemcmp +#ifdef WMEMCMP +# define __wmemcmp WMEMCMP #endif int -WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n) +__wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) { wchar_t c1; wchar_t c2; @@ -81,3 +81,6 @@ WMEMCMP (const wchar_t *s1, const wchar_t *s2, size_t n) return 0; } +#ifndef WMEMCMP +weak_alias (__wmemcmp, wmemcmp) +#endif