Fix fnmatch handling of collating elements (bug 17396, bug 16976)

Message ID mvmwpjgq1xr.fsf@hawking.suse.de
State New, archived
Headers

Commit Message

Andreas Schwab Aug. 16, 2016, 1:27 p.m. UTC
  This fixes the same bug that was fixed by commit 7e2f0d2 for regexp
matching.  As a side effect it also removes the use of an unbound VLA.

Andreas.

	[BZ #16976]
	[BZ #17396]
	* posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When
	looking up collating elements match against (wide) character
	sequence instead of name.  Correct alignment adjustment.
	* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
	* posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5.
	* posix/tst-fnmatch4.c: New file.
	* posix/tst-fnmatch5.c: New file.
---
 posix/Makefile       |   1 +
 posix/fnmatch.c      |   1 -
 posix/fnmatch_loop.c | 227 ++++++++++++++++++++-------------------------------
 posix/tst-fnmatch4.c |  51 ++++++++++++
 posix/tst-fnmatch5.c |  53 ++++++++++++
 5 files changed, 192 insertions(+), 141 deletions(-)
 create mode 100644 posix/tst-fnmatch4.c
 create mode 100644 posix/tst-fnmatch5.c
  

Comments

Mike Frysinger Aug. 18, 2016, 6:04 p.m. UTC | #1
On 16 Aug 2016 15:27, Andreas Schwab wrote:
> This fixes the same bug that was fixed by commit 7e2f0d2 for regexp
> matching.  As a side effect it also removes the use of an unbound VLA.

i scanned the fnmatch at a high level and looks OK, but might want to
see if anyone with more familiarity chimes in

> --- a/posix/Makefile
> +++ b/posix/Makefile

i was going to say you're missing $(gen-locales) deps for these tests,
but they're in there and have been since Oct 2015.  i guess you've had
this patch for a while now ? :)

> +char pattern[LENGTH + 7];

static

> +  pattern[0] = '[';
> +  pattern[1] = '[';
> +  pattern[2] = '.';
> +  memset (pattern + 3, 'a', LENGTH);
> +  pattern[LENGTH + 3] = '.';
> +  pattern[LENGTH + 4] = ']';
> +  pattern[LENGTH + 5] = ']';

might be a little more readable:
	strcpy (pattern, "[[.", 3);
	memset (pattern + 3, 'a', LENGTH);
	strcpy (pattern + LENGTH + 3, ".]]", 3);

at least, pattern isn't explicitly NUL terminated in the current test
(other than "pattern" being in the bss, but it's also missing static)
-mike
  
Andreas Schwab Aug. 22, 2016, 7:08 a.m. UTC | #2
On Aug 18 2016, Mike Frysinger <vapier@gentoo.org> wrote:

> might be a little more readable:
> 	strcpy (pattern, "[[.", 3);
> 	memset (pattern + 3, 'a', LENGTH);
> 	strcpy (pattern + LENGTH + 3, ".]]", 3);

Except that it doesn't work.

Andreas.
  
Mike Frysinger Aug. 22, 2016, 6:35 p.m. UTC | #3
On 22 Aug 2016 09:08, Andreas Schwab wrote:
> On Aug 18 2016, Mike Frysinger <vapier@gentoo.org> wrote:
> > might be a little more readable:
> > 	strcpy (pattern, "[[.", 3);
> > 	memset (pattern + 3, 'a', LENGTH);
> > 	strcpy (pattern + LENGTH + 3, ".]]", 3);
> 
> Except that it doesn't work.

it's trivial to make it work.  i was just copying & pasting from
your code ... i didn't actually compile it.  nitpicking over the
syntax being valid is wasting time instead of just using the idea
as posted, so why are you doing it ?  just delete the ", 3" and
move on.
-mike
  
Andreas Schwab Aug. 23, 2016, 8:13 a.m. UTC | #4
You cannot force everyone to read your mind.

Andreas.
  
Pedro Alves Aug. 23, 2016, 11:18 a.m. UTC | #5
On 08/16/2016 02:27 PM, Andreas Schwab wrote:
> +++ b/posix/tst-fnmatch4.c
> +   Copyright (C) 2015 Free Software Foundation, Inc.

...

> +++ b/posix/tst-fnmatch5.c
> +   Copyright (C) 2015 Free Software Foundation, Inc.

...

Should include 2016.

Thanks,
Pedro Alves
  
Mike Frysinger Aug. 24, 2016, 7:57 p.m. UTC | #6
On 23 Aug 2016 10:13, Andreas Schwab wrote:
> You cannot force everyone to read your mind.

ignoring the irony of this statement from the person writing it,
what exactly do you need me to clarify in my suggestion now ?
-mike
  

Patch

diff --git a/posix/Makefile b/posix/Makefile
index 3a7719e..b961b86 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -90,6 +90,7 @@  tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
 		   tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
 		   tst-fnmatch3 bug-regex36 tst-getaddrinfo5 \
+		   tst-fnmatch4 tst-fnmatch5 \
 		   tst-posix_spawn-fd
 xtests		:= bug-ga2
 ifeq (yes,$(build-shared))
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 8bbaaa7..6bbd3f5 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@ 
    we support a correct implementation only in glibc.  */
 #ifdef _LIBC
 # include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
 # include "../locale/coll-lookup.h"
 # include <shlib-compat.h>
 
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 229904e..5dfa3a9 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -497,26 +497,12 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 			  {
 			    int32_t table_size;
 			    const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-			    char str[c1];
-			    unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 			    const unsigned char *extra;
 			    int32_t idx;
 			    int32_t elem;
-			    int32_t second;
-			    int32_t hash;
-
 # if WIDE_CHAR_VERSION
-			    /* We have to convert the name to a single-byte
-			       string.  This is possible since the names
-			       consist of ASCII characters and the internal
-			       representation is UCS4.  */
-			    for (strcnt = 0; strcnt < c1; ++strcnt)
-			      str[strcnt] = startp[1 + strcnt];
-#endif
+			    int32_t *wextra;
+# endif
 
 			    table_size =
 			      _NL_CURRENT_WORD (LC_COLLATE,
@@ -528,71 +514,55 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 			      _NL_CURRENT (LC_COLLATE,
 					   _NL_COLLATE_SYMB_EXTRAMB);
 
-			    /* Locate the character in the hashing table.  */
-			    hash = elem_hash (str, c1);
-
-			    idx = 0;
-			    elem = hash % table_size;
-			    if (symb_table[2 * elem] != 0)
-			      {
-				second = hash % (table_size - 2) + 1;
-
-				do
-				  {
-				    /* First compare the hashing value.  */
-				    if (symb_table[2 * elem] == hash
-					&& (c1
-					    == extra[symb_table[2 * elem + 1]])
-					&& memcmp (str,
-						   &extra[symb_table[2 * elem
-								     + 1]
-							  + 1], c1) == 0)
-				      {
-					/* Yep, this is the entry.  */
-					idx = symb_table[2 * elem + 1];
-					idx += 1 + extra[idx];
-					break;
-				      }
-
-				    /* Next entry.  */
-				    elem += second;
-				  }
-				while (symb_table[2 * elem] != 0);
-			      }
+			    for (elem = 0; elem < table_size; elem++)
+			      if (symb_table[2 * elem] != 0)
+				{
+				  idx = symb_table[2 * elem + 1];
+				  /* Skip the name of collating element.  */
+				  idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+				  /* Skip the byte sequence of the
+				     collating element.  */
+				  idx += 1 + extra[idx];
+				  /* Adjust for the alignment.  */
+				  idx = (idx + 3) & ~3;
+
+				  wextra = (int32_t *) &extra[idx + 4];
+
+				  if (/* Compare the length of the sequence.  */
+				      c1 == wextra[0]
+				      /* Compare the wide char sequence.  */
+				      && memcmp (startp + 1, &wextra[1],
+						 c1 * sizeof (UCHAR)) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# else
+				  if (/* Compare the length of the sequence.  */
+				      c1 == extra[idx]
+				      /* Compare the byte sequence.  */
+				      && memcmp (startp + 1,
+						 &extra[idx + 1], c1) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# endif
+				}
 
-			    if (symb_table[2 * elem] != 0)
+			    if (elem < table_size)
 			      {
 				/* Compare the byte sequence but only if
 				   this is not part of a range.  */
-# if WIDE_CHAR_VERSION
-				int32_t *wextra;
+				if (! is_range
 
-				idx += 1 + extra[idx];
-				/* Adjust for the alignment.  */
-				idx = (idx + 3) & ~3;
-
-				wextra = (int32_t *) &extra[idx + 4];
-# endif
-
-				if (! is_range)
-				  {
 # if WIDE_CHAR_VERSION
-				    for (c1 = 0;
-					 (int32_t) c1 < wextra[idx];
-					 ++c1)
-				      if (n[c1] != wextra[1 + c1])
-					break;
-
-				    if ((int32_t) c1 == wextra[idx])
-				      goto matched;
+				    && memcmp (n, &wextra[1],
+					       c1 * sizeof (UCHAR)) == 0
 # else
-				    for (c1 = 0; c1 < extra[idx]; ++c1)
-				      if (n[c1] != extra[1 + c1])
-					break;
-
-				    if (c1 == extra[idx])
-				      goto matched;
+				    && memcmp (n, &extra[idx + 1], c1) == 0
 # endif
+				    )
+				  {
+				    n += c1 - 1;
+				    goto matched;
 				  }
 
 				/* Get the collation sequence value.  */
@@ -600,9 +570,9 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 # if WIDE_CHAR_VERSION
 				cold = wextra[1 + wextra[idx]];
 # else
-				/* Adjust for the alignment.  */
 				idx += 1 + extra[idx];
-				idx = (idx + 3) & ~4;
+				/* Adjust for the alignment.  */
+				idx = (idx + 3) & ~3;
 				cold = *((int32_t *) &extra[idx]);
 # endif
 
@@ -612,10 +582,10 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 			      {
 				/* No valid character.  Match it as a
 				   single byte.  */
-				if (!is_range && *n == str[0])
+				if (!is_range && *n == startp[1])
 				  goto matched;
 
-				cold = str[0];
+				cold = startp[1];
 				c = *p++;
 			      }
 			    else
@@ -623,7 +593,6 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 			  }
 		      }
 		    else
-# undef str
 #endif
 		      {
 			c = FOLD (c);
@@ -715,25 +684,11 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 			      {
 				int32_t table_size;
 				const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-				char str[c1];
-				unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 				const unsigned char *extra;
 				int32_t idx;
 				int32_t elem;
-				int32_t second;
-				int32_t hash;
-
 # if WIDE_CHAR_VERSION
-				/* We have to convert the name to a single-byte
-				   string.  This is possible since the names
-				   consist of ASCII characters and the internal
-				   representation is UCS4.  */
-				for (strcnt = 0; strcnt < c1; ++strcnt)
-				  str[strcnt] = startp[1 + strcnt];
+				int32_t *wextra;
 # endif
 
 				table_size =
@@ -746,51 +701,44 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 				  _NL_CURRENT (LC_COLLATE,
 					       _NL_COLLATE_SYMB_EXTRAMB);
 
-				/* Locate the character in the hashing
-				   table.  */
-				hash = elem_hash (str, c1);
-
-				idx = 0;
-				elem = hash % table_size;
-				if (symb_table[2 * elem] != 0)
-				  {
-				    second = hash % (table_size - 2) + 1;
-
-				    do
-				      {
-					/* First compare the hashing value.  */
-					if (symb_table[2 * elem] == hash
-					    && (c1
-						== extra[symb_table[2 * elem + 1]])
-					    && memcmp (str,
-						       &extra[symb_table[2 * elem + 1]
-							      + 1], c1) == 0)
-					  {
-					    /* Yep, this is the entry.  */
-					    idx = symb_table[2 * elem + 1];
-					    idx += 1 + extra[idx];
-					    break;
-					  }
-
-					/* Next entry.  */
-					elem += second;
-				      }
-				    while (symb_table[2 * elem] != 0);
-				  }
-
-				if (symb_table[2 * elem] != 0)
-				  {
-				    /* Compare the byte sequence but only if
-				       this is not part of a range.  */
+				for (elem = 0; elem < table_size; elem++)
+				  if (symb_table[2 * elem] != 0)
+				    {
+				      idx = symb_table[2 * elem + 1];
+				      /* Skip the name of collating
+					 element.  */
+				      idx += 1 + extra[idx];
 # if WIDE_CHAR_VERSION
-				    int32_t *wextra;
-
-				    idx += 1 + extra[idx];
-				    /* Adjust for the alignment.  */
-				    idx = (idx + 3) & ~4;
-
-				    wextra = (int32_t *) &extra[idx + 4];
+				      /* Skip the byte sequence of the
+					 collating element.  */
+				      idx += 1 + extra[idx];
+				      /* Adjust for the alignment.  */
+				      idx = (idx + 3) & ~3;
+
+				      wextra = (int32_t *) &extra[idx + 4];
+
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == wextra[0]
+					  /* Compare the wide char sequence.  */
+					  && memcmp (startp + 1, &wextra[1],
+						     c1 * sizeof (int32_t)) == 0)
+					/* Yep, this is the entry.  */
+					break;
+# else
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == extra[idx]
+					  /* Compare the byte sequence.  */
+					  && memcmp (startp + 1,
+						     &extra[idx + 1], c1) == 0)
+					/* Yep, this is the entry.  */
+					break;
 # endif
+				    }
+
+				if (elem < table_size)
+				  {
 				    /* Get the collation sequence value.  */
 				    is_seqval = 1;
 # if WIDE_CHAR_VERSION
@@ -798,19 +746,18 @@  FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
 # else
 				    /* Adjust for the alignment.  */
 				    idx += 1 + extra[idx];
-				    idx = (idx + 3) & ~4;
+				    idx = (idx + 3) & ~3;
 				    cend = *((int32_t *) &extra[idx]);
 # endif
 				  }
-				else if (symb_table[2 * elem] != 0 && c1 == 1)
+				else if (c1 == 1)
 				  {
-				    cend = str[0];
+				    cend = startp[1];
 				    c = *p++;
 				  }
 				else
 				  return FNM_NOMATCH;
 			      }
-# undef str
 			  }
 			else
 			  {
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..7484a5c
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,51 @@ 
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <locale.h>
+#include <fnmatch.h>
+
+static int
+do_test_locale (const char *locale)
+{
+  const char *pattern = "[[.ll.]]";
+
+  if (setlocale (LC_ALL, locale) == NULL)
+    {
+      printf ("could not set locale %s\n", locale);
+      return 1;
+    }
+
+  if (fnmatch (pattern, "ll", 0) != 0)
+    {
+      printf ("%s didn't match in locale %s\n", pattern, locale);
+      return 1;
+    }
+
+  return 0;
+}
+
+static int
+do_test (void)
+{
+  return (do_test_locale ("es_US.ISO-8859-1")
+	  || do_test_locale ("es_US.UTF-8"));
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c
new file mode 100644
index 0000000..0ac1ae0
--- /dev/null
+++ b/posix/tst-fnmatch5.c
@@ -0,0 +1,53 @@ 
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fnmatch.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LENGTH 20000000
+
+char pattern[LENGTH + 7];
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    {
+      puts ("could not set locale");
+      return 1;
+    }
+  pattern[0] = '[';
+  pattern[1] = '[';
+  pattern[2] = '.';
+  memset (pattern + 3, 'a', LENGTH);
+  pattern[LENGTH + 3] = '.';
+  pattern[LENGTH + 4] = ']';
+  pattern[LENGTH + 5] = ']';
+  int ret = fnmatch (pattern, "a", 0);
+  if (ret == 0)
+    {
+      puts ("fnmatch returned 0 for invalid pattern");
+      return 1;
+    }
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"