[BZ,#16009] fix memory handling in strxfrm_l

Hello,

this patch solves bug #16009 by implementing an additional path in 
strxfrm that does not depend on caching the weight and rule indices.

In detail the following changed:

* The old main loop was factored out of strxfrm_l into the function 
do_xfrm_cached to be able to alternatively use the non-caching version 
do_xfrm.

* strxfrm_l allocates a fixed size array on the stack. If this is not 
sufficient to store the weight and rule indices, the non-caching path is 
taken. As the cache size is not dependent on the input there can be no 
problems with integer overflows or stack allocations greater than 
__MAX_ALLOCA_CUTOFF. Note that malloc-ing is not possible because the 
definition of strxfrm does not allow OOM error handling.

* The uncached path determines the weight and rule index for every char 
and for every pass again. Handling of backward sequences needs special 
treatment; I found no way to implement it without allocation. This is 
now done by pushing the backward sequence on the stack within a singly 
linked list that can later easily be traversed backwards (but not 
freed) and avoids the problem of stack allocations beyond 
__MAX_ALLOCA_CUTOFF. Here again, malloc is not possible.

* Passing all the locale data array by array resulted in very long 
parameter lists, so I introduced a structure that holds them.

* The check for a zero-length src string has been moved up a bit; it now 
comes before the locale data initialization.

* To verify that the non-caching path works correctly I added a test run 
to localedata/sort-test.sh & localedata/xfrm-test.c where all strings 
are padded with leading spaces so that they are too large for the caching 
path.

make tests && make xcheck report no additional errors. Unfortunately the 
diff of strxfrm_l.c is a bit messy; it looks like factoring out the main 
loop was too much for git. :|

Leonhard

2014-11-29  Leonhard Holz <leonhard.holz@web.de>

         [BZ #16009]
	* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache
	for weights and rules. Use do_xfrm_cached if data fits in
	cache, do_xfrm otherwise. Moved former main loop to...
	* (do_xfrm_cached): New function.
	* (do_xfrm): Non-caching version of do_xfrm_cached. Uses
	find_idx, find_position and stack_push.
	* (find_idx): New function.
	* (find_position): Likewise.
	* (stack_push): New macro.
	* localedata/sort-test.sh: Added test run for do_xfrm.
	* localedata/xfrm-test.c (main): Added command line
	option -nocache to run the test with strings that are
	too large for the STRXFRM cache.

diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
index e37129a..c464b05 100644
--- a/localedata/sort-test.sh
+++ b/localedata/sort-test.sh
@@ -53,11 +53,18 @@ for l in $lang; do
    ${common_objpfx}localedata/xfrm-test $id < $cns.in \
    > ${common_objpfx}localedata/$cns.xout || here=1
   cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
+  ${test_program_prefix_before_env} \
+   ${run_program_env} \
+   LC_ALL=$l ${test_program_prefix_after_env} \
+   ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
+   > ${common_objpfx}localedata/$cns.xoutl || here=1
+  cmp -s $cns.in ${common_objpfx}localedata/$cns.xoutl || here=1
   if test $here -eq 0; then
     echo "$l xfrm-test OK"
   else
     echo "$l xfrm-test FAIL"
     diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/  /'
+    diff -u $cns.in ${common_objpfx}localedata/$cns.xoutl | sed 's/^/  /'
     status=1
   fi
 done
diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
index d2aba7d..9ac57bf 100644
--- a/localedata/xfrm-test.c
+++ b/localedata/xfrm-test.c
@@ -24,6 +24,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+/* Keep in sync with string/strxfrm_l.c.  */
+#define CACHE_SIZE 4095
 
 struct lines
 {
@@ -36,7 +38,7 @@ static int xstrcmp (const void *, const void *);
 int
 main (int argc, char *argv[])
 {
-  int result = 0;
+  int result = 0, nocache = 0;
   size_t nstrings, nstrings_max;
   struct lines *strings;
   char *line = NULL;
@@ -44,7 +46,16 @@ main (int argc, char *argv[])
   size_t n;
 
   if (argc < 2)
-    error (1, 0, "usage: %s <random seed>", argv[0]);
+    error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
+
+  if (argc == 3)
+    if (strcmp (argv[2], "-nocache") == 0)
+      nocache = 1;
+    else
+      {
+	printf ("Unknown option %s!\n", argv[2]);
+	exit (1);
+      }
 
   setlocale (LC_ALL, "");
 
@@ -59,9 +70,9 @@ main (int argc, char *argv[])
 
   while (1)
     {
-      char saved, *newp;
-      int needed;
-      int l;
+      char saved, *word, *newp;
+      size_t l, line_len, needed;
+
       if (getline (&line, &len, stdin) < 0)
 	break;
 
@@ -83,10 +94,35 @@ main (int argc, char *argv[])
 
       saved = line[l];
       line[l] = '\0';
-      needed = strxfrm (NULL, line, 0);
+
+      if (nocache)
+	{
+	  line_len = strlen (line);
+	  word = malloc (line_len + CACHE_SIZE + 1);
+	  if (word == NULL)
+	    {
+	      perror (argv[0]);
+	      exit (1);
+	    }
+	  memset (word, ' ', CACHE_SIZE);
+	  memcpy (word + CACHE_SIZE, line, line_len);
+	  word[line_len + CACHE_SIZE] = '\0';
+	}
+      else
+        word = line;
+
+      needed = strxfrm (NULL, word, 0);
       newp = malloc (needed + 1);
-      strxfrm (newp, line, needed + 1);
+      if (newp == NULL)
+	{
+	  perror (argv[0]);
+	  exit (1);
+	}
+      strxfrm (newp, word, needed + 1);
       strings[nstrings].xfrm = newp;
+
+      if (nocache)
+	free (word);
       line[l] = saved;
       ++nstrings;
     }
  

[BZ,#16009] fix memory handling in strxfrm_l

Commit Message

Comments

Patch