[BZ,#16009] fix memory handling in strxfrm_l
Commit Message
2015-01-11 Leonhard Holz <leonhard.holz@web.de>
[BZ #16009]
* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for weights and
rules. Use do_xfrm_cached if data fits in cache, do_xfrm otherwise. Moved
former main loop to...
* (do_xfrm_cached): New function.
* (do_xfrm): Non-caching version of do_xfrm_cached. Uses find_idx,
find_position and stack_push.
* (find_idx): New function.
* (find_position): Likewise.
* localedata/sort-test.sh: Added test run for do_xfrm.
* localedata/xfrm-test.c (main): Added command line option -nocache to run
the test with strings that are too large for the STRXFRM cache.
---
localedata/sort-test.sh | 7 +
localedata/xfrm-test.c | 52 +++++-
string/strxfrm_l.c | 488 ++++++++++++++++++++++++++++++++++++++----------
3 files changed, 447 insertions(+), 100 deletions(-)
Comments
Thanks, this looks good to me now. I'll commit this once Carlos
confirms that I can.
Siddhesh
On Sun, 11 Jan 2015 11:55:08 +0100
Leonhard Holz <leonhard.holz@web.de> wrote:
> 2015-01-11 Leonhard Holz <leonhard.holz@web.de>
>
> [BZ #16009]
> * string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for
> weights and rules. Use do_xfrm_cached if data fits in cache, do_xfrm
> otherwise. Moved former main loop to...
> * (do_xfrm_cached): New function.
> * (do_xfrm): Non-caching version of do_xfrm_cached. Uses
> find_idx, find_position and stack_push.
> * (find_idx): New function.
> * (find_position): Likewise.
> * localedata/sort-test.sh: Added test run for do_xfrm.
> * localedata/xfrm-test.c (main): Added command line option
> -nocache to run the test with strings that are too large for the
> STRXFRM cache.
>
> ---
> localedata/sort-test.sh | 7 +
> localedata/xfrm-test.c | 52 +++++-
> string/strxfrm_l.c | 488
> ++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 447
> insertions(+), 100 deletions(-)
>
> diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
> index dc23268..1a10262 100644
> --- a/localedata/sort-test.sh
> +++ b/localedata/sort-test.sh
> @@ -53,11 +53,18 @@ for l in $lang; do
> ${common_objpfx}localedata/xfrm-test $id < $cns.in \
> > ${common_objpfx}localedata/$cns.xout || here=1
> cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
> + ${test_program_prefix_before_env} \
> + ${run_program_env} \
> + LC_ALL=$l ${test_program_prefix_after_env} \
> + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
> + > ${common_objpfx}localedata/$cns.nocache.xout || here=1
> + cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout ||
> here=1 if test $here -eq 0; then
> echo "$l xfrm-test OK"
> else
> echo "$l xfrm-test FAIL"
> diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed
> 's/^/ /'
> + diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout |
> sed 's/^/ /' status=1
> fi
> done
> diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
> index 79494ba..3ab2140 100644
> --- a/localedata/xfrm-test.c
> +++ b/localedata/xfrm-test.c
> @@ -23,7 +23,10 @@
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#include <stdbool.h>
>
> +/* Keep in sync with string/strxfrm_l.c. */
> +#define SMALL_STR_SIZE 4095
>
> struct lines
> {
> @@ -37,6 +40,7 @@ int
> main (int argc, char *argv[])
> {
> int result = 0;
> + bool nocache = false;
> size_t nstrings, nstrings_max;
> struct lines *strings;
> char *line = NULL;
> @@ -44,7 +48,18 @@ main (int argc, char *argv[])
> size_t n;
>
> if (argc < 2)
> - error (1, 0, "usage: %s <random seed>", argv[0]);
> + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
> +
> + if (argc == 3)
> + {
> + if (strcmp (argv[2], "-nocache") == 0)
> + nocache = true;
> + else
> + {
> + printf ("Unknown option %s!\n", argv[2]);
> + exit (1);
> + }
> + }
>
> setlocale (LC_ALL, "");
>
> @@ -59,9 +74,9 @@ main (int argc, char *argv[])
>
> while (1)
> {
> - char saved, *newp;
> - int needed;
> - int l;
> + char saved, *word, *newp;
> + size_t l, line_len, needed;
> +
> if (getline (&line, &len, stdin) < 0)
> break;
>
> @@ -83,10 +98,35 @@ main (int argc, char *argv[])
>
> saved = line[l];
> line[l] = '\0';
> - needed = strxfrm (NULL, line, 0);
> +
> + if (nocache)
> + {
> + line_len = strlen (line);
> + word = malloc (line_len + SMALL_STR_SIZE + 1);
> + if (word == NULL)
> + {
> + printf ("malloc failed: %m\n");
> + exit (1);
> + }
> + memset (word, ' ', SMALL_STR_SIZE);
> + memcpy (word + SMALL_STR_SIZE, line, line_len);
> + word[line_len + SMALL_STR_SIZE] = '\0';
> + }
> + else
> + word = line;
> +
> + needed = strxfrm (NULL, word, 0);
> newp = malloc (needed + 1);
> - strxfrm (newp, line, needed + 1);
> + if (newp == NULL)
> + {
> + printf ("malloc failed: %m\n");
> + exit (1);
> + }
> + strxfrm (newp, word, needed + 1);
> strings[nstrings].xfrm = newp;
> +
> + if (nocache)
> + free (word);
> line[l] = saved;
> ++nstrings;
> }
> diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
> index 9849447..921d1f7 100644
> --- a/string/strxfrm_l.c
> +++ b/string/strxfrm_l.c
> @@ -40,9 +40,24 @@
> #define CONCAT(a,b) CONCAT1(a,b)
> #define CONCAT1(a,b) a##b
>
> +/* Maximum string size that is calculated with cached indices.
> Right now this
> + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4
> has to be
> + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in
> sync. */ +#define SMALL_STR_SIZE 4095
> +
> #include "../locale/localeinfo.h"
> #include WEIGHT_H
>
> +/* Group locale data for shorter parameter lists. */
> +typedef struct
> +{
> + uint_fast32_t nrules;
> + unsigned char *rulesets;
> + USTRING_TYPE *weights;
> + int32_t *table;
> + USTRING_TYPE *extra;
> + int32_t *indirect;
> +} locale_data_t;
>
> #ifndef WIDE_CHAR_VERSION
>
> @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val)
> }
> #endif
>
> +/* Find next weight and rule index. Inlined since called for every
> char. */ +static __always_inline size_t
> +find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
> + unsigned char *rule_idx, const locale_data_t *l_data,
> const int pass) +{
> + int32_t tmp = findidx (l_data->table, l_data->indirect,
> l_data->extra, us,
> + -1);
> + *rule_idx = tmp >> 24;
> + int32_t idx = tmp & 0xffffff;
> + size_t len = l_data->weights[idx++];
> +
> + /* Skip over indices of previous levels. */
> + for (int i = 0; i < pass; i++)
> + {
> + idx += len;
> + len = l_data->weights[idx++];
> + }
>
> -size_t
> -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n,
> __locale_t l)
> + *weight_idx = idx;
> + return len;
> +}
> +
> +static int
> +find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
> + const int pass)
> {
> - struct __locale_data *current = l->__locales[LC_COLLATE];
> - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX
> (_NL_COLLATE_NRULES)].word;
> - /* We don't assign the following values right away since it might
> be
> - unnecessary in case there are no rules. */
> - const unsigned char *rulesets;
> - const int32_t *table;
> - const USTRING_TYPE *weights;
> - const USTRING_TYPE *extra;
> - const int32_t *indirect;
> + int32_t weight_idx;
> + unsigned char rule_idx;
> + const USTRING_TYPE *usrc = us;
> +
> + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
> + return l_data->rulesets[rule_idx * l_data->nrules + pass] &
> sort_position; +}
> +
> +/* Do the transformation. */
> +static size_t
> +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
> + const locale_data_t *l_data)
> +{
> + int32_t weight_idx;
> + unsigned char rule_idx;
> uint_fast32_t pass;
> - size_t needed;
> + size_t needed = 0;
> size_t last_needed;
> - const USTRING_TYPE *usrc;
> - size_t srclen = STRLEN (src);
> - int32_t *idxarr;
> - unsigned char *rulearr;
> - size_t idxmax;
> - size_t idxcnt;
> - int use_malloc;
>
> - if (nrules == 0)
> + /* Now the passes over the weights. */
> + for (pass = 0; pass < l_data->nrules; ++pass)
> {
> - if (n != 0)
> - STPNCPY (dest, src, MIN (srclen + 1, n));
> + size_t backw_len = 0;
> + last_needed = needed;
> + const USTRING_TYPE *cur = usrc;
> + const USTRING_TYPE *backw_start = NULL;
>
> - return srclen;
> - }
> + /* We assume that if a rule has defined `position' in one
> section
> + this is true for all of them. */
> + int position = find_position (cur, l_data, pass);
>
> - rulesets = (const unsigned char *)
> - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
> - table = (const int32_t *)
> - current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
> - weights = (const USTRING_TYPE *)
> - current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
> - extra = (const USTRING_TYPE *)
> - current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
> - indirect = (const int32_t *)
> - current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
> - use_malloc = 0;
> + if (position == 0)
> + {
> + while (*cur != L('\0'))
> + {
> + const USTRING_TYPE *pos = cur;
> + size_t len = find_idx (&cur, &weight_idx, &rule_idx,
> l_data,
> + pass);
> + int rule = l_data->rulesets[rule_idx * l_data->nrules
> + pass];
> - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
> - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
> - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
> - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
> + if ((rule & sort_forward) != 0)
> + {
> + /* Handle the pushed backward sequence. */
> + if (backw_start != NULL)
> + {
> + for (size_t i = backw_len; i > 0; )
> + {
> + int32_t weight_idx;
> + unsigned char rule_idx;
> + size_t len = find_idx (&backw_start,
> &weight_idx,
> + &rule_idx, l_data,
> pass);
> + if (needed + i < n)
> + for (size_t j = len; j > 0; j--)
> + dest[needed + i - j] =
> + l_data->weights[weight_idx++];
> +
> + i -= len;
> + }
>
> - /* Handle an empty string as a special case. */
> - if (srclen == 0)
> - {
> - if (n != 0)
> - *dest = L('\0');
> - return 0;
> - }
> + needed += backw_len;
> + backw_start = NULL;
> + backw_len = 0;
> + }
>
> - /* We need the elements of the string as unsigned values since they
> - are used as indeces. */
> - usrc = (const USTRING_TYPE *) src;
> -
> - /* Perform the first pass over the string and while doing this find
> - and store the weights for each character. Since we want this to
> - be as fast as possible we are using `alloca' to store the
> temporary
> - values. But since there is no limit on the length of the string
> - we have to use `malloc' if the string is too long. We should be
> - very conservative here. */
> - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
> - {
> - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t)
> + 1));
> - rulearr = (unsigned char *) &idxarr[srclen];
> -
> - if (idxarr == NULL)
> - /* No memory. Well, go with the stack then.
> -
> - XXX Once this implementation is stable we will handle this
> - differently. Instead of precomputing the indeces we will
> - do this in time. This means, though, that this happens
> for
> - every pass again. */
> - goto try_stack;
> - use_malloc = 1;
> - }
> - else
> - {
> - try_stack:
> - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
> - rulearr = (unsigned char *) alloca (srclen + 1);
> + /* Now handle the forward element. */
> + if (needed + len < n)
> + while (len-- > 0)
> + dest[needed++] = l_data->weights[weight_idx++];
> + else
> + /* No more characters fit into the buffer. */
> + needed += len;
> + }
> + else
> + {
> + /* Remember start of the backward sequence & track
> length. */
> + if (backw_start == NULL)
> + backw_start = pos;
> + backw_len += len;
> + }
> + }
> +
> +
> + /* Handle the pushed backward sequence. */
> + if (backw_start != NULL)
> + {
> + for (size_t i = backw_len; i > 0; )
> + {
> + size_t len = find_idx (&backw_start, &weight_idx,
> &rule_idx,
> + l_data, pass);
> + if (needed + i < n)
> + for (size_t j = len; j > 0; j--)
> + dest[needed + i - j] =
> + l_data->weights[weight_idx++];
> +
> + i -= len;
> + }
> +
> + needed += backw_len;
> + }
> + }
> + else
> + {
> + int val = 1;
> +#ifndef WIDE_CHAR_VERSION
> + char buf[7];
> + size_t buflen;
> +#endif
> + size_t i;
> +
> + while (*cur != L('\0'))
> + {
> + const USTRING_TYPE *pos = cur;
> + size_t len = find_idx (&cur, &weight_idx, &rule_idx,
> l_data,
> + pass);
> + int rule = l_data->rulesets[rule_idx * l_data->nrules
> + pass]; +
> + if ((rule & sort_forward) != 0)
> + {
> + /* Handle the pushed backward sequence. */
> + if (backw_start != NULL)
> + {
> + for (size_t p = backw_len; p > 0; p--)
> + {
> + size_t len;
> + int32_t weight_idx;
> + unsigned char rule_idx;
> + const USTRING_TYPE *backw_cur =
> backw_start; +
> + /* To prevent a warning init the used
> vars. */
> + len = find_idx (&backw_cur, &weight_idx,
> + &rule_idx, l_data, pass);
> +
> + for (i = 1; i < p; i++)
> + len = find_idx (&backw_cur, &weight_idx,
> + &rule_idx, l_data, pass);
> +
> + if (len != 0)
> + {
> +#ifdef WIDE_CHAR_VERSION
> + if (needed + 1 + len < n)
> + {
> + dest[needed] = val;
> + for (i = 0; i < len; ++i)
> + dest[needed + 1 + i] =
> + l_data->weights[weight_idx +
> i];
> + }
> + needed += 1 + len;
> +#else
> + buflen = utf8_encode (buf, val);
> + if (needed + buflen + len < n)
> + {
> + for (i = 0; i < buflen; ++i)
> + dest[needed + i] = buf[i];
> + for (i = 0; i < len; ++i)
> + dest[needed + buflen + i] =
> + l_data->weights[weight_idx +
> i];
> + }
> + needed += buflen + len;
> +#endif
> + val = 1;
> + }
> + else
> + ++val;
> + }
> +
> + backw_start = NULL;
> + backw_len = 0;
> + }
> +
> + /* Now handle the forward element. */
> + if (len != 0)
> + {
> +#ifdef WIDE_CHAR_VERSION
> + if (needed + 1 + len < n)
> + {
> + dest[needed] = val;
> + for (i = 0; i < len; ++i)
> + dest[needed + 1 + i] =
> + l_data->weights[weight_idx + i];
> + }
> + needed += 1 + len;
> +#else
> + buflen = utf8_encode (buf, val);
> + if (needed + buflen + len < n)
> + {
> + for (i = 0; i < buflen; ++i)
> + dest[needed + i] = buf[i];
> + for (i = 0; i < len; ++i)
> + dest[needed + buflen + i] =
> + l_data->weights[weight_idx + i];
> + }
> + needed += buflen + len;
> +#endif
> + val = 1;
> + }
> + else
> + ++val;
> + }
> + else
> + {
> + /* Remember start of the backward sequence & track
> length. */
> + if (backw_start == NULL)
> + backw_start = pos;
> + backw_len++;
> + }
> + }
> +
> + /* Handle the pushed backward sequence. */
> + if (backw_start != NULL)
> + {
> + for (size_t p = backw_len; p > 0; p--)
> + {
> + size_t len;
> + int32_t weight_idx;
> + unsigned char rule_idx;
> + const USTRING_TYPE *backw_cur = backw_start;
> +
> + /* To prevent a warning init the used vars. */
> + len = find_idx (&backw_cur, &weight_idx,
> + &rule_idx, l_data, pass);
> +
> + for (i = 1; i < p; i++)
> + len = find_idx (&backw_cur, &weight_idx,
> + &rule_idx, l_data, pass);
> +
> + if (len != 0)
> + {
> +#ifdef WIDE_CHAR_VERSION
> + if (needed + 1 + len < n)
> + {
> + dest[needed] = val;
> + for (i = 0; i < len; ++i)
> + dest[needed + 1 + i] =
> + l_data->weights[weight_idx + i];
> + }
> + needed += 1 + len;
> +#else
> + buflen = utf8_encode (buf, val);
> + if (needed + buflen + len < n)
> + {
> + for (i = 0; i < buflen; ++i)
> + dest[needed + i] = buf[i];
> + for (i = 0; i < len; ++i)
> + dest[needed + buflen + i] =
> + l_data->weights[weight_idx + i];
> + }
> + needed += buflen + len;
> +#endif
> + val = 1;
> + }
> + else
> + ++val;
> + }
> + }
> + }
> +
> + /* Finally store the byte to separate the passes or terminate
> + the string. */
> + if (needed < n)
> + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
> + ++needed;
> }
>
> - idxmax = 0;
> - do
> + /* This is a little optimization: many collation specifications
> have
> + a `position' rule at the end and if no non-ignored character
> + is found the last \1 byte is immediately followed by a \0 byte
> + signalling this. We can avoid the \1 byte(s). */
> + if (needed > 2 && needed == last_needed + 1)
> {
> - int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
> - rulearr[idxmax] = tmp >> 24;
> - idxarr[idxmax] = tmp & 0xffffff;
> -
> - ++idxmax;
> + /* Remove the \1 byte. */
> + if (--needed <= n)
> + dest[needed - 1] = L('\0');
> }
> - while (*usrc != L('\0'));
>
> - /* This element is only read, the value never used but to determine
> - another value which then is ignored. */
> - rulearr[idxmax] = '\0';
> + /* Return the number of bytes/words we need, but don't count the
> NUL
> + byte/word at the end. */
> + return needed - 1;
> +}
> +
> +/* Do the transformation using weight-index and rule cache. */
> +static size_t
> +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t
> *l_data,
> + size_t idxmax, int32_t *idxarr, const unsigned char
> *rulearr) +{
> + uint_fast32_t nrules = l_data->nrules;
> + unsigned char *rulesets = l_data->rulesets;
> + USTRING_TYPE *weights = l_data->weights;
> + uint_fast32_t pass;
> + size_t needed = 0;
> + size_t last_needed;
> + size_t idxcnt;
>
> - /* Now the passes over the weights. We now use the indeces we
> found
> - before. */
> - needed = 0;
> + /* Now the passes over the weights. */
> for (pass = 0; pass < nrules; ++pass)
> {
> size_t backw_stop = ~0ul;
> @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE
> *src, size_t n, __locale_t l) dest[needed - 1] = L('\0');
> }
>
> - /* Free the memory if needed. */
> - if (use_malloc)
> - free (idxarr);
> -
> /* Return the number of bytes/words we need, but don't count the
> NUL byte/word at the end. */
> return needed - 1;
> }
> +
> +size_t
> +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n,
> __locale_t l) +{
> + locale_data_t l_data;
> + struct __locale_data *current = l->__locales[LC_COLLATE];
> + l_data.nrules = current->values[_NL_ITEM_INDEX
> (_NL_COLLATE_NRULES)].word; +
> + /* Handle byte comparison case. */
> + if (l_data.nrules == 0)
> + {
> + size_t srclen = STRLEN (src);
> +
> + if (n != 0)
> + STPNCPY (dest, src, MIN (srclen + 1, n));
> +
> + return srclen;
> + }
> +
> + /* Handle an empty string, code hereafter relies on strlen (src) >
> 0. */
> + if (*src == L('\0'))
> + {
> + if (n != 0)
> + *dest = L('\0');
> + return 0;
> + }
> +
> + /* Get the locale data. */
> + l_data.rulesets = (unsigned char *)
> + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
> + l_data.table = (int32_t *)
> + current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
> + l_data.weights = (USTRING_TYPE *)
> + current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
> + l_data.extra = (USTRING_TYPE *)
> + current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
> + l_data.indirect = (int32_t *)
> + current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; +
> + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0])
> == 0);
> + assert (((uintptr_t) l_data.weights) % __alignof__
> (l_data.weights[0]) == 0);
> + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0])
> == 0);
> + assert (((uintptr_t) l_data.indirect) % __alignof__
> (l_data.indirect[0]) == 0); +
> + /* We need the elements of the string as unsigned values since they
> + are used as indeces. */
> + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
> +
> + /* Allocate cache for small strings on the stack and fill it with
> weight and
> + rule indices. If the cache size is not sufficient, continue
> with the
> + uncached xfrm version. */
> + size_t idxmax = 0;
> + const USTRING_TYPE *cur = usrc;
> + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
> + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
> +
> + do
> + {
> + int32_t tmp = findidx (l_data.table, l_data.indirect,
> l_data.extra, &cur,
> + -1);
> + rulearr[idxmax] = tmp >> 24;
> + idxarr[idxmax] = tmp & 0xffffff;
> +
> + ++idxmax;
> + }
> + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
> +
> + /* This element is only read, the value never used but to determine
> + another value which then is ignored. */
> + rulearr[idxmax] = '\0';
> +
> + /* Do the transformation. */
> + if (*cur == L('\0'))
> + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr,
> rulearr);
> + else
> + return do_xfrm (usrc, dest, n, &l_data);
> +}
> libc_hidden_def (STRXFRM)
>
> #ifndef WIDE_CHAR_VERSION
@@ -53,11 +53,18 @@ for l in $lang; do
${common_objpfx}localedata/xfrm-test $id < $cns.in \
> ${common_objpfx}localedata/$cns.xout || here=1
cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
+ ${test_program_prefix_before_env} \
+ ${run_program_env} \
+ LC_ALL=$l ${test_program_prefix_after_env} \
+ ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
+ > ${common_objpfx}localedata/$cns.nocache.xout || here=1
+ cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1
if test $here -eq 0; then
echo "$l xfrm-test OK"
else
echo "$l xfrm-test FAIL"
diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /'
+ diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/ /'
status=1
fi
done
@@ -23,7 +23,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <stdbool.h>
+/* Keep in sync with string/strxfrm_l.c. */
+#define SMALL_STR_SIZE 4095
struct lines
{
@@ -37,6 +40,7 @@ int
main (int argc, char *argv[])
{
int result = 0;
+ bool nocache = false;
size_t nstrings, nstrings_max;
struct lines *strings;
char *line = NULL;
@@ -44,7 +48,18 @@ main (int argc, char *argv[])
size_t n;
if (argc < 2)
- error (1, 0, "usage: %s <random seed>", argv[0]);
+ error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
+
+ if (argc == 3)
+ {
+ if (strcmp (argv[2], "-nocache") == 0)
+ nocache = true;
+ else
+ {
+ printf ("Unknown option %s!\n", argv[2]);
+ exit (1);
+ }
+ }
setlocale (LC_ALL, "");
@@ -59,9 +74,9 @@ main (int argc, char *argv[])
while (1)
{
- char saved, *newp;
- int needed;
- int l;
+ char saved, *word, *newp;
+ size_t l, line_len, needed;
+
if (getline (&line, &len, stdin) < 0)
break;
@@ -83,10 +98,35 @@ main (int argc, char *argv[])
saved = line[l];
line[l] = '\0';
- needed = strxfrm (NULL, line, 0);
+
+ if (nocache)
+ {
+ line_len = strlen (line);
+ word = malloc (line_len + SMALL_STR_SIZE + 1);
+ if (word == NULL)
+ {
+ printf ("malloc failed: %m\n");
+ exit (1);
+ }
+ memset (word, ' ', SMALL_STR_SIZE);
+ memcpy (word + SMALL_STR_SIZE, line, line_len);
+ word[line_len + SMALL_STR_SIZE] = '\0';
+ }
+ else
+ word = line;
+
+ needed = strxfrm (NULL, word, 0);
newp = malloc (needed + 1);
- strxfrm (newp, line, needed + 1);
+ if (newp == NULL)
+ {
+ printf ("malloc failed: %m\n");
+ exit (1);
+ }
+ strxfrm (newp, word, needed + 1);
strings[nstrings].xfrm = newp;
+
+ if (nocache)
+ free (word);
line[l] = saved;
++nstrings;
}
@@ -40,9 +40,24 @@
#define CONCAT(a,b) CONCAT1(a,b)
#define CONCAT1(a,b) a##b
+/* Maximum string size that is calculated with cached indices. Right now this
+ is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be
+ lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */
+#define SMALL_STR_SIZE 4095
+
#include "../locale/localeinfo.h"
#include WEIGHT_H
+/* Group locale data for shorter parameter lists. */
+typedef struct
+{
+ uint_fast32_t nrules;
+ unsigned char *rulesets;
+ USTRING_TYPE *weights;
+ int32_t *table;
+ USTRING_TYPE *extra;
+ int32_t *indirect;
+} locale_data_t;
#ifndef WIDE_CHAR_VERSION
@@ -81,113 +96,325 @@ utf8_encode (char *buf, int val)
}
#endif
+/* Find next weight and rule index. Inlined since called for every char. */
+static __always_inline size_t
+find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
+ unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
+{
+ int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
+ -1);
+ *rule_idx = tmp >> 24;
+ int32_t idx = tmp & 0xffffff;
+ size_t len = l_data->weights[idx++];
+
+ /* Skip over indices of previous levels. */
+ for (int i = 0; i < pass; i++)
+ {
+ idx += len;
+ len = l_data->weights[idx++];
+ }
-size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+ *weight_idx = idx;
+ return len;
+}
+
+static int
+find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
+ const int pass)
{
- struct __locale_data *current = l->__locales[LC_COLLATE];
- uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
- /* We don't assign the following values right away since it might be
- unnecessary in case there are no rules. */
- const unsigned char *rulesets;
- const int32_t *table;
- const USTRING_TYPE *weights;
- const USTRING_TYPE *extra;
- const int32_t *indirect;
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE *usrc = us;
+
+ find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
+ return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
+}
+
+/* Do the transformation. */
+static size_t
+do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
+ const locale_data_t *l_data)
+{
+ int32_t weight_idx;
+ unsigned char rule_idx;
uint_fast32_t pass;
- size_t needed;
+ size_t needed = 0;
size_t last_needed;
- const USTRING_TYPE *usrc;
- size_t srclen = STRLEN (src);
- int32_t *idxarr;
- unsigned char *rulearr;
- size_t idxmax;
- size_t idxcnt;
- int use_malloc;
- if (nrules == 0)
+ /* Now the passes over the weights. */
+ for (pass = 0; pass < l_data->nrules; ++pass)
{
- if (n != 0)
- STPNCPY (dest, src, MIN (srclen + 1, n));
+ size_t backw_len = 0;
+ last_needed = needed;
+ const USTRING_TYPE *cur = usrc;
+ const USTRING_TYPE *backw_start = NULL;
- return srclen;
- }
+ /* We assume that if a rule has defined `position' in one section
+ this is true for all of them. */
+ int position = find_position (cur, l_data, pass);
- rulesets = (const unsigned char *)
- current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
- table = (const int32_t *)
- current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
- weights = (const USTRING_TYPE *)
- current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
- extra = (const USTRING_TYPE *)
- current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
- indirect = (const int32_t *)
- current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
- use_malloc = 0;
+ if (position == 0)
+ {
+ while (*cur != L('\0'))
+ {
+ const USTRING_TYPE *pos = cur;
+ size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+ pass);
+ int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
- assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
- assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
- assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
- assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
+ if ((rule & sort_forward) != 0)
+ {
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL)
+ {
+ for (size_t i = backw_len; i > 0; )
+ {
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ size_t len = find_idx (&backw_start, &weight_idx,
+ &rule_idx, l_data, pass);
+ if (needed + i < n)
+ for (size_t j = len; j > 0; j--)
+ dest[needed + i - j] =
+ l_data->weights[weight_idx++];
+
+ i -= len;
+ }
- /* Handle an empty string as a special case. */
- if (srclen == 0)
- {
- if (n != 0)
- *dest = L('\0');
- return 0;
- }
+ needed += backw_len;
+ backw_start = NULL;
+ backw_len = 0;
+ }
- /* We need the elements of the string as unsigned values since they
- are used as indeces. */
- usrc = (const USTRING_TYPE *) src;
-
- /* Perform the first pass over the string and while doing this find
- and store the weights for each character. Since we want this to
- be as fast as possible we are using `alloca' to store the temporary
- values. But since there is no limit on the length of the string
- we have to use `malloc' if the string is too long. We should be
- very conservative here. */
- if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
- {
- idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
- rulearr = (unsigned char *) &idxarr[srclen];
-
- if (idxarr == NULL)
- /* No memory. Well, go with the stack then.
-
- XXX Once this implementation is stable we will handle this
- differently. Instead of precomputing the indeces we will
- do this in time. This means, though, that this happens for
- every pass again. */
- goto try_stack;
- use_malloc = 1;
- }
- else
- {
- try_stack:
- idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
- rulearr = (unsigned char *) alloca (srclen + 1);
+ /* Now handle the forward element. */
+ if (needed + len < n)
+ while (len-- > 0)
+ dest[needed++] = l_data->weights[weight_idx++];
+ else
+ /* No more characters fit into the buffer. */
+ needed += len;
+ }
+ else
+ {
+ /* Remember start of the backward sequence & track length. */
+ if (backw_start == NULL)
+ backw_start = pos;
+ backw_len += len;
+ }
+ }
+
+
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL)
+ {
+ for (size_t i = backw_len; i > 0; )
+ {
+ size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
+ l_data, pass);
+ if (needed + i < n)
+ for (size_t j = len; j > 0; j--)
+ dest[needed + i - j] =
+ l_data->weights[weight_idx++];
+
+ i -= len;
+ }
+
+ needed += backw_len;
+ }
+ }
+ else
+ {
+ int val = 1;
+#ifndef WIDE_CHAR_VERSION
+ char buf[7];
+ size_t buflen;
+#endif
+ size_t i;
+
+ while (*cur != L('\0'))
+ {
+ const USTRING_TYPE *pos = cur;
+ size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+ pass);
+ int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+ if ((rule & sort_forward) != 0)
+ {
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL)
+ {
+ for (size_t p = backw_len; p > 0; p--)
+ {
+ size_t len;
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE *backw_cur = backw_start;
+
+ /* To prevent a warning init the used vars. */
+ len = find_idx (&backw_cur, &weight_idx,
+ &rule_idx, l_data, pass);
+
+ for (i = 1; i < p; i++)
+ len = find_idx (&backw_cur, &weight_idx,
+ &rule_idx, l_data, pass);
+
+ if (len != 0)
+ {
+#ifdef WIDE_CHAR_VERSION
+ if (needed + 1 + len < n)
+ {
+ dest[needed] = val;
+ for (i = 0; i < len; ++i)
+ dest[needed + 1 + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += 1 + len;
+#else
+ buflen = utf8_encode (buf, val);
+ if (needed + buflen + len < n)
+ {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+#endif
+ val = 1;
+ }
+ else
+ ++val;
+ }
+
+ backw_start = NULL;
+ backw_len = 0;
+ }
+
+ /* Now handle the forward element. */
+ if (len != 0)
+ {
+#ifdef WIDE_CHAR_VERSION
+ if (needed + 1 + len < n)
+ {
+ dest[needed] = val;
+ for (i = 0; i < len; ++i)
+ dest[needed + 1 + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += 1 + len;
+#else
+ buflen = utf8_encode (buf, val);
+ if (needed + buflen + len < n)
+ {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+#endif
+ val = 1;
+ }
+ else
+ ++val;
+ }
+ else
+ {
+ /* Remember start of the backward sequence & track length. */
+ if (backw_start == NULL)
+ backw_start = pos;
+ backw_len++;
+ }
+ }
+
+ /* Handle the pushed backward sequence. */
+ if (backw_start != NULL)
+ {
+ for (size_t p = backw_len; p > 0; p--)
+ {
+ size_t len;
+ int32_t weight_idx;
+ unsigned char rule_idx;
+ const USTRING_TYPE *backw_cur = backw_start;
+
+ /* To prevent a warning init the used vars. */
+ len = find_idx (&backw_cur, &weight_idx,
+ &rule_idx, l_data, pass);
+
+ for (i = 1; i < p; i++)
+ len = find_idx (&backw_cur, &weight_idx,
+ &rule_idx, l_data, pass);
+
+ if (len != 0)
+ {
+#ifdef WIDE_CHAR_VERSION
+ if (needed + 1 + len < n)
+ {
+ dest[needed] = val;
+ for (i = 0; i < len; ++i)
+ dest[needed + 1 + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += 1 + len;
+#else
+ buflen = utf8_encode (buf, val);
+ if (needed + buflen + len < n)
+ {
+ for (i = 0; i < buflen; ++i)
+ dest[needed + i] = buf[i];
+ for (i = 0; i < len; ++i)
+ dest[needed + buflen + i] =
+ l_data->weights[weight_idx + i];
+ }
+ needed += buflen + len;
+#endif
+ val = 1;
+ }
+ else
+ ++val;
+ }
+ }
+ }
+
+ /* Finally store the byte to separate the passes or terminate
+ the string. */
+ if (needed < n)
+ dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
+ ++needed;
}
- idxmax = 0;
- do
+ /* This is a little optimization: many collation specifications have
+ a `position' rule at the end and if no non-ignored character
+ is found the last \1 byte is immediately followed by a \0 byte
+ signalling this. We can avoid the \1 byte(s). */
+ if (needed > 2 && needed == last_needed + 1)
{
- int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
- rulearr[idxmax] = tmp >> 24;
- idxarr[idxmax] = tmp & 0xffffff;
-
- ++idxmax;
+ /* Remove the \1 byte. */
+ if (--needed <= n)
+ dest[needed - 1] = L('\0');
}
- while (*usrc != L('\0'));
- /* This element is only read, the value never used but to determine
- another value which then is ignored. */
- rulearr[idxmax] = '\0';
+ /* Return the number of bytes/words we need, but don't count the NUL
+ byte/word at the end. */
+ return needed - 1;
+}
+
+/* Do the transformation using weight-index and rule cache. */
+static size_t
+do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
+ size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
+{
+ uint_fast32_t nrules = l_data->nrules;
+ unsigned char *rulesets = l_data->rulesets;
+ USTRING_TYPE *weights = l_data->weights;
+ uint_fast32_t pass;
+ size_t needed = 0;
+ size_t last_needed;
+ size_t idxcnt;
- /* Now the passes over the weights. We now use the indeces we found
- before. */
- needed = 0;
+ /* Now the passes over the weights. */
for (pass = 0; pass < nrules; ++pass)
{
size_t backw_stop = ~0ul;
@@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
dest[needed - 1] = L('\0');
}
- /* Free the memory if needed. */
- if (use_malloc)
- free (idxarr);
-
/* Return the number of bytes/words we need, but don't count the NUL
byte/word at the end. */
return needed - 1;
}
+
+size_t
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+{
+ locale_data_t l_data;
+ struct __locale_data *current = l->__locales[LC_COLLATE];
+ l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
+
+ /* Handle byte comparison case. */
+ if (l_data.nrules == 0)
+ {
+ size_t srclen = STRLEN (src);
+
+ if (n != 0)
+ STPNCPY (dest, src, MIN (srclen + 1, n));
+
+ return srclen;
+ }
+
+ /* Handle an empty string, code hereafter relies on strlen (src) > 0. */
+ if (*src == L('\0'))
+ {
+ if (n != 0)
+ *dest = L('\0');
+ return 0;
+ }
+
+ /* Get the locale data. */
+ l_data.rulesets = (unsigned char *)
+ current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
+ l_data.table = (int32_t *)
+ current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+ l_data.weights = (USTRING_TYPE *)
+ current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+ l_data.extra = (USTRING_TYPE *)
+ current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
+ l_data.indirect = (int32_t *)
+ current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+
+ assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
+ assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
+ assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
+ assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
+
+ /* We need the elements of the string as unsigned values since they
+ are used as indeces. */
+ const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
+
+ /* Allocate cache for small strings on the stack and fill it with weight and
+ rule indices. If the cache size is not sufficient, continue with the
+ uncached xfrm version. */
+ size_t idxmax = 0;
+ const USTRING_TYPE *cur = usrc;
+ int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
+ unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
+
+ do
+ {
+ int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
+ -1);
+ rulearr[idxmax] = tmp >> 24;
+ idxarr[idxmax] = tmp & 0xffffff;
+
+ ++idxmax;
+ }
+ while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
+
+ /* This element is only read, the value never used but to determine
+ another value which then is ignored. */
+ rulearr[idxmax] = '\0';
+
+ /* Do the transformation. */
+ if (*cur == L('\0'))
+ return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
+ else
+ return do_xfrm (usrc, dest, n, &l_data);
+}
libc_hidden_def (STRXFRM)
#ifndef WIDE_CHAR_VERSION