[BZ,#16009] fix memory handling in strxfrm_l

Message ID	1420973708-4447-1-git-send-email-leonhard.holz@web.de
State	Committed
Headers	Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk Sender: libc-alpha-owner@sourceware.org From: Leonhard Holz <leonhard.holz@web.de> To: libc-alpha@sourceware.org Cc: siddhesh@redhat.com Subject: [PATCH][BZ #16009] fix memory handling in strxfrm_l Date: Sun, 11 Jan 2015 11:55:08 +0100 Message-Id: <1420973708-4447-1-git-send-email-leonhard.holz@web.de>

Message ID

1420973708-4447-1-git-send-email-leonhard.holz@web.de

State

Committed

Headers

Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm
Precedence: bulk
Sender: libc-alpha-owner@sourceware.org
From: Leonhard Holz <leonhard.holz@web.de>
To: libc-alpha@sourceware.org
Cc: siddhesh@redhat.com
Subject: [PATCH][BZ #16009] fix memory handling in strxfrm_l
Date: Sun, 11 Jan 2015 11:55:08 +0100
Message-Id: <1420973708-4447-1-git-send-email-leonhard.holz@web.de>

Commit Message

Leonhard Holz Jan. 11, 2015, 10:55 a.m. UTC

  2015-01-11  Leonhard Holz <leonhard.holz@web.de>

	[BZ #16009]
	* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for weights and
	rules. Use do_xfrm_cached if data fits in cache, do_xfrm otherwise. Moved
	former main loop to...
	* (do_xfrm_cached): New function.
	* (do_xfrm): Non-caching version of do_xfrm_cached. Uses find_idx,
	find_position and stack_push.
	* (find_idx): New function.
	* (find_position): Likewise.
	* localedata/sort-test.sh: Added test run for do_xfrm.
	* localedata/xfrm-test.c (main): Added command line option -nocache to run
	the test with strings that are too large for the STRXFRM cache.

---
 localedata/sort-test.sh |   7 +
 localedata/xfrm-test.c  |  52 +++++-
 string/strxfrm_l.c      | 488 ++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 447 insertions(+), 100 deletions(-)

Comments

Siddhesh Poyarekar Jan. 12, 2015, 12:39 p.m. UTC | #1

Thanks, this looks good to me now.  I'll commit this once Carlos
confirms that I can.

Siddhesh

On Sun, 11 Jan 2015 11:55:08 +0100
Leonhard Holz <leonhard.holz@web.de> wrote:

> 2015-01-11  Leonhard Holz <leonhard.holz@web.de>
> 
> 	[BZ #16009]
> 	* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for
> weights and rules. Use do_xfrm_cached if data fits in cache, do_xfrm
> otherwise. Moved former main loop to...
> 	* (do_xfrm_cached): New function.
> 	* (do_xfrm): Non-caching version of do_xfrm_cached. Uses
> find_idx, find_position and stack_push.
> 	* (find_idx): New function.
> 	* (find_position): Likewise.
> 	* localedata/sort-test.sh: Added test run for do_xfrm.
> 	* localedata/xfrm-test.c (main): Added command line option
> -nocache to run the test with strings that are too large for the
> STRXFRM cache.
> 
> ---
>  localedata/sort-test.sh |   7 +
>  localedata/xfrm-test.c  |  52 +++++-
>  string/strxfrm_l.c      | 488
> ++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 447
> insertions(+), 100 deletions(-)
> 
> diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
> index dc23268..1a10262 100644
> --- a/localedata/sort-test.sh
> +++ b/localedata/sort-test.sh
> @@ -53,11 +53,18 @@ for l in $lang; do
>     ${common_objpfx}localedata/xfrm-test $id < $cns.in \
>     > ${common_objpfx}localedata/$cns.xout || here=1
>    cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
> +  ${test_program_prefix_before_env} \
> +   ${run_program_env} \
> +   LC_ALL=$l ${test_program_prefix_after_env} \
> +   ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
> +   > ${common_objpfx}localedata/$cns.nocache.xout || here=1
> +  cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout ||
> here=1 if test $here -eq 0; then
>      echo "$l xfrm-test OK"
>    else
>      echo "$l xfrm-test FAIL"
>      diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed
> 's/^/  /'
> +    diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout |
> sed 's/^/  /' status=1
>    fi
>  done
> diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
> index 79494ba..3ab2140 100644
> --- a/localedata/xfrm-test.c
> +++ b/localedata/xfrm-test.c
> @@ -23,7 +23,10 @@
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> +#include <stdbool.h>
>  
> +/* Keep in sync with string/strxfrm_l.c.  */
> +#define SMALL_STR_SIZE 4095
>  
>  struct lines
>  {
> @@ -37,6 +40,7 @@ int
>  main (int argc, char *argv[])
>  {
>    int result = 0;
> +  bool nocache = false;
>    size_t nstrings, nstrings_max;
>    struct lines *strings;
>    char *line = NULL;
> @@ -44,7 +48,18 @@ main (int argc, char *argv[])
>    size_t n;
>  
>    if (argc < 2)
> -    error (1, 0, "usage: %s <random seed>", argv[0]);
> +    error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
> +
> +  if (argc == 3)
> +    {
> +      if (strcmp (argv[2], "-nocache") == 0)
> +	nocache = true;
> +      else
> +	{
> +	  printf ("Unknown option %s!\n", argv[2]);
> +	  exit (1);
> +	}
> +    }
>  
>    setlocale (LC_ALL, "");
>  
> @@ -59,9 +74,9 @@ main (int argc, char *argv[])
>  
>    while (1)
>      {
> -      char saved, *newp;
> -      int needed;
> -      int l;
> +      char saved, *word, *newp;
> +      size_t l, line_len, needed;
> +
>        if (getline (&line, &len, stdin) < 0)
>  	break;
>  
> @@ -83,10 +98,35 @@ main (int argc, char *argv[])
>  
>        saved = line[l];
>        line[l] = '\0';
> -      needed = strxfrm (NULL, line, 0);
> +
> +      if (nocache)
> +	{
> +	  line_len = strlen (line);
> +	  word = malloc (line_len + SMALL_STR_SIZE + 1);
> +	  if (word == NULL)
> +	    {
> +	      printf ("malloc failed: %m\n");
> +	      exit (1);
> +	    }
> +	  memset (word, ' ', SMALL_STR_SIZE);
> +	  memcpy (word + SMALL_STR_SIZE, line, line_len);
> +	  word[line_len + SMALL_STR_SIZE] = '\0';
> +	}
> +      else
> +        word = line;
> +
> +      needed = strxfrm (NULL, word, 0);
>        newp = malloc (needed + 1);
> -      strxfrm (newp, line, needed + 1);
> +      if (newp == NULL)
> +	{
> +	  printf ("malloc failed: %m\n");
> +	  exit (1);
> +	}
> +      strxfrm (newp, word, needed + 1);
>        strings[nstrings].xfrm = newp;
> +
> +      if (nocache)
> +	free (word);
>        line[l] = saved;
>        ++nstrings;
>      }
> diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
> index 9849447..921d1f7 100644
> --- a/string/strxfrm_l.c
> +++ b/string/strxfrm_l.c
> @@ -40,9 +40,24 @@
>  #define CONCAT(a,b) CONCAT1(a,b)
>  #define CONCAT1(a,b) a##b
>  
> +/* Maximum string size that is calculated with cached indices.
> Right now this
> +   is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4
> has to be
> +   lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in
> sync.  */ +#define SMALL_STR_SIZE 4095
> +
>  #include "../locale/localeinfo.h"
>  #include WEIGHT_H
>  
> +/* Group locale data for shorter parameter lists.  */
> +typedef struct
> +{
> +  uint_fast32_t nrules;
> +  unsigned char *rulesets;
> +  USTRING_TYPE *weights;
> +  int32_t *table;
> +  USTRING_TYPE *extra;
> +  int32_t *indirect;
> +} locale_data_t;
>  
>  #ifndef WIDE_CHAR_VERSION
>  
> @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val)
>  }
>  #endif
>  
> +/* Find next weight and rule index.  Inlined since called for every
> char.  */ +static __always_inline size_t
> +find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
> +	  unsigned char *rule_idx, const locale_data_t *l_data,
> const int pass) +{
> +  int32_t tmp = findidx (l_data->table, l_data->indirect,
> l_data->extra, us,
> +			 -1);
> +  *rule_idx = tmp >> 24;
> +  int32_t idx = tmp & 0xffffff;
> +  size_t len = l_data->weights[idx++];
> +
> +  /* Skip over indices of previous levels.  */
> +  for (int i = 0; i < pass; i++)
> +    {
> +      idx += len;
> +      len = l_data->weights[idx++];
> +    }
>  
> -size_t
> -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n,
> __locale_t l)
> +  *weight_idx = idx;
> +  return len;
> +}
> +
> +static int
> +find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
> +	       const int pass)
>  {
> -  struct __locale_data *current = l->__locales[LC_COLLATE];
> -  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX
> (_NL_COLLATE_NRULES)].word;
> -  /* We don't assign the following values right away since it might
> be
> -     unnecessary in case there are no rules.  */
> -  const unsigned char *rulesets;
> -  const int32_t *table;
> -  const USTRING_TYPE *weights;
> -  const USTRING_TYPE *extra;
> -  const int32_t *indirect;
> +  int32_t weight_idx;
> +  unsigned char rule_idx;
> +  const USTRING_TYPE *usrc = us;
> +
> +  find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
> +  return l_data->rulesets[rule_idx * l_data->nrules + pass] &
> sort_position; +}
> +
> +/* Do the transformation.  */
> +static size_t
> +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
> +	 const locale_data_t *l_data)
> +{
> +  int32_t weight_idx;
> +  unsigned char rule_idx;
>    uint_fast32_t pass;
> -  size_t needed;
> +  size_t needed = 0;
>    size_t last_needed;
> -  const USTRING_TYPE *usrc;
> -  size_t srclen = STRLEN (src);
> -  int32_t *idxarr;
> -  unsigned char *rulearr;
> -  size_t idxmax;
> -  size_t idxcnt;
> -  int use_malloc;
>  
> -  if (nrules == 0)
> +  /* Now the passes over the weights.  */
> +  for (pass = 0; pass < l_data->nrules; ++pass)
>      {
> -      if (n != 0)
> -	STPNCPY (dest, src, MIN (srclen + 1, n));
> +      size_t backw_len = 0;
> +      last_needed = needed;
> +      const USTRING_TYPE *cur = usrc;
> +      const USTRING_TYPE *backw_start = NULL;
>  
> -      return srclen;
> -    }
> +       /* We assume that if a rule has defined `position' in one
> section
> +         this is true for all of them.  */
> +      int position = find_position (cur, l_data, pass);
>  
> -  rulesets = (const unsigned char *)
> -    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
> -  table = (const int32_t *)
> -    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
> -  weights = (const USTRING_TYPE *)
> -    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
> -  extra = (const USTRING_TYPE *)
> -    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
> -  indirect = (const int32_t *)
> -    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
> -  use_malloc = 0;
> +      if (position == 0)
> +	{
> +	  while (*cur != L('\0'))
> +	    {
> +	      const USTRING_TYPE *pos = cur;
> +	      size_t len = find_idx (&cur, &weight_idx, &rule_idx,
> l_data,
> +				     pass);
> +	      int rule = l_data->rulesets[rule_idx * l_data->nrules
> + pass]; 
> -  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
> -  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
> -  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
> -  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
> +	      if ((rule & sort_forward) != 0)
> +		{
> +		  /* Handle the pushed backward sequence.  */
> +		  if (backw_start != NULL)
> +		    {
> +		      for (size_t i = backw_len; i > 0; )
> +			{
> +			  int32_t weight_idx;
> +			  unsigned char rule_idx;
> +			  size_t len = find_idx (&backw_start,
> &weight_idx,
> +						 &rule_idx, l_data,
> pass);
> +			  if (needed + i < n)
> +			    for (size_t j = len; j > 0; j--)
> +			      dest[needed + i - j] =
> +				l_data->weights[weight_idx++];
> +
> +			  i -= len;
> +			}
>  
> -  /* Handle an empty string as a special case.  */
> -  if (srclen == 0)
> -    {
> -      if (n != 0)
> -	*dest = L('\0');
> -      return 0;
> -    }
> +		      needed += backw_len;
> +		      backw_start = NULL;
> +		      backw_len = 0;
> +		    }
>  
> -  /* We need the elements of the string as unsigned values since they
> -     are used as indeces.  */
> -  usrc = (const USTRING_TYPE *) src;
> -
> -  /* Perform the first pass over the string and while doing this find
> -     and store the weights for each character.  Since we want this to
> -     be as fast as possible we are using `alloca' to store the
> temporary
> -     values.  But since there is no limit on the length of the string
> -     we have to use `malloc' if the string is too long.  We should be
> -     very conservative here.  */
> -  if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
> -    {
> -      idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t)
> + 1));
> -      rulearr = (unsigned char *) &idxarr[srclen];
> -
> -      if (idxarr == NULL)
> -	/* No memory.  Well, go with the stack then.
> -
> -	   XXX Once this implementation is stable we will handle this
> -	   differently.  Instead of precomputing the indeces we will
> -	   do this in time.  This means, though, that this happens
> for
> -	   every pass again.  */
> -	goto try_stack;
> -      use_malloc = 1;
> -    }
> -  else
> -    {
> -    try_stack:
> -      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
> -      rulearr = (unsigned char *) alloca (srclen + 1);
> +		  /* Now handle the forward element.  */
> +		  if (needed + len < n)
> +		    while (len-- > 0)
> +		      dest[needed++] = l_data->weights[weight_idx++];
> +		  else
> +		    /* No more characters fit into the buffer.  */
> +		    needed += len;
> +		}
> +	      else
> +		{
> +		  /* Remember start of the backward sequence & track
> length.  */
> +		  if (backw_start == NULL)
> +		    backw_start = pos;
> +		  backw_len += len;
> +		}
> +	    }
> +
> +
> +	  /* Handle the pushed backward sequence.  */
> +	  if (backw_start != NULL)
> +	    {
> +	      for (size_t i = backw_len; i > 0; )
> +		{
> +		  size_t len = find_idx (&backw_start, &weight_idx,
> &rule_idx,
> +					 l_data, pass);
> +		  if (needed + i < n)
> +		    for (size_t j = len; j > 0; j--)
> +		      dest[needed + i - j] =
> +			l_data->weights[weight_idx++];
> +
> +		  i -= len;
> +		}
> +
> +	      needed += backw_len;
> +	    }
> +	}
> +      else
> +	{
> +	  int val = 1;
> +#ifndef WIDE_CHAR_VERSION
> +	  char buf[7];
> +	  size_t buflen;
> +#endif
> +	  size_t i;
> +
> +	  while (*cur != L('\0'))
> +	    {
> +	      const USTRING_TYPE *pos = cur;
> +	      size_t len = find_idx (&cur, &weight_idx, &rule_idx,
> l_data,
> +				     pass);
> +	      int rule = l_data->rulesets[rule_idx * l_data->nrules
> + pass]; +
> +	      if ((rule & sort_forward) != 0)
> +		{
> +		  /* Handle the pushed backward sequence.  */
> +		  if (backw_start != NULL)
> +		    {
> +		      for (size_t p = backw_len; p > 0; p--)
> +			{
> +			  size_t len;
> +			  int32_t weight_idx;
> +			  unsigned char rule_idx;
> +			  const USTRING_TYPE *backw_cur =
> backw_start; +
> +			  /* To prevent a warning init the used
> vars.  */
> +			  len = find_idx (&backw_cur, &weight_idx,
> +					  &rule_idx, l_data, pass);
> +
> +			  for (i = 1; i < p; i++)
> +			    len = find_idx (&backw_cur, &weight_idx,
> +					    &rule_idx, l_data, pass);
> +
> +			  if (len != 0)
> +			    {
> +#ifdef WIDE_CHAR_VERSION
> +			      if (needed + 1 + len < n)
> +				{
> +				  dest[needed] = val;
> +				  for (i = 0; i < len; ++i)
> +				    dest[needed + 1 + i] =
> +				      l_data->weights[weight_idx +
> i];
> +				}
> +			      needed += 1 + len;
> +#else
> +			      buflen = utf8_encode (buf, val);
> +			      if (needed + buflen + len < n)
> +				{
> +				  for (i = 0; i < buflen; ++i)
> +				    dest[needed + i] = buf[i];
> +				  for (i = 0; i < len; ++i)
> +				    dest[needed + buflen + i] =
> +				      l_data->weights[weight_idx +
> i];
> +				}
> +			      needed += buflen + len;
> +#endif
> +			      val = 1;
> +			    }
> +			  else
> +			    ++val;
> +			}
> +
> +		      backw_start = NULL;
> +		      backw_len = 0;
> +		    }
> +
> +		  /* Now handle the forward element.  */
> +		  if (len != 0)
> +		    {
> +#ifdef WIDE_CHAR_VERSION
> +		      if (needed + 1 + len < n)
> +			{
> +			  dest[needed] = val;
> +			  for (i = 0; i < len; ++i)
> +			    dest[needed + 1 + i] =
> +			      l_data->weights[weight_idx + i];
> +			}
> +		      needed += 1 + len;
> +#else
> +		      buflen = utf8_encode (buf, val);
> +		      if (needed + buflen + len < n)
> +			{
> +			  for (i = 0; i < buflen; ++i)
> +			    dest[needed + i] = buf[i];
> +			  for (i = 0; i < len; ++i)
> +			    dest[needed + buflen + i] =
> +			      l_data->weights[weight_idx + i];
> +			}
> +		      needed += buflen + len;
> +#endif
> +		      val = 1;
> +		    }
> +		  else
> +		    ++val;
> +		}
> +	      else
> +		{
> +		  /* Remember start of the backward sequence & track
> length.  */
> +		  if (backw_start == NULL)
> +		    backw_start = pos;
> +		  backw_len++;
> +		}
> +	    }
> +
> +	  /* Handle the pushed backward sequence.  */
> +	  if (backw_start != NULL)
> +	    {
> +	      for (size_t p = backw_len; p > 0; p--)
> +		{
> +		  size_t len;
> +		  int32_t weight_idx;
> +		  unsigned char rule_idx;
> +		  const USTRING_TYPE *backw_cur = backw_start;
> +
> +		  /* To prevent a warning init the used vars.  */
> +		  len = find_idx (&backw_cur, &weight_idx,
> +				  &rule_idx, l_data, pass);
> +
> +		  for (i = 1; i < p; i++)
> +		    len = find_idx (&backw_cur, &weight_idx,
> +				    &rule_idx, l_data, pass);
> +
> +		  if (len != 0)
> +		    {
> +#ifdef WIDE_CHAR_VERSION
> +		      if (needed + 1 + len < n)
> +			{
> +			  dest[needed] = val;
> +			  for (i = 0; i < len; ++i)
> +			    dest[needed + 1 + i] =
> +			      l_data->weights[weight_idx + i];
> +			}
> +		      needed += 1 + len;
> +#else
> +		      buflen = utf8_encode (buf, val);
> +		      if (needed + buflen + len < n)
> +			{
> +			  for (i = 0; i < buflen; ++i)
> +			    dest[needed + i] = buf[i];
> +			  for (i = 0; i < len; ++i)
> +			    dest[needed + buflen + i] =
> +			      l_data->weights[weight_idx + i];
> +			}
> +		      needed += buflen + len;
> +#endif
> +		      val = 1;
> +		    }
> +		  else
> +		    ++val;
> +		}
> +	    }
> +	}
> +
> +      /* Finally store the byte to separate the passes or terminate
> +	 the string.  */
> +      if (needed < n)
> +	dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
> +      ++needed;
>      }
>  
> -  idxmax = 0;
> -  do
> +  /* This is a little optimization: many collation specifications
> have
> +     a `position' rule at the end and if no non-ignored character
> +     is found the last \1 byte is immediately followed by a \0 byte
> +     signalling this.  We can avoid the \1 byte(s).  */
> +  if (needed > 2 && needed == last_needed + 1)
>      {
> -      int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
> -      rulearr[idxmax] = tmp >> 24;
> -      idxarr[idxmax] = tmp & 0xffffff;
> -
> -      ++idxmax;
> +      /* Remove the \1 byte.  */
> +      if (--needed <= n)
> +	dest[needed - 1] = L('\0');
>      }
> -  while (*usrc != L('\0'));
>  
> -  /* This element is only read, the value never used but to determine
> -     another value which then is ignored.  */
> -  rulearr[idxmax] = '\0';
> +  /* Return the number of bytes/words we need, but don't count the
> NUL
> +     byte/word at the end.  */
> +  return needed - 1;
> +}
> +
> +/* Do the transformation using weight-index and rule cache.  */
> +static size_t
> +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t
> *l_data,
> +		size_t idxmax, int32_t *idxarr, const unsigned char
> *rulearr) +{
> +  uint_fast32_t nrules = l_data->nrules;
> +  unsigned char *rulesets = l_data->rulesets;
> +  USTRING_TYPE *weights = l_data->weights;
> +  uint_fast32_t pass;
> +  size_t needed = 0;
> +  size_t last_needed;
> +  size_t idxcnt;
>  
> -  /* Now the passes over the weights.  We now use the indeces we
> found
> -     before.  */
> -  needed = 0;
> +  /* Now the passes over the weights.  */
>    for (pass = 0; pass < nrules; ++pass)
>      {
>        size_t backw_stop = ~0ul;
> @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE
> *src, size_t n, __locale_t l) dest[needed - 1] = L('\0');
>      }
>  
> -  /* Free the memory if needed.  */
> -  if (use_malloc)
> -    free (idxarr);
> -
>    /* Return the number of bytes/words we need, but don't count the
> NUL byte/word at the end.  */
>    return needed - 1;
>  }
> +
> +size_t
> +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n,
> __locale_t l) +{
> +  locale_data_t l_data;
> +  struct __locale_data *current = l->__locales[LC_COLLATE];
> +  l_data.nrules = current->values[_NL_ITEM_INDEX
> (_NL_COLLATE_NRULES)].word; +
> +  /* Handle byte comparison case.  */
> +  if (l_data.nrules == 0)
> +    {
> +      size_t srclen = STRLEN (src);
> +
> +      if (n != 0)
> +	STPNCPY (dest, src, MIN (srclen + 1, n));
> +
> +      return srclen;
> +    }
> +
> +  /* Handle an empty string, code hereafter relies on strlen (src) >
> 0.  */
> +  if (*src == L('\0'))
> +    {
> +      if (n != 0)
> +	*dest = L('\0');
> +      return 0;
> +    }
> +
> +  /* Get the locale data.  */
> +  l_data.rulesets = (unsigned char *)
> +    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
> +  l_data.table = (int32_t *)
> +    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
> +  l_data.weights = (USTRING_TYPE *)
> +    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
> +  l_data.extra = (USTRING_TYPE *)
> +    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
> +  l_data.indirect = (int32_t *)
> +    current->values[_NL_ITEM_INDEX
> (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; +
> +  assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0])
> == 0);
> +  assert (((uintptr_t) l_data.weights) % __alignof__
> (l_data.weights[0]) == 0);
> +  assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0])
> == 0);
> +  assert (((uintptr_t) l_data.indirect) % __alignof__
> (l_data.indirect[0]) == 0); +
> +  /* We need the elements of the string as unsigned values since they
> +     are used as indeces.  */
> +  const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
> +
> +  /* Allocate cache for small strings on the stack and fill it with
> weight and
> +     rule indices.  If the cache size is not sufficient, continue
> with the
> +     uncached xfrm version.  */
> +  size_t idxmax = 0;
> +  const USTRING_TYPE *cur = usrc;
> +  int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
> +  unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
> +
> +  do
> +    {
> +      int32_t tmp = findidx (l_data.table, l_data.indirect,
> l_data.extra, &cur,
> +			     -1);
> +      rulearr[idxmax] = tmp >> 24;
> +      idxarr[idxmax] = tmp & 0xffffff;
> +
> +      ++idxmax;
> +    }
> +  while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
> +
> +  /* This element is only read, the value never used but to determine
> +     another value which then is ignored.  */
> +  rulearr[idxmax] = '\0';
> +
> +  /* Do the transformation.  */
> +  if (*cur == L('\0'))
> +    return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr,
> rulearr);
> +  else
> +    return do_xfrm (usrc, dest, n, &l_data);
> +}
>  libc_hidden_def (STRXFRM)
>  
>  #ifndef WIDE_CHAR_VERSION

diff mbox

Patch

diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh
index dc23268..1a10262 100644
--- a/localedata/sort-test.sh
+++ b/localedata/sort-test.sh
@@ -53,11 +53,18 @@  for l in $lang; do
    ${common_objpfx}localedata/xfrm-test $id < $cns.in \
    > ${common_objpfx}localedata/$cns.xout || here=1
   cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
+  ${test_program_prefix_before_env} \
+   ${run_program_env} \
+   LC_ALL=$l ${test_program_prefix_after_env} \
+   ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
+   > ${common_objpfx}localedata/$cns.nocache.xout || here=1
+  cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1
   if test $here -eq 0; then
     echo "$l xfrm-test OK"
   else
     echo "$l xfrm-test FAIL"
     diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/  /'
+    diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/  /'
     status=1
   fi
 done
diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c
index 79494ba..3ab2140 100644
--- a/localedata/xfrm-test.c
+++ b/localedata/xfrm-test.c
@@ -23,7 +23,10 @@ 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 
+/* Keep in sync with string/strxfrm_l.c.  */
+#define SMALL_STR_SIZE 4095
 
 struct lines
 {
@@ -37,6 +40,7 @@  int
 main (int argc, char *argv[])
 {
   int result = 0;
+  bool nocache = false;
   size_t nstrings, nstrings_max;
   struct lines *strings;
   char *line = NULL;
@@ -44,7 +48,18 @@  main (int argc, char *argv[])
   size_t n;
 
   if (argc < 2)
-    error (1, 0, "usage: %s <random seed>", argv[0]);
+    error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
+
+  if (argc == 3)
+    {
+      if (strcmp (argv[2], "-nocache") == 0)
+	nocache = true;
+      else
+	{
+	  printf ("Unknown option %s!\n", argv[2]);
+	  exit (1);
+	}
+    }
 
   setlocale (LC_ALL, "");
 
@@ -59,9 +74,9 @@  main (int argc, char *argv[])
 
   while (1)
     {
-      char saved, *newp;
-      int needed;
-      int l;
+      char saved, *word, *newp;
+      size_t l, line_len, needed;
+
       if (getline (&line, &len, stdin) < 0)
 	break;
 
@@ -83,10 +98,35 @@  main (int argc, char *argv[])
 
       saved = line[l];
       line[l] = '\0';
-      needed = strxfrm (NULL, line, 0);
+
+      if (nocache)
+	{
+	  line_len = strlen (line);
+	  word = malloc (line_len + SMALL_STR_SIZE + 1);
+	  if (word == NULL)
+	    {
+	      printf ("malloc failed: %m\n");
+	      exit (1);
+	    }
+	  memset (word, ' ', SMALL_STR_SIZE);
+	  memcpy (word + SMALL_STR_SIZE, line, line_len);
+	  word[line_len + SMALL_STR_SIZE] = '\0';
+	}
+      else
+        word = line;
+
+      needed = strxfrm (NULL, word, 0);
       newp = malloc (needed + 1);
-      strxfrm (newp, line, needed + 1);
+      if (newp == NULL)
+	{
+	  printf ("malloc failed: %m\n");
+	  exit (1);
+	}
+      strxfrm (newp, word, needed + 1);
       strings[nstrings].xfrm = newp;
+
+      if (nocache)
+	free (word);
       line[l] = saved;
       ++nstrings;
     }
diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c
index 9849447..921d1f7 100644
--- a/string/strxfrm_l.c
+++ b/string/strxfrm_l.c
@@ -40,9 +40,24 @@ 
 #define CONCAT(a,b) CONCAT1(a,b)
 #define CONCAT1(a,b) a##b
 
+/* Maximum string size that is calculated with cached indices.  Right now this
+   is an arbitrary value open to optimizations.  SMALL_STR_SIZE * 4 has to be
+   lower than __MAX_ALLOCA_CUTOFF.  Keep localedata/xfrm-test.c in sync.  */
+#define SMALL_STR_SIZE 4095
+
 #include "../locale/localeinfo.h"
 #include WEIGHT_H
 
+/* Group locale data for shorter parameter lists.  */
+typedef struct
+{
+  uint_fast32_t nrules;
+  unsigned char *rulesets;
+  USTRING_TYPE *weights;
+  int32_t *table;
+  USTRING_TYPE *extra;
+  int32_t *indirect;
+} locale_data_t;
 
 #ifndef WIDE_CHAR_VERSION
 
@@ -81,113 +96,325 @@  utf8_encode (char *buf, int val)
 }
 #endif
 
+/* Find next weight and rule index.  Inlined since called for every char.  */
+static __always_inline size_t
+find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
+	  unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
+{
+  int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
+			 -1);
+  *rule_idx = tmp >> 24;
+  int32_t idx = tmp & 0xffffff;
+  size_t len = l_data->weights[idx++];
+
+  /* Skip over indices of previous levels.  */
+  for (int i = 0; i < pass; i++)
+    {
+      idx += len;
+      len = l_data->weights[idx++];
+    }
 
-size_t
-STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+  *weight_idx = idx;
+  return len;
+}
+
+static int
+find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
+	       const int pass)
 {
-  struct __locale_data *current = l->__locales[LC_COLLATE];
-  uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
-  /* We don't assign the following values right away since it might be
-     unnecessary in case there are no rules.  */
-  const unsigned char *rulesets;
-  const int32_t *table;
-  const USTRING_TYPE *weights;
-  const USTRING_TYPE *extra;
-  const int32_t *indirect;
+  int32_t weight_idx;
+  unsigned char rule_idx;
+  const USTRING_TYPE *usrc = us;
+
+  find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
+  return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
+}
+
+/* Do the transformation.  */
+static size_t
+do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
+	 const locale_data_t *l_data)
+{
+  int32_t weight_idx;
+  unsigned char rule_idx;
   uint_fast32_t pass;
-  size_t needed;
+  size_t needed = 0;
   size_t last_needed;
-  const USTRING_TYPE *usrc;
-  size_t srclen = STRLEN (src);
-  int32_t *idxarr;
-  unsigned char *rulearr;
-  size_t idxmax;
-  size_t idxcnt;
-  int use_malloc;
 
-  if (nrules == 0)
+  /* Now the passes over the weights.  */
+  for (pass = 0; pass < l_data->nrules; ++pass)
     {
-      if (n != 0)
-	STPNCPY (dest, src, MIN (srclen + 1, n));
+      size_t backw_len = 0;
+      last_needed = needed;
+      const USTRING_TYPE *cur = usrc;
+      const USTRING_TYPE *backw_start = NULL;
 
-      return srclen;
-    }
+       /* We assume that if a rule has defined `position' in one section
+         this is true for all of them.  */
+      int position = find_position (cur, l_data, pass);
 
-  rulesets = (const unsigned char *)
-    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
-  table = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
-  weights = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
-  extra = (const USTRING_TYPE *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
-  indirect = (const int32_t *)
-    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
-  use_malloc = 0;
+      if (position == 0)
+	{
+	  while (*cur != L('\0'))
+	    {
+	      const USTRING_TYPE *pos = cur;
+	      size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+				     pass);
+	      int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
 
-  assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
-  assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
-  assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
-  assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
+	      if ((rule & sort_forward) != 0)
+		{
+		  /* Handle the pushed backward sequence.  */
+		  if (backw_start != NULL)
+		    {
+		      for (size_t i = backw_len; i > 0; )
+			{
+			  int32_t weight_idx;
+			  unsigned char rule_idx;
+			  size_t len = find_idx (&backw_start, &weight_idx,
+						 &rule_idx, l_data, pass);
+			  if (needed + i < n)
+			    for (size_t j = len; j > 0; j--)
+			      dest[needed + i - j] =
+				l_data->weights[weight_idx++];
+
+			  i -= len;
+			}
 
-  /* Handle an empty string as a special case.  */
-  if (srclen == 0)
-    {
-      if (n != 0)
-	*dest = L('\0');
-      return 0;
-    }
+		      needed += backw_len;
+		      backw_start = NULL;
+		      backw_len = 0;
+		    }
 
-  /* We need the elements of the string as unsigned values since they
-     are used as indeces.  */
-  usrc = (const USTRING_TYPE *) src;
-
-  /* Perform the first pass over the string and while doing this find
-     and store the weights for each character.  Since we want this to
-     be as fast as possible we are using `alloca' to store the temporary
-     values.  But since there is no limit on the length of the string
-     we have to use `malloc' if the string is too long.  We should be
-     very conservative here.  */
-  if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
-    {
-      idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
-      rulearr = (unsigned char *) &idxarr[srclen];
-
-      if (idxarr == NULL)
-	/* No memory.  Well, go with the stack then.
-
-	   XXX Once this implementation is stable we will handle this
-	   differently.  Instead of precomputing the indeces we will
-	   do this in time.  This means, though, that this happens for
-	   every pass again.  */
-	goto try_stack;
-      use_malloc = 1;
-    }
-  else
-    {
-    try_stack:
-      idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
-      rulearr = (unsigned char *) alloca (srclen + 1);
+		  /* Now handle the forward element.  */
+		  if (needed + len < n)
+		    while (len-- > 0)
+		      dest[needed++] = l_data->weights[weight_idx++];
+		  else
+		    /* No more characters fit into the buffer.  */
+		    needed += len;
+		}
+	      else
+		{
+		  /* Remember start of the backward sequence & track length.  */
+		  if (backw_start == NULL)
+		    backw_start = pos;
+		  backw_len += len;
+		}
+	    }
+
+
+	  /* Handle the pushed backward sequence.  */
+	  if (backw_start != NULL)
+	    {
+	      for (size_t i = backw_len; i > 0; )
+		{
+		  size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
+					 l_data, pass);
+		  if (needed + i < n)
+		    for (size_t j = len; j > 0; j--)
+		      dest[needed + i - j] =
+			l_data->weights[weight_idx++];
+
+		  i -= len;
+		}
+
+	      needed += backw_len;
+	    }
+	}
+      else
+	{
+	  int val = 1;
+#ifndef WIDE_CHAR_VERSION
+	  char buf[7];
+	  size_t buflen;
+#endif
+	  size_t i;
+
+	  while (*cur != L('\0'))
+	    {
+	      const USTRING_TYPE *pos = cur;
+	      size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
+				     pass);
+	      int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
+
+	      if ((rule & sort_forward) != 0)
+		{
+		  /* Handle the pushed backward sequence.  */
+		  if (backw_start != NULL)
+		    {
+		      for (size_t p = backw_len; p > 0; p--)
+			{
+			  size_t len;
+			  int32_t weight_idx;
+			  unsigned char rule_idx;
+			  const USTRING_TYPE *backw_cur = backw_start;
+
+			  /* To prevent a warning init the used vars.  */
+			  len = find_idx (&backw_cur, &weight_idx,
+					  &rule_idx, l_data, pass);
+
+			  for (i = 1; i < p; i++)
+			    len = find_idx (&backw_cur, &weight_idx,
+					    &rule_idx, l_data, pass);
+
+			  if (len != 0)
+			    {
+#ifdef WIDE_CHAR_VERSION
+			      if (needed + 1 + len < n)
+				{
+				  dest[needed] = val;
+				  for (i = 0; i < len; ++i)
+				    dest[needed + 1 + i] =
+				      l_data->weights[weight_idx + i];
+				}
+			      needed += 1 + len;
+#else
+			      buflen = utf8_encode (buf, val);
+			      if (needed + buflen + len < n)
+				{
+				  for (i = 0; i < buflen; ++i)
+				    dest[needed + i] = buf[i];
+				  for (i = 0; i < len; ++i)
+				    dest[needed + buflen + i] =
+				      l_data->weights[weight_idx + i];
+				}
+			      needed += buflen + len;
+#endif
+			      val = 1;
+			    }
+			  else
+			    ++val;
+			}
+
+		      backw_start = NULL;
+		      backw_len = 0;
+		    }
+
+		  /* Now handle the forward element.  */
+		  if (len != 0)
+		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1 + len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += 1 + len;
+#else
+		      buflen = utf8_encode (buf, val);
+		      if (needed + buflen + len < n)
+			{
+			  for (i = 0; i < buflen; ++i)
+			    dest[needed + i] = buf[i];
+			  for (i = 0; i < len; ++i)
+			    dest[needed + buflen + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += buflen + len;
+#endif
+		      val = 1;
+		    }
+		  else
+		    ++val;
+		}
+	      else
+		{
+		  /* Remember start of the backward sequence & track length.  */
+		  if (backw_start == NULL)
+		    backw_start = pos;
+		  backw_len++;
+		}
+	    }
+
+	  /* Handle the pushed backward sequence.  */
+	  if (backw_start != NULL)
+	    {
+	      for (size_t p = backw_len; p > 0; p--)
+		{
+		  size_t len;
+		  int32_t weight_idx;
+		  unsigned char rule_idx;
+		  const USTRING_TYPE *backw_cur = backw_start;
+
+		  /* To prevent a warning init the used vars.  */
+		  len = find_idx (&backw_cur, &weight_idx,
+				  &rule_idx, l_data, pass);
+
+		  for (i = 1; i < p; i++)
+		    len = find_idx (&backw_cur, &weight_idx,
+				    &rule_idx, l_data, pass);
+
+		  if (len != 0)
+		    {
+#ifdef WIDE_CHAR_VERSION
+		      if (needed + 1 + len < n)
+			{
+			  dest[needed] = val;
+			  for (i = 0; i < len; ++i)
+			    dest[needed + 1 + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += 1 + len;
+#else
+		      buflen = utf8_encode (buf, val);
+		      if (needed + buflen + len < n)
+			{
+			  for (i = 0; i < buflen; ++i)
+			    dest[needed + i] = buf[i];
+			  for (i = 0; i < len; ++i)
+			    dest[needed + buflen + i] =
+			      l_data->weights[weight_idx + i];
+			}
+		      needed += buflen + len;
+#endif
+		      val = 1;
+		    }
+		  else
+		    ++val;
+		}
+	    }
+	}
+
+      /* Finally store the byte to separate the passes or terminate
+	 the string.  */
+      if (needed < n)
+	dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
+      ++needed;
     }
 
-  idxmax = 0;
-  do
+  /* This is a little optimization: many collation specifications have
+     a `position' rule at the end and if no non-ignored character
+     is found the last \1 byte is immediately followed by a \0 byte
+     signalling this.  We can avoid the \1 byte(s).  */
+  if (needed > 2 && needed == last_needed + 1)
     {
-      int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
-      rulearr[idxmax] = tmp >> 24;
-      idxarr[idxmax] = tmp & 0xffffff;
-
-      ++idxmax;
+      /* Remove the \1 byte.  */
+      if (--needed <= n)
+	dest[needed - 1] = L('\0');
     }
-  while (*usrc != L('\0'));
 
-  /* This element is only read, the value never used but to determine
-     another value which then is ignored.  */
-  rulearr[idxmax] = '\0';
+  /* Return the number of bytes/words we need, but don't count the NUL
+     byte/word at the end.  */
+  return needed - 1;
+}
+
+/* Do the transformation using weight-index and rule cache.  */
+static size_t
+do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
+		size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
+{
+  uint_fast32_t nrules = l_data->nrules;
+  unsigned char *rulesets = l_data->rulesets;
+  USTRING_TYPE *weights = l_data->weights;
+  uint_fast32_t pass;
+  size_t needed = 0;
+  size_t last_needed;
+  size_t idxcnt;
 
-  /* Now the passes over the weights.  We now use the indeces we found
-     before.  */
-  needed = 0;
+  /* Now the passes over the weights.  */
   for (pass = 0; pass < nrules; ++pass)
     {
       size_t backw_stop = ~0ul;
@@ -433,14 +660,87 @@  STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
 	dest[needed - 1] = L('\0');
     }
 
-  /* Free the memory if needed.  */
-  if (use_malloc)
-    free (idxarr);
-
   /* Return the number of bytes/words we need, but don't count the NUL
      byte/word at the end.  */
   return needed - 1;
 }
+
+size_t
+STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
+{
+  locale_data_t l_data;
+  struct __locale_data *current = l->__locales[LC_COLLATE];
+  l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
+
+  /* Handle byte comparison case.  */
+  if (l_data.nrules == 0)
+    {
+      size_t srclen = STRLEN (src);
+
+      if (n != 0)
+	STPNCPY (dest, src, MIN (srclen + 1, n));
+
+      return srclen;
+    }
+
+  /* Handle an empty string, code hereafter relies on strlen (src) > 0.  */
+  if (*src == L('\0'))
+    {
+      if (n != 0)
+	*dest = L('\0');
+      return 0;
+    }
+
+  /* Get the locale data.  */
+  l_data.rulesets = (unsigned char *)
+    current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
+  l_data.table = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
+  l_data.weights = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
+  l_data.extra = (USTRING_TYPE *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
+  l_data.indirect = (int32_t *)
+    current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
+
+  assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
+  assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
+  assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
+  assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
+
+  /* We need the elements of the string as unsigned values since they
+     are used as indeces.  */
+  const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
+
+  /* Allocate cache for small strings on the stack and fill it with weight and
+     rule indices.  If the cache size is not sufficient, continue with the
+     uncached xfrm version.  */
+  size_t idxmax = 0;
+  const USTRING_TYPE *cur = usrc;
+  int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
+  unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
+
+  do
+    {
+      int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
+			     -1);
+      rulearr[idxmax] = tmp >> 24;
+      idxarr[idxmax] = tmp & 0xffffff;
+
+      ++idxmax;
+    }
+  while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
+
+  /* This element is only read, the value never used but to determine
+     another value which then is ignored.  */
+  rulearr[idxmax] = '\0';
+
+  /* Do the transformation.  */
+  if (*cur == L('\0'))
+    return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
+  else
+    return do_xfrm (usrc, dest, n, &l_data);
+}
 libc_hidden_def (STRXFRM)
 
 #ifndef WIDE_CHAR_VERSION