Message ID | 54A42278.1050500@web.de |
---|---|
State | Superseded |
Headers |
Received: (qmail 31797 invoked by alias); 31 Dec 2014 16:21:20 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: <libc-alpha.sourceware.org> List-Unsubscribe: <mailto:libc-alpha-unsubscribe-##L=##H@sourceware.org> List-Subscribe: <mailto:libc-alpha-subscribe@sourceware.org> List-Archive: <http://sourceware.org/ml/libc-alpha/> List-Post: <mailto:libc-alpha@sourceware.org> List-Help: <mailto:libc-alpha-help@sourceware.org>, <http://sourceware.org/ml/#faqs> Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 31782 invoked by uid 89); 31 Dec 2014 16:21:18 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00, FREEMAIL_FROM, RCVD_IN_DNSWL_NONE, SPF_PASS, T_RP_MATCHES_RCVD autolearn=ham version=3.3.2 X-HELO: mout.web.de Message-ID: <54A42278.1050500@web.de> Date: Wed, 31 Dec 2014 17:21:12 +0100 From: Leonhard Holz <leonhard.holz@web.de> User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20100101 Thunderbird/31.3.0 MIME-Version: 1.0 To: libc-alpha@sourceware.org CC: Siddhesh Poyarekar <siddhesh@redhat.com> Subject: Re: [PATCH][BZ #16009] fix memory handling in strxfrm_l References: <54797C90.7030504@web.de> <20141216075138.GH30928@spoyarek.pnq.redhat.com> <54A3C38B.8060200@web.de> <20141231094759.GE8679@spoyarek.pnq.redhat.com> In-Reply-To: <20141231094759.GE8679@spoyarek.pnq.redhat.com> Content-Type: multipart/mixed; boundary="------------020300070903030108030707" X-UI-Out-Filterresults: notjunk:1; |
Commit Message
Leonhard Holz
Dec. 31, 2014, 4:21 p.m. UTC
Here are the updated diffs. Tests look ok but do not complete because of an (hopefully unrelated) compile error in string/tester.c. Am 31.12.2014 um 10:47 schrieb Siddhesh Poyarekar: > On Wed, Dec 31, 2014 at 10:36:11AM +0100, Leonhard Holz wrote: >> I want to object here. The needed cache size can be in a region where the >> malloc will not fail but hurt the system performance considerable >> (swapping), especially as the malloced size is five times the string size. I >> think that low level functions like strxfrm should not carry such risks. > > Fair enough. In that case just mention in a comment that the size > you've chosen is arbitrary and only ensures that it is small enough to > be contained on the stack. If someone wants to prove that the size is > optimal (or another size is optimal) then they can do that on top of > your change. > > Siddhesh > diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c index 2d3f1bd..7ba6bac 100644 --- a/string/strxfrm_l.c +++ b/string/strxfrm_l.c @@ -40,9 +40,24 @@ #define CONCAT(a,b) CONCAT1(a,b) #define CONCAT1(a,b) a##b +/* Maximum string size that is calculated with cached indices. Right now this + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */ +#define SMALL_STR_SIZE 4095 + #include "../locale/localeinfo.h" #include WEIGHT_H +/* Group locale data for shorter parameter lists. */ +typedef struct +{ + uint_fast32_t nrules; + unsigned char *rulesets; + USTRING_TYPE *weights; + int32_t *table; + USTRING_TYPE *extra; + int32_t *indirect; +} locale_data_t; #ifndef WIDE_CHAR_VERSION @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val) } #endif +/* Find next weight and rule index. Inlined since called for every char. */ +static __always_inline size_t +find_idx (const USTRING_TYPE **us, int32_t *weight_idx, + unsigned char *rule_idx, const locale_data_t *l_data, const int pass) +{ + int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us, + -1); + *rule_idx = tmp >> 24; + int32_t idx = tmp & 0xffffff; + size_t len = l_data->weights[idx++]; + + /* Skip over indices of previous levels. */ + for (int i = 0; i < pass; i++) + { + idx += len; + len = l_data->weights[idx++]; + } -size_t -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) + *weight_idx = idx; + return len; +} + +static int +find_position (const USTRING_TYPE *us, const locale_data_t *l_data, + const int pass) { - struct __locale_data *current = l->__locales[LC_COLLATE]; - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; - /* We don't assign the following values right away since it might be - unnecessary in case there are no rules. */ - const unsigned char *rulesets; - const int32_t *table; - const USTRING_TYPE *weights; - const USTRING_TYPE *extra; - const int32_t *indirect; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *usrc = us; + + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); + return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position; +} + +/* Do the transformation. */ +static size_t +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, + const locale_data_t *l_data) +{ + int32_t weight_idx; + unsigned char rule_idx; uint_fast32_t pass; - size_t needed; + size_t needed = 0; size_t last_needed; - const USTRING_TYPE *usrc; - size_t srclen = STRLEN (src); - int32_t *idxarr; - unsigned char *rulearr; - size_t idxmax; - size_t idxcnt; - int use_malloc; - if (nrules == 0) + /* Now the passes over the weights. */ + for (pass = 0; pass < l_data->nrules; ++pass) { - if (n != 0) - STPNCPY (dest, src, MIN (srclen + 1, n)); + size_t backw_len = 0; + last_needed = needed; + const USTRING_TYPE *cur = usrc; + const USTRING_TYPE *backw_start = NULL; - return srclen; - } + /* We assume that if a rule has defined `position' in one section + this is true for all of them. */ + int position = find_position (cur, l_data, pass); - rulesets = (const unsigned char *) - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; - table = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; - weights = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; - extra = (const USTRING_TYPE *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; - indirect = (const int32_t *) - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; - use_malloc = 0; + if (position == 0) + { + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + int32_t weight_idx; + unsigned char rule_idx; + size_t len = find_idx (&backw_start, &weight_idx, + &rule_idx, l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } - /* Handle an empty string as a special case. */ - if (srclen == 0) - { - if (n != 0) - *dest = L('\0'); - return 0; - } + needed += backw_len; + backw_start = NULL; + backw_len = 0; + } - /* We need the elements of the string as unsigned values since they - are used as indeces. */ - usrc = (const USTRING_TYPE *) src; - - /* Perform the first pass over the string and while doing this find - and store the weights for each character. Since we want this to - be as fast as possible we are using `alloca' to store the temporary - values. But since there is no limit on the length of the string - we have to use `malloc' if the string is too long. We should be - very conservative here. */ - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1))) - { - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1)); - rulearr = (unsigned char *) &idxarr[srclen]; - - if (idxarr == NULL) - /* No memory. Well, go with the stack then. - - XXX Once this implementation is stable we will handle this - differently. Instead of precomputing the indeces we will - do this in time. This means, though, that this happens for - every pass again. */ - goto try_stack; - use_malloc = 1; - } - else - { - try_stack: - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t)); - rulearr = (unsigned char *) alloca (srclen + 1); + /* Now handle the forward element. */ + if (needed + len < n) + while (len-- > 0) + dest[needed++] = l_data->weights[weight_idx++]; + else + /* No more characters fit into the buffer. */ + needed += len; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len += len; + } + } + + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t i = backw_len; i > 0; ) + { + size_t len = find_idx (&backw_start, &weight_idx, &rule_idx, + l_data, pass); + if (needed + i < n) + for (size_t j = len; j > 0; j--) + dest[needed + i - j] = + l_data->weights[weight_idx++]; + + i -= len; + } + + needed += backw_len; + } + } + else + { + int val = 1; +#ifndef WIDE_CHAR_VERSION + char buf[7]; + size_t buflen; +#endif + size_t i; + + while (*cur != L('\0')) + { + const USTRING_TYPE *pos = cur; + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, + pass); + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; + + if ((rule & sort_forward) != 0) + { + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + + backw_start = NULL; + backw_len = 0; + } + + /* Now handle the forward element. */ + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + else + { + /* Remember start of the backward sequence & track length. */ + if (backw_start == NULL) + backw_start = pos; + backw_len++; + } + } + + /* Handle the pushed backward sequence. */ + if (backw_start != NULL) + { + for (size_t p = backw_len; p > 0; p--) + { + size_t len; + int32_t weight_idx; + unsigned char rule_idx; + const USTRING_TYPE *backw_cur = backw_start; + + /* To prevent a warning init the used vars. */ + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + for (i = 1; i < p; i++) + len = find_idx (&backw_cur, &weight_idx, + &rule_idx, l_data, pass); + + if (len != 0) + { +#ifdef WIDE_CHAR_VERSION + if (needed + 1 + len < n) + { + dest[needed] = val; + for (i = 0; i < len; ++i) + dest[needed + 1 + i] = + l_data->weights[weight_idx + i]; + } + needed += 1 + len; +#else + buflen = utf8_encode (buf, val); + if (needed + buflen + len < n) + { + for (i = 0; i < buflen; ++i) + dest[needed + i] = buf[i]; + for (i = 0; i < len; ++i) + dest[needed + buflen + i] = + l_data->weights[weight_idx + i]; + } + needed += buflen + len; +#endif + val = 1; + } + else + ++val; + } + } + } + + /* Finally store the byte to separate the passes or terminate + the string. */ + if (needed < n) + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); + ++needed; } - idxmax = 0; - do + /* This is a little optimization: many collation specifications have + a `position' rule at the end and if no non-ignored character + is found the last \1 byte is immediately followed by a \0 byte + signalling this. We can avoid the \1 byte(s). */ + if (needed > 2 && needed == last_needed + 1) { - int32_t tmp = findidx (table, indirect, extra, &usrc, -1); - rulearr[idxmax] = tmp >> 24; - idxarr[idxmax] = tmp & 0xffffff; - - ++idxmax; + /* Remove the \1 byte. */ + if (--needed <= n) + dest[needed - 1] = L('\0'); } - while (*usrc != L('\0')); - /* This element is only read, the value never used but to determine - another value which then is ignored. */ - rulearr[idxmax] = '\0'; + /* Return the number of bytes/words we need, but don't count the NUL + byte/word at the end. */ + return needed - 1; +} + +/* Do the transformation using weight-index and rule cache. */ +static size_t +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data, + size_t idxmax, int32_t *idxarr, const unsigned char *rulearr) +{ + uint_fast32_t nrules = l_data->nrules; + unsigned char *rulesets = l_data->rulesets; + USTRING_TYPE *weights = l_data->weights; + uint_fast32_t pass; + size_t needed = 0; + size_t last_needed; + size_t idxcnt; - /* Now the passes over the weights. We now use the indeces we found - before. */ - needed = 0; + /* Now the passes over the weights. */ for (pass = 0; pass < nrules; ++pass) { size_t backw_stop = ~0ul; @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) dest[needed - 1] = L('\0'); } - /* Free the memory if needed. */ - if (use_malloc) - free (idxarr); - /* Return the number of bytes/words we need, but don't count the NUL byte/word at the end. */ return needed - 1; } + +size_t +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) +{ + locale_data_t l_data; + struct __locale_data *current = l->__locales[LC_COLLATE]; + l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; + + /* Handle byte comparison case. */ + if (l_data.nrules == 0) + { + size_t srclen = STRLEN (src); + + if (n != 0) + STPNCPY (dest, src, MIN (srclen + 1, n)); + + return srclen; + } + + /* Handle an empty string, code hereafter relies on strlen (src) > 0. */ + if (*src == L('\0')) + { + if (n != 0) + *dest = L('\0'); + return 0; + } + + /* Get the locale data. */ + l_data.rulesets = (unsigned char *) + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; + l_data.table = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; + l_data.weights = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; + l_data.extra = (USTRING_TYPE *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; + l_data.indirect = (int32_t *) + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; + + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0); + assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0); + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0); + assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0); + + /* We need the elements of the string as unsigned values since they + are used as indeces. */ + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; + + /* Allocate cache for small strings on the stack and fill it with weight and + rule indices. If the cache size is not sufficient, continue with the + uncached xfrm version. */ + size_t idxmax = 0; + const USTRING_TYPE *cur = usrc; + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); + + do + { + int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur, + -1); + rulearr[idxmax] = tmp >> 24; + idxarr[idxmax] = tmp & 0xffffff; + + ++idxmax; + } + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); + + /* This element is only read, the value never used but to determine + another value which then is ignored. */ + rulearr[idxmax] = '\0'; + + /* Do the transformation. */ + if (*cur == L('\0')) + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr); + else + return do_xfrm (usrc, dest, n, &l_data); +} libc_hidden_def (STRXFRM) #ifndef WIDE_CHAR_VERSION diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c index d2aba7d..c40a7e6 100644 --- a/localedata/xfrm-test.c +++ b/localedata/xfrm-test.c @@ -24,6 +24,8 @@ #include <stdlib.h> #include <string.h> +/* Keep in sync with string/strxfrm_l.c. */ +#define SMALL_STR_SIZE 4095 struct lines { @@ -36,7 +38,7 @@ static int xstrcmp (const void *, const void *); int main (int argc, char *argv[]) { - int result = 0; + int result = 0, nocache = 0; size_t nstrings, nstrings_max; struct lines *strings; char *line = NULL; @@ -44,7 +46,18 @@ main (int argc, char *argv[]) size_t n; if (argc < 2) - error (1, 0, "usage: %s <random seed>", argv[0]); + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]); + + if (argc == 3) + { + if (strcmp (argv[2], "-nocache") == 0) + nocache = 1; + else + { + printf ("Unknown option %s!\n", argv[2]); + exit (1); + } + } setlocale (LC_ALL, ""); @@ -59,9 +72,9 @@ main (int argc, char *argv[]) while (1) { - char saved, *newp; - int needed; - int l; + char saved, *word, *newp; + size_t l, line_len, needed; + if (getline (&line, &len, stdin) < 0) break; @@ -83,10 +96,35 @@ main (int argc, char *argv[]) saved = line[l]; line[l] = '\0'; - needed = strxfrm (NULL, line, 0); + + if (nocache) + { + line_len = strlen (line); + word = malloc (line_len + SMALL_STR_SIZE + 1); + if (word == NULL) + { + perror (argv[0]); + exit (1); + } + memset (word, ' ', SMALL_STR_SIZE); + memcpy (word + SMALL_STR_SIZE, line, line_len); + word[line_len + SMALL_STR_SIZE] = '\0'; + } + else + word = line; + + needed = strxfrm (NULL, word, 0); newp = malloc (needed + 1); - strxfrm (newp, line, needed + 1); + if (newp == NULL) + { + perror (argv[0]); + exit (1); + } + strxfrm (newp, word, needed + 1); strings[nstrings].xfrm = newp; + + if (nocache) + free (word); line[l] = saved; ++nstrings; }
Comments
Any chance to make it into the upcoming release? https://sourceware.org/ml/libc-alpha/2014-12/msg01001.html
On Thu, Jan 08, 2015 at 08:15:54AM +0100, Leonhard Holz wrote: > Any chance to make it into the upcoming release? > > https://sourceware.org/ml/libc-alpha/2014-12/msg01001.html I hope so. I'll try to get to it tomorrow. Siddhesh
On Wed, Dec 31, 2014 at 05:21:12PM +0100, Leonhard Holz wrote: > Here are the updated diffs. Tests look ok but do not complete because of an > (hopefully unrelated) compile error in string/tester.c. Mostly minor nits below. Please fix them up and repost with a ChangeLog and I'll commit it. The first nit is: please avoid putting diffs of different individual files as attachments and more importantly, please avoid 'attaching' the diffs and inline them instead. It makes it difficult to respond with an inline review of the patch. The easiest way to send the patch inline is to use git-send-email: all you have to do is form your email (along with the ChangeLog) as the git commit comment and then do format-patch + send-email. And now we move on to the actual patch :) Thanks, Siddhesh > diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh > index e37129a..c464b05 100644 > --- a/localedata/sort-test.sh > +++ b/localedata/sort-test.sh > @@ -53,11 +53,18 @@ for l in $lang; do > ${common_objpfx}localedata/xfrm-test $id < $cns.in \ > > ${common_objpfx}localedata/$cns.xout || here=1 > cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1 > + ${test_program_prefix_before_env} \ > + ${run_program_env} \ > + LC_ALL=$l ${test_program_prefix_after_env} \ > + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \ > + > ${common_objpfx}localedata/$cns.xoutl || here=1 > + cmp -s $cns.in ${common_objpfx}localedata/$cns.xoutl || here=1 Call this $cns.nocache.xout. > if test $here -eq 0; then > echo "$l xfrm-test OK" > else > echo "$l xfrm-test FAIL" > diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /' > + diff -u $cns.in ${common_objpfx}localedata/$cns.xoutl | sed 's/^/ /' > status=1 > fi > done > diff --git a/string/strxfrm_l.c b/string/strxfrm_l.c > index 2d3f1bd..7ba6bac 100644 > --- a/string/strxfrm_l.c > +++ b/string/strxfrm_l.c > @@ -40,9 +40,24 @@ > #define CONCAT(a,b) CONCAT1(a,b) > #define CONCAT1(a,b) a##b > > +/* Maximum string size that is calculated with cached indices. Right now this > + is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be > + lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */ > +#define SMALL_STR_SIZE 4095 > + > #include "../locale/localeinfo.h" > #include WEIGHT_H > > +/* Group locale data for shorter parameter lists. */ > +typedef struct > +{ > + uint_fast32_t nrules; > + unsigned char *rulesets; > + USTRING_TYPE *weights; > + int32_t *table; > + USTRING_TYPE *extra; > + int32_t *indirect; > +} locale_data_t; > > #ifndef WIDE_CHAR_VERSION > > @@ -81,113 +96,325 @@ utf8_encode (char *buf, int val) > } > #endif > > +/* Find next weight and rule index. Inlined since called for every char. */ > +static __always_inline size_t > +find_idx (const USTRING_TYPE **us, int32_t *weight_idx, > + unsigned char *rule_idx, const locale_data_t *l_data, const int pass) > +{ > + int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us, > + -1); > + *rule_idx = tmp >> 24; > + int32_t idx = tmp & 0xffffff; > + size_t len = l_data->weights[idx++]; > + > + /* Skip over indices of previous levels. */ > + for (int i = 0; i < pass; i++) > + { > + idx += len; > + len = l_data->weights[idx++]; > + } > > -size_t > -STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) > + *weight_idx = idx; > + return len; > +} > + > +static int > +find_position (const USTRING_TYPE *us, const locale_data_t *l_data, > + const int pass) > { > - struct __locale_data *current = l->__locales[LC_COLLATE]; > - uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; > - /* We don't assign the following values right away since it might be > - unnecessary in case there are no rules. */ > - const unsigned char *rulesets; > - const int32_t *table; > - const USTRING_TYPE *weights; > - const USTRING_TYPE *extra; > - const int32_t *indirect; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *usrc = us; > + > + find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass); > + return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position; > +} > + > +/* Do the transformation. */ > +static size_t > +do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n, > + const locale_data_t *l_data) > +{ > + int32_t weight_idx; > + unsigned char rule_idx; > uint_fast32_t pass; > - size_t needed; > + size_t needed = 0; > size_t last_needed; > - const USTRING_TYPE *usrc; > - size_t srclen = STRLEN (src); > - int32_t *idxarr; > - unsigned char *rulearr; > - size_t idxmax; > - size_t idxcnt; > - int use_malloc; > > - if (nrules == 0) > + /* Now the passes over the weights. */ > + for (pass = 0; pass < l_data->nrules; ++pass) > { > - if (n != 0) > - STPNCPY (dest, src, MIN (srclen + 1, n)); > + size_t backw_len = 0; > + last_needed = needed; > + const USTRING_TYPE *cur = usrc; > + const USTRING_TYPE *backw_start = NULL; > > - return srclen; > - } > + /* We assume that if a rule has defined `position' in one section > + this is true for all of them. */ > + int position = find_position (cur, l_data, pass); > > - rulesets = (const unsigned char *) > - current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; > - table = (const int32_t *) > - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; > - weights = (const USTRING_TYPE *) > - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; > - extra = (const USTRING_TYPE *) > - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; > - indirect = (const int32_t *) > - current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; > - use_malloc = 0; > + if (position == 0) > + { > + while (*cur != L('\0')) > + { > + const USTRING_TYPE *pos = cur; > + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, > + pass); > + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; > > - assert (((uintptr_t) table) % __alignof__ (table[0]) == 0); > - assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0); > - assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0); > - assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0); > + if ((rule & sort_forward) != 0) > + { > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { Remove the extra space at the end. > + for (size_t i = backw_len; i > 0; ) > + { > + int32_t weight_idx; > + unsigned char rule_idx; > + size_t len = find_idx (&backw_start, &weight_idx, > + &rule_idx, l_data, pass); > + if (needed + i < n) > + for (size_t j = len; j > 0; j--) > + dest[needed + i - j] = > + l_data->weights[weight_idx++]; > + > + i -= len; > + } > > - /* Handle an empty string as a special case. */ > - if (srclen == 0) > - { > - if (n != 0) > - *dest = L('\0'); > - return 0; > - } > + needed += backw_len; > + backw_start = NULL; > + backw_len = 0; > + } > > - /* We need the elements of the string as unsigned values since they > - are used as indeces. */ > - usrc = (const USTRING_TYPE *) src; > - > - /* Perform the first pass over the string and while doing this find > - and store the weights for each character. Since we want this to > - be as fast as possible we are using `alloca' to store the temporary > - values. But since there is no limit on the length of the string > - we have to use `malloc' if the string is too long. We should be > - very conservative here. */ > - if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1))) > - { > - idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1)); > - rulearr = (unsigned char *) &idxarr[srclen]; > - > - if (idxarr == NULL) > - /* No memory. Well, go with the stack then. > - > - XXX Once this implementation is stable we will handle this > - differently. Instead of precomputing the indeces we will > - do this in time. This means, though, that this happens for > - every pass again. */ > - goto try_stack; > - use_malloc = 1; > - } > - else > - { > - try_stack: > - idxarr = (int32_t *) alloca (srclen * sizeof (int32_t)); > - rulearr = (unsigned char *) alloca (srclen + 1); > + /* Now handle the forward element. */ > + if (needed + len < n) > + while (len-- > 0) > + dest[needed++] = l_data->weights[weight_idx++]; > + else > + /* No more characters fit into the buffer. */ > + needed += len; > + } > + else > + { > + /* Remember start of the backward sequence & track length. */ > + if (backw_start == NULL) > + backw_start = pos; > + backw_len += len; > + } > + } > + > + > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { remove extra space at the end. > + for (size_t i = backw_len; i > 0; ) > + { > + size_t len = find_idx (&backw_start, &weight_idx, &rule_idx, > + l_data, pass); > + if (needed + i < n) > + for (size_t j = len; j > 0; j--) > + dest[needed + i - j] = > + l_data->weights[weight_idx++]; > + > + i -= len; > + } > + > + needed += backw_len; > + } > + } > + else > + { > + int val = 1; > +#ifndef WIDE_CHAR_VERSION > + char buf[7]; > + size_t buflen; > +#endif > + size_t i; > + > + while (*cur != L('\0')) > + { > + const USTRING_TYPE *pos = cur; > + size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data, > + pass); > + int rule = l_data->rulesets[rule_idx * l_data->nrules + pass]; > + > + if ((rule & sort_forward) != 0) > + { > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t p = backw_len; p > 0; p--) > + { > + size_t len; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *backw_cur = backw_start; > + > + /* To prevent a warning init the used vars. */ > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + for (i = 1; i < p; i++) > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + > + backw_start = NULL; > + backw_len = 0; > + } > + > + /* Now handle the forward element. */ > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + else > + { > + /* Remember start of the backward sequence & track length. */ > + if (backw_start == NULL) > + backw_start = pos; > + backw_len++; > + } > + } > + > + /* Handle the pushed backward sequence. */ > + if (backw_start != NULL) > + { > + for (size_t p = backw_len; p > 0; p--) > + { > + size_t len; > + int32_t weight_idx; > + unsigned char rule_idx; > + const USTRING_TYPE *backw_cur = backw_start; > + > + /* To prevent a warning init the used vars. */ > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + for (i = 1; i < p; i++) > + len = find_idx (&backw_cur, &weight_idx, > + &rule_idx, l_data, pass); > + > + if (len != 0) > + { > +#ifdef WIDE_CHAR_VERSION > + if (needed + 1 + len < n) > + { > + dest[needed] = val; > + for (i = 0; i < len; ++i) > + dest[needed + 1 + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += 1 + len; > +#else > + buflen = utf8_encode (buf, val); > + if (needed + buflen + len < n) > + { > + for (i = 0; i < buflen; ++i) > + dest[needed + i] = buf[i]; > + for (i = 0; i < len; ++i) > + dest[needed + buflen + i] = > + l_data->weights[weight_idx + i]; > + } > + needed += buflen + len; > +#endif > + val = 1; > + } > + else > + ++val; > + } > + } > + } > + > + /* Finally store the byte to separate the passes or terminate > + the string. */ > + if (needed < n) > + dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0'); > + ++needed; > } > > - idxmax = 0; > - do > + /* This is a little optimization: many collation specifications have > + a `position' rule at the end and if no non-ignored character > + is found the last \1 byte is immediately followed by a \0 byte > + signalling this. We can avoid the \1 byte(s). */ > + if (needed > 2 && needed == last_needed + 1) > { > - int32_t tmp = findidx (table, indirect, extra, &usrc, -1); > - rulearr[idxmax] = tmp >> 24; > - idxarr[idxmax] = tmp & 0xffffff; > - > - ++idxmax; > + /* Remove the \1 byte. */ > + if (--needed <= n) > + dest[needed - 1] = L('\0'); > } > - while (*usrc != L('\0')); > > - /* This element is only read, the value never used but to determine > - another value which then is ignored. */ > - rulearr[idxmax] = '\0'; > + /* Return the number of bytes/words we need, but don't count the NUL > + byte/word at the end. */ > + return needed - 1; > +} > + > +/* Do the transformation using weight-index and rule cache. */ > +static size_t > +do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data, > + size_t idxmax, int32_t *idxarr, const unsigned char *rulearr) > +{ > + uint_fast32_t nrules = l_data->nrules; > + unsigned char *rulesets = l_data->rulesets; > + USTRING_TYPE *weights = l_data->weights; > + uint_fast32_t pass; > + size_t needed = 0; > + size_t last_needed; > + size_t idxcnt; > > - /* Now the passes over the weights. We now use the indeces we found > - before. */ > - needed = 0; > + /* Now the passes over the weights. */ > for (pass = 0; pass < nrules; ++pass) > { > size_t backw_stop = ~0ul; > @@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) > dest[needed - 1] = L('\0'); > } > > - /* Free the memory if needed. */ > - if (use_malloc) > - free (idxarr); > - > /* Return the number of bytes/words we need, but don't count the NUL > byte/word at the end. */ > return needed - 1; > } > + > +size_t > +STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l) > +{ > + locale_data_t l_data; > + struct __locale_data *current = l->__locales[LC_COLLATE]; > + l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word; > + > + /* Handle byte comparison case. */ > + if (l_data.nrules == 0) > + { > + size_t srclen = STRLEN (src); > + > + if (n != 0) > + STPNCPY (dest, src, MIN (srclen + 1, n)); > + > + return srclen; > + } > + > + /* Handle an empty string, code hereafter relies on strlen (src) > 0. */ > + if (*src == L('\0')) > + { > + if (n != 0) > + *dest = L('\0'); > + return 0; > + } > + > + /* Get the locale data. */ > + l_data.rulesets = (unsigned char *) > + current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string; > + l_data.table = (int32_t *) > + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string; > + l_data.weights = (USTRING_TYPE *) > + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string; > + l_data.extra = (USTRING_TYPE *) > + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string; > + l_data.indirect = (int32_t *) > + current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string; > + > + assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0); > + assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0); > + assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0); > + assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0); > + > + /* We need the elements of the string as unsigned values since they > + are used as indeces. */ > + const USTRING_TYPE *usrc = (const USTRING_TYPE *) src; > + > + /* Allocate cache for small strings on the stack and fill it with weight and > + rule indices. If the cache size is not sufficient, continue with the > + uncached xfrm version. */ > + size_t idxmax = 0; > + const USTRING_TYPE *cur = usrc; > + int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t)); > + unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1); > + > + do > + { > + int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur, > + -1); > + rulearr[idxmax] = tmp >> 24; > + idxarr[idxmax] = tmp & 0xffffff; > + > + ++idxmax; > + } > + while (*cur != L('\0') && idxmax < SMALL_STR_SIZE); > + > + /* This element is only read, the value never used but to determine > + another value which then is ignored. */ > + rulearr[idxmax] = '\0'; > + > + /* Do the transformation. */ > + if (*cur == L('\0')) > + return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr); > + else > + return do_xfrm (usrc, dest, n, &l_data); > +} > libc_hidden_def (STRXFRM) > > #ifndef WIDE_CHAR_VERSION > diff --git a/localedata/xfrm-test.c b/localedata/xfrm-test.c > index d2aba7d..c40a7e6 100644 > --- a/localedata/xfrm-test.c > +++ b/localedata/xfrm-test.c > @@ -24,6 +24,8 @@ > #include <stdlib.h> > #include <string.h> > > +/* Keep in sync with string/strxfrm_l.c. */ > +#define SMALL_STR_SIZE 4095 > > struct lines > { > @@ -36,7 +38,7 @@ static int xstrcmp (const void *, const void *); > int > main (int argc, char *argv[]) > { > - int result = 0; > + int result = 0, nocache = 0; Make nocache bool. > size_t nstrings, nstrings_max; > struct lines *strings; > char *line = NULL; > @@ -44,7 +46,18 @@ main (int argc, char *argv[]) > size_t n; > > if (argc < 2) > - error (1, 0, "usage: %s <random seed>", argv[0]); > + error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]); > + > + if (argc == 3) > + { > + if (strcmp (argv[2], "-nocache") == 0) > + nocache = 1; > + else > + { > + printf ("Unknown option %s!\n", argv[2]); > + exit (1); > + } > + } > > setlocale (LC_ALL, ""); > > @@ -59,9 +72,9 @@ main (int argc, char *argv[]) > > while (1) > { > - char saved, *newp; > - int needed; > - int l; > + char saved, *word, *newp; > + size_t l, line_len, needed; > + > if (getline (&line, &len, stdin) < 0) > break; > > @@ -83,10 +96,35 @@ main (int argc, char *argv[]) > > saved = line[l]; > line[l] = '\0'; > - needed = strxfrm (NULL, line, 0); > + > + if (nocache) > + { > + line_len = strlen (line); > + word = malloc (line_len + SMALL_STR_SIZE + 1); > + if (word == NULL) > + { > + perror (argv[0]); Don't use perror in the test; it prints to stderr, which is then not captured in the test.out file. Use %m instead. > + exit (1); > + } > + memset (word, ' ', SMALL_STR_SIZE); > + memcpy (word + SMALL_STR_SIZE, line, line_len); > + word[line_len + SMALL_STR_SIZE] = '\0'; > + } > + else > + word = line; > + > + needed = strxfrm (NULL, word, 0); > newp = malloc (needed + 1); > - strxfrm (newp, line, needed + 1); > + if (newp == NULL) > + { > + perror (argv[0]); Likewise. > + exit (1); > + } > + strxfrm (newp, word, needed + 1); > strings[nstrings].xfrm = newp; > + > + if (nocache) > + free (word); > line[l] = saved; > ++nstrings; > }
diff --git a/localedata/sort-test.sh b/localedata/sort-test.sh index e37129a..c464b05 100644 --- a/localedata/sort-test.sh +++ b/localedata/sort-test.sh @@ -53,11 +53,18 @@ for l in $lang; do ${common_objpfx}localedata/xfrm-test $id < $cns.in \ > ${common_objpfx}localedata/$cns.xout || here=1 cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1 + ${test_program_prefix_before_env} \ + ${run_program_env} \ + LC_ALL=$l ${test_program_prefix_after_env} \ + ${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \ + > ${common_objpfx}localedata/$cns.xoutl || here=1 + cmp -s $cns.in ${common_objpfx}localedata/$cns.xoutl || here=1 if test $here -eq 0; then echo "$l xfrm-test OK" else echo "$l xfrm-test FAIL" diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /' + diff -u $cns.in ${common_objpfx}localedata/$cns.xoutl | sed 's/^/ /' status=1 fi done