[v2] regex: Unnest nested functions in regcomp.c
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
This refactor moves four functions out of a nested scope and converts
them into static always_inline functions. collseqwc, table_size,
symb_table, extra are now initialized to zero because they are passed as
function arguments.
On x86-64, .text is 16 byte larger likely due to the 4 stores.
This is nothing compared to the amount of work that regcomp has to do
looking up the collation weights, or other functions.
If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
this patch removes the last `auto inline` usage from glibc.
---
v1 -> v2:
* Update commit message
---
posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
1 file changed, 266 insertions(+), 248 deletions(-)
Comments
On 02/11/2021 02:45, Fangrui Song via Libc-alpha wrote:
> This refactor moves four functions out of a nested scope and converts
> them into static always_inline functions. collseqwc, table_size,
> symb_table, extra are now initialized to zero because they are passed as
> function arguments.
>
> On x86-64, .text is 16 byte larger likely due to the 4 stores.
> This is nothing compared to the amount of work that regcomp has to do
> looking up the collation weights, or other functions.
>
> If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
> this patch removes the last `auto inline` usage from glibc.
LGTM, thanks. Carlos might want to ack as well.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> v1 -> v2:
> * Update commit message
> ---
> posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
> 1 file changed, 266 insertions(+), 248 deletions(-)
>
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index 887e5b5068..e2b5c2807a 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
> }
> #endif /* not _LIBC */
>
> +#ifdef _LIBC
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> + Seek the collating symbol entry corresponding to NAME.
> + Return the index of the symbol in the SYMB_TABLE,
> + or -1 if not found. */
> +
> +static inline int32_t
> +__attribute__ ((always_inline))
> +seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
> + const int32_t *symb_table, int32_t table_size,
> + const unsigned char *extra)
> +{
> + int32_t elem;
> +
> + for (elem = 0; elem < table_size; elem++)
> + if (symb_table[2 * elem] != 0)
> + {
> + int32_t idx = symb_table[2 * elem + 1];
> + /* Skip the name of collating element name. */
> + idx += 1 + extra[idx];
> + if (/* Compare the length of the name. */
> + name_len == extra[idx]
> + /* Compare the name. */
> + && memcmp (name, &extra[idx + 1], name_len) == 0)
> + /* Yep, this is the entry. */
> + return elem;
> + }
> + return -1;
> +}
> +
Ok (I would let the compiler optimize it, but __attribute__ ((always_inline))
at least tries to mimic what nested function does).
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> + Look up the collation sequence value of BR_ELEM.
> + Return the value if succeeded, UINT_MAX otherwise. */
> +
> +static inline unsigned int
> +__attribute__ ((always_inline))
> +lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
> + const unsigned char *collseqmb,
> + const char *collseqwc, int32_t table_size,
> + const int32_t *symb_table,
> + const unsigned char *extra)
> +{
> + if (br_elem->type == SB_CHAR)
> + {
> + /* if (MB_CUR_MAX == 1) */
> + if (nrules == 0)
> + return collseqmb[br_elem->opr.ch];
> + else
> + {
> + wint_t wc = __btowc (br_elem->opr.ch);
> + return __collseq_table_lookup (collseqwc, wc);
> + }
> + }
> + else if (br_elem->type == MB_CHAR)
> + {
> + if (nrules != 0)
> + return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> + }
> + else if (br_elem->type == COLL_SYM)
> + {
> + size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> + if (nrules != 0)
> + {
> + int32_t elem, idx;
> + elem = seek_collating_symbol_entry (br_elem->opr.name,
> + sym_name_len,
> + symb_table, table_size,
> + extra);
> + if (elem != -1)
> + {
> + /* We found the entry. */
> + idx = symb_table[2 * elem + 1];
> + /* Skip the name of collating element name. */
> + idx += 1 + extra[idx];
> + /* Skip the byte sequence of the collating element. */
> + idx += 1 + extra[idx];
> + /* Adjust for the alignment. */
> + idx = (idx + 3) & ~3;
> + /* Skip the multibyte collation sequence value. */
> + idx += sizeof (unsigned int);
> + /* Skip the wide char sequence of the collating element. */
> + idx += sizeof (unsigned int) *
> + (1 + *(unsigned int *) (extra + idx));
> + /* Return the collation sequence value. */
> + return *(unsigned int *) (extra + idx);
> + }
> + else if (sym_name_len == 1)
> + {
> + /* No valid character. Match it as a single byte
> + character. */
> + return collseqmb[br_elem->opr.name[0]];
> + }
> + }
> + else if (sym_name_len == 1)
> + return collseqmb[br_elem->opr.name[0]];
> + }
> + return UINT_MAX;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> + Build the range expression which starts from START_ELEM, and ends
> + at END_ELEM. The result are written to MBCSET and SBCSET.
> + RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> + mbcset->range_ends, is a pointer argument since we may
> + update it. */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))
> +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
> + bracket_elem_t *start_elem, bracket_elem_t *end_elem,
> + re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
> + const unsigned char *collseqmb, const char *collseqwc,
> + int32_t table_size, const int32_t *symb_table,
> + const unsigned char *extra)
> +{
> + unsigned int ch;
> + uint32_t start_collseq;
> + uint32_t end_collseq;
> +
> + /* Equivalence Classes and Character Classes can't be a range
> + start/end. */
> + if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> + || start_elem->type == CHAR_CLASS
> + || end_elem->type == EQUIV_CLASS
> + || end_elem->type == CHAR_CLASS))
> + return REG_ERANGE;
> +
> + /* FIXME: Implement rational ranges here, too. */
> + start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
> + table_size, symb_table, extra);
> + end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
> + table_size, symb_table, extra);
> + /* Check start/end collation sequence values. */
> + if (__glibc_unlikely (start_collseq == UINT_MAX
> + || end_collseq == UINT_MAX))
> + return REG_ECOLLATE;
> + if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> + && start_collseq > end_collseq))
> + return REG_ERANGE;
> +
> + /* Got valid collation sequence values, add them as a new entry.
> + However, if we have no collation elements, and the character set
> + is single byte, the single byte character set that we
> + build below suffices. */
> + if (nrules > 0 || dfa->mb_cur_max > 1)
> + {
> + /* Check the space of the arrays. */
> + if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> + {
> + /* There is not enough space, need realloc. */
> + uint32_t *new_array_start;
> + uint32_t *new_array_end;
> + int new_nranges;
> +
> + /* +1 in case of mbcset->nranges is 0. */
> + new_nranges = 2 * mbcset->nranges + 1;
> + new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> + new_nranges);
> + new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> + new_nranges);
> +
> + if (__glibc_unlikely (new_array_start == NULL
> + || new_array_end == NULL))
> + return REG_ESPACE;
> +
> + mbcset->range_starts = new_array_start;
> + mbcset->range_ends = new_array_end;
> + *range_alloc = new_nranges;
> + }
> +
> + mbcset->range_starts[mbcset->nranges] = start_collseq;
> + mbcset->range_ends[mbcset->nranges++] = end_collseq;
> + }
> +
> + /* Build the table for single byte characters. */
> + for (ch = 0; ch < SBC_MAX; ch++)
> + {
> + uint32_t ch_collseq;
> + /* if (MB_CUR_MAX == 1) */
> + if (nrules == 0)
> + ch_collseq = collseqmb[ch];
> + else
> + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> + bitset_set (sbcset, ch);
> + }
> + return REG_NOERROR;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> + Build the collating element which is represented by NAME.
> + The result are written to MBCSET and SBCSET.
> + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> + pointer argument since we may update it. */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))
> +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
> + int *coll_sym_alloc, const unsigned char *name,
> + uint32_t nrules, int32_t table_size,
> + const int32_t *symb_table, const unsigned char *extra)
> +{
> + int32_t elem, idx;
> + size_t name_len = strlen ((const char *) name);
> + if (nrules != 0)
> + {
> + elem = seek_collating_symbol_entry (name, name_len, symb_table,
> + table_size, extra);
> + if (elem != -1)
> + {
> + /* We found the entry. */
> + idx = symb_table[2 * elem + 1];
> + /* Skip the name of collating element name. */
> + idx += 1 + extra[idx];
> + }
> + else if (name_len == 1)
> + {
> + /* No valid character, treat it as a normal
> + character. */
> + bitset_set (sbcset, name[0]);
> + return REG_NOERROR;
> + }
> + else
> + return REG_ECOLLATE;
> +
> + /* Got valid collation sequence, add it as a new entry. */
> + /* Check the space of the arrays. */
> + if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> + {
> + /* Not enough, realloc it. */
> + /* +1 in case of mbcset->ncoll_syms is 0. */
> + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> + /* Use realloc since mbcset->coll_syms is NULL
> + if *alloc == 0. */
> + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> + new_coll_sym_alloc);
> + if (__glibc_unlikely (new_coll_syms == NULL))
> + return REG_ESPACE;
> + mbcset->coll_syms = new_coll_syms;
> + *coll_sym_alloc = new_coll_sym_alloc;
> + }
> + mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> + return REG_NOERROR;
> + }
> + else
> + {
> + if (__glibc_unlikely (name_len != 1))
> + return REG_ECOLLATE;
> + else
> + {
> + bitset_set (sbcset, name[0]);
> + return REG_NOERROR;
> + }
> + }
> +}
> +#endif /* _LIBC */
> +
> /* This function parse bracket expression like "[abc]", "[a-c]",
> "[[.a-a.]]" etc. */
>
> @@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
> {
> #ifdef _LIBC
> const unsigned char *collseqmb;
> - const char *collseqwc;
> + const char *collseqwc = NULL;
> uint32_t nrules;
> - int32_t table_size;
> - const int32_t *symb_table;
> - const unsigned char *extra;
> -
> - /* Local function for parse_bracket_exp used in _LIBC environment.
> - Seek the collating symbol entry corresponding to NAME.
> - Return the index of the symbol in the SYMB_TABLE,
> - or -1 if not found. */
> -
> - auto inline int32_t
> - __attribute__ ((always_inline))
> - seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
> - {
> - int32_t elem;
> -
> - for (elem = 0; elem < table_size; elem++)
> - if (symb_table[2 * elem] != 0)
> - {
> - int32_t idx = symb_table[2 * elem + 1];
> - /* Skip the name of collating element name. */
> - idx += 1 + extra[idx];
> - if (/* Compare the length of the name. */
> - name_len == extra[idx]
> - /* Compare the name. */
> - && memcmp (name, &extra[idx + 1], name_len) == 0)
> - /* Yep, this is the entry. */
> - return elem;
> - }
> - return -1;
> - }
> -
> - /* Local function for parse_bracket_exp used in _LIBC environment.
> - Look up the collation sequence value of BR_ELEM.
> - Return the value if succeeded, UINT_MAX otherwise. */
> -
> - auto inline unsigned int
> - __attribute__ ((always_inline))
> - lookup_collation_sequence_value (bracket_elem_t *br_elem)
> - {
> - if (br_elem->type == SB_CHAR)
> - {
> - /*
> - if (MB_CUR_MAX == 1)
> - */
> - if (nrules == 0)
> - return collseqmb[br_elem->opr.ch];
> - else
> - {
> - wint_t wc = __btowc (br_elem->opr.ch);
> - return __collseq_table_lookup (collseqwc, wc);
> - }
> - }
> - else if (br_elem->type == MB_CHAR)
> - {
> - if (nrules != 0)
> - return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> - }
> - else if (br_elem->type == COLL_SYM)
> - {
> - size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> - if (nrules != 0)
> - {
> - int32_t elem, idx;
> - elem = seek_collating_symbol_entry (br_elem->opr.name,
> - sym_name_len);
> - if (elem != -1)
> - {
> - /* We found the entry. */
> - idx = symb_table[2 * elem + 1];
> - /* Skip the name of collating element name. */
> - idx += 1 + extra[idx];
> - /* Skip the byte sequence of the collating element. */
> - idx += 1 + extra[idx];
> - /* Adjust for the alignment. */
> - idx = (idx + 3) & ~3;
> - /* Skip the multibyte collation sequence value. */
> - idx += sizeof (unsigned int);
> - /* Skip the wide char sequence of the collating element. */
> - idx += sizeof (unsigned int) *
> - (1 + *(unsigned int *) (extra + idx));
> - /* Return the collation sequence value. */
> - return *(unsigned int *) (extra + idx);
> - }
> - else if (sym_name_len == 1)
> - {
> - /* No valid character. Match it as a single byte
> - character. */
> - return collseqmb[br_elem->opr.name[0]];
> - }
> - }
> - else if (sym_name_len == 1)
> - return collseqmb[br_elem->opr.name[0]];
> - }
> - return UINT_MAX;
> - }
> -
> - /* Local function for parse_bracket_exp used in _LIBC environment.
> - Build the range expression which starts from START_ELEM, and ends
> - at END_ELEM. The result are written to MBCSET and SBCSET.
> - RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> - mbcset->range_ends, is a pointer argument since we may
> - update it. */
> -
> - auto inline reg_errcode_t
> - __attribute__ ((always_inline))
> - build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
> - bracket_elem_t *start_elem, bracket_elem_t *end_elem)
> - {
> - unsigned int ch;
> - uint32_t start_collseq;
> - uint32_t end_collseq;
> -
> - /* Equivalence Classes and Character Classes can't be a range
> - start/end. */
> - if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> - || start_elem->type == CHAR_CLASS
> - || end_elem->type == EQUIV_CLASS
> - || end_elem->type == CHAR_CLASS))
> - return REG_ERANGE;
> -
> - /* FIXME: Implement rational ranges here, too. */
> - start_collseq = lookup_collation_sequence_value (start_elem);
> - end_collseq = lookup_collation_sequence_value (end_elem);
> - /* Check start/end collation sequence values. */
> - if (__glibc_unlikely (start_collseq == UINT_MAX
> - || end_collseq == UINT_MAX))
> - return REG_ECOLLATE;
> - if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> - && start_collseq > end_collseq))
> - return REG_ERANGE;
> -
> - /* Got valid collation sequence values, add them as a new entry.
> - However, if we have no collation elements, and the character set
> - is single byte, the single byte character set that we
> - build below suffices. */
> - if (nrules > 0 || dfa->mb_cur_max > 1)
> - {
> - /* Check the space of the arrays. */
> - if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> - {
> - /* There is not enough space, need realloc. */
> - uint32_t *new_array_start;
> - uint32_t *new_array_end;
> - Idx new_nranges;
> -
> - /* +1 in case of mbcset->nranges is 0. */
> - new_nranges = 2 * mbcset->nranges + 1;
> - new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> - new_nranges);
> - new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> - new_nranges);
> -
> - if (__glibc_unlikely (new_array_start == NULL
> - || new_array_end == NULL))
> - return REG_ESPACE;
> -
> - mbcset->range_starts = new_array_start;
> - mbcset->range_ends = new_array_end;
> - *range_alloc = new_nranges;
> - }
> -
> - mbcset->range_starts[mbcset->nranges] = start_collseq;
> - mbcset->range_ends[mbcset->nranges++] = end_collseq;
> - }
> -
> - /* Build the table for single byte characters. */
> - for (ch = 0; ch < SBC_MAX; ch++)
> - {
> - uint32_t ch_collseq;
> - /*
> - if (MB_CUR_MAX == 1)
> - */
> - if (nrules == 0)
> - ch_collseq = collseqmb[ch];
> - else
> - ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> - if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> - bitset_set (sbcset, ch);
> - }
> - return REG_NOERROR;
> - }
> -
> - /* Local function for parse_bracket_exp used in _LIBC environment.
> - Build the collating element which is represented by NAME.
> - The result are written to MBCSET and SBCSET.
> - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> - pointer argument since we may update it. */
> -
> - auto inline reg_errcode_t
> - __attribute__ ((always_inline))
> - build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
> - Idx *coll_sym_alloc, const unsigned char *name)
> - {
> - int32_t elem, idx;
> - size_t name_len = strlen ((const char *) name);
> - if (nrules != 0)
> - {
> - elem = seek_collating_symbol_entry (name, name_len);
> - if (elem != -1)
> - {
> - /* We found the entry. */
> - idx = symb_table[2 * elem + 1];
> - /* Skip the name of collating element name. */
> - idx += 1 + extra[idx];
> - }
> - else if (name_len == 1)
> - {
> - /* No valid character, treat it as a normal
> - character. */
> - bitset_set (sbcset, name[0]);
> - return REG_NOERROR;
> - }
> - else
> - return REG_ECOLLATE;
> -
> - /* Got valid collation sequence, add it as a new entry. */
> - /* Check the space of the arrays. */
> - if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> - {
> - /* Not enough, realloc it. */
> - /* +1 in case of mbcset->ncoll_syms is 0. */
> - Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> - /* Use realloc since mbcset->coll_syms is NULL
> - if *alloc == 0. */
> - int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> - new_coll_sym_alloc);
> - if (__glibc_unlikely (new_coll_syms == NULL))
> - return REG_ESPACE;
> - mbcset->coll_syms = new_coll_syms;
> - *coll_sym_alloc = new_coll_sym_alloc;
> - }
> - mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> - return REG_NOERROR;
> - }
> - else
> - {
> - if (__glibc_unlikely (name_len != 1))
> - return REG_ECOLLATE;
> - else
> - {
> - bitset_set (sbcset, name[0]);
> - return REG_NOERROR;
> - }
> - }
> - }
> + int32_t table_size = 0;
> + const int32_t *symb_table = NULL;
> + const unsigned char *extra = NULL;
> #endif
>
> re_token_t br_token;
> @@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>
> #ifdef _LIBC
> *err = build_range_exp (sbcset, mbcset, &range_alloc,
> - &start_elem, &end_elem);
> + &start_elem, &end_elem,
> + dfa, syntax, nrules, collseqmb, collseqwc,
> + table_size, symb_table, extra);
> #else
> # ifdef RE_ENABLE_I18N
> *err = build_range_exp (syntax, sbcset,
> @@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
> #ifdef RE_ENABLE_I18N
> mbcset, &coll_sym_alloc,
> #endif /* RE_ENABLE_I18N */
> - start_elem.opr.name);
> + start_elem.opr.name,
> + nrules, table_size, symb_table, extra);
> if (__glibc_unlikely (*err != REG_NOERROR))
> goto parse_bracket_exp_free_return;
> break;
>
On 11/2/21 08:15, Adhemerval Zanella wrote:
>
>
> On 02/11/2021 02:45, Fangrui Song via Libc-alpha wrote:
>> This refactor moves four functions out of a nested scope and converts
>> them into static always_inline functions. collseqwc, table_size,
>> symb_table, extra are now initialized to zero because they are passed as
>> function arguments.
>>
>> On x86-64, .text is 16 byte larger likely due to the 4 stores.
>> This is nothing compared to the amount of work that regcomp has to do
>> looking up the collation weights, or other functions.
>>
>> If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
>> this patch removes the last `auto inline` usage from glibc.
>
> LGTM, thanks. Carlos might want to ack as well.
>
> Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
LGTM. Thanks!
I think that keeping always_inline is the right thing to do in the context of
a refactor.
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
>> ---
>> v1 -> v2:
>> * Update commit message
>> ---
>> posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
>> 1 file changed, 266 insertions(+), 248 deletions(-)
>>
>> diff --git a/posix/regcomp.c b/posix/regcomp.c
>> index 887e5b5068..e2b5c2807a 100644
>> --- a/posix/regcomp.c
>> +++ b/posix/regcomp.c
>> @@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
>> }
>> #endif /* not _LIBC */
>>
>> +#ifdef _LIBC
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> + Seek the collating symbol entry corresponding to NAME.
>> + Return the index of the symbol in the SYMB_TABLE,
>> + or -1 if not found. */
>> +
>> +static inline int32_t
>> +__attribute__ ((always_inline))
>> +seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
>> + const int32_t *symb_table, int32_t table_size,
>> + const unsigned char *extra)
>> +{
>> + int32_t elem;
>> +
>> + for (elem = 0; elem < table_size; elem++)
>> + if (symb_table[2 * elem] != 0)
>> + {
>> + int32_t idx = symb_table[2 * elem + 1];
>> + /* Skip the name of collating element name. */
>> + idx += 1 + extra[idx];
>> + if (/* Compare the length of the name. */
>> + name_len == extra[idx]
>> + /* Compare the name. */
>> + && memcmp (name, &extra[idx + 1], name_len) == 0)
>> + /* Yep, this is the entry. */
>> + return elem;
>> + }
>> + return -1;
>> +}
>> +
>
> Ok (I would let the compiler optimize it, but __attribute__ ((always_inline))
> at least tries to mimic what nested function does).
>
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> + Look up the collation sequence value of BR_ELEM.
>> + Return the value if succeeded, UINT_MAX otherwise. */
>> +
>> +static inline unsigned int
>> +__attribute__ ((always_inline))
>> +lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
>> + const unsigned char *collseqmb,
>> + const char *collseqwc, int32_t table_size,
>> + const int32_t *symb_table,
>> + const unsigned char *extra)
>> +{
>> + if (br_elem->type == SB_CHAR)
>> + {
>> + /* if (MB_CUR_MAX == 1) */
>> + if (nrules == 0)
>> + return collseqmb[br_elem->opr.ch];
>> + else
>> + {
>> + wint_t wc = __btowc (br_elem->opr.ch);
>> + return __collseq_table_lookup (collseqwc, wc);
>> + }
>> + }
>> + else if (br_elem->type == MB_CHAR)
>> + {
>> + if (nrules != 0)
>> + return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
>> + }
>> + else if (br_elem->type == COLL_SYM)
>> + {
>> + size_t sym_name_len = strlen ((char *) br_elem->opr.name);
>> + if (nrules != 0)
>> + {
>> + int32_t elem, idx;
>> + elem = seek_collating_symbol_entry (br_elem->opr.name,
>> + sym_name_len,
>> + symb_table, table_size,
>> + extra);
>> + if (elem != -1)
>> + {
>> + /* We found the entry. */
>> + idx = symb_table[2 * elem + 1];
>> + /* Skip the name of collating element name. */
>> + idx += 1 + extra[idx];
>> + /* Skip the byte sequence of the collating element. */
>> + idx += 1 + extra[idx];
>> + /* Adjust for the alignment. */
>> + idx = (idx + 3) & ~3;
>> + /* Skip the multibyte collation sequence value. */
>> + idx += sizeof (unsigned int);
>> + /* Skip the wide char sequence of the collating element. */
>> + idx += sizeof (unsigned int) *
>> + (1 + *(unsigned int *) (extra + idx));
>> + /* Return the collation sequence value. */
>> + return *(unsigned int *) (extra + idx);
>> + }
>> + else if (sym_name_len == 1)
>> + {
>> + /* No valid character. Match it as a single byte
>> + character. */
>> + return collseqmb[br_elem->opr.name[0]];
>> + }
>> + }
>> + else if (sym_name_len == 1)
>> + return collseqmb[br_elem->opr.name[0]];
>> + }
>> + return UINT_MAX;
>> +}
>> +
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> + Build the range expression which starts from START_ELEM, and ends
>> + at END_ELEM. The result are written to MBCSET and SBCSET.
>> + RANGE_ALLOC is the allocated size of mbcset->range_starts, and
>> + mbcset->range_ends, is a pointer argument since we may
>> + update it. */
>> +
>> +static inline reg_errcode_t
>> +__attribute__ ((always_inline))
>> +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
>> + bracket_elem_t *start_elem, bracket_elem_t *end_elem,
>> + re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
>> + const unsigned char *collseqmb, const char *collseqwc,
>> + int32_t table_size, const int32_t *symb_table,
>> + const unsigned char *extra)
>> +{
>> + unsigned int ch;
>> + uint32_t start_collseq;
>> + uint32_t end_collseq;
>> +
>> + /* Equivalence Classes and Character Classes can't be a range
>> + start/end. */
>> + if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
>> + || start_elem->type == CHAR_CLASS
>> + || end_elem->type == EQUIV_CLASS
>> + || end_elem->type == CHAR_CLASS))
>> + return REG_ERANGE;
>> +
>> + /* FIXME: Implement rational ranges here, too. */
>> + start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
>> + table_size, symb_table, extra);
>> + end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
>> + table_size, symb_table, extra);
>> + /* Check start/end collation sequence values. */
>> + if (__glibc_unlikely (start_collseq == UINT_MAX
>> + || end_collseq == UINT_MAX))
>> + return REG_ECOLLATE;
>> + if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
>> + && start_collseq > end_collseq))
>> + return REG_ERANGE;
>> +
>> + /* Got valid collation sequence values, add them as a new entry.
>> + However, if we have no collation elements, and the character set
>> + is single byte, the single byte character set that we
>> + build below suffices. */
>> + if (nrules > 0 || dfa->mb_cur_max > 1)
>> + {
>> + /* Check the space of the arrays. */
>> + if (__glibc_unlikely (*range_alloc == mbcset->nranges))
>> + {
>> + /* There is not enough space, need realloc. */
>> + uint32_t *new_array_start;
>> + uint32_t *new_array_end;
>> + int new_nranges;
>> +
>> + /* +1 in case of mbcset->nranges is 0. */
>> + new_nranges = 2 * mbcset->nranges + 1;
>> + new_array_start = re_realloc (mbcset->range_starts, uint32_t,
>> + new_nranges);
>> + new_array_end = re_realloc (mbcset->range_ends, uint32_t,
>> + new_nranges);
>> +
>> + if (__glibc_unlikely (new_array_start == NULL
>> + || new_array_end == NULL))
>> + return REG_ESPACE;
>> +
>> + mbcset->range_starts = new_array_start;
>> + mbcset->range_ends = new_array_end;
>> + *range_alloc = new_nranges;
>> + }
>> +
>> + mbcset->range_starts[mbcset->nranges] = start_collseq;
>> + mbcset->range_ends[mbcset->nranges++] = end_collseq;
>> + }
>> +
>> + /* Build the table for single byte characters. */
>> + for (ch = 0; ch < SBC_MAX; ch++)
>> + {
>> + uint32_t ch_collseq;
>> + /* if (MB_CUR_MAX == 1) */
>> + if (nrules == 0)
>> + ch_collseq = collseqmb[ch];
>> + else
>> + ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
>> + if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
>> + bitset_set (sbcset, ch);
>> + }
>> + return REG_NOERROR;
>> +}
>> +
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> + Build the collating element which is represented by NAME.
>> + The result are written to MBCSET and SBCSET.
>> + COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
>> + pointer argument since we may update it. */
>> +
>> +static inline reg_errcode_t
>> +__attribute__ ((always_inline))
>> +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
>> + int *coll_sym_alloc, const unsigned char *name,
>> + uint32_t nrules, int32_t table_size,
>> + const int32_t *symb_table, const unsigned char *extra)
>> +{
>> + int32_t elem, idx;
>> + size_t name_len = strlen ((const char *) name);
>> + if (nrules != 0)
>> + {
>> + elem = seek_collating_symbol_entry (name, name_len, symb_table,
>> + table_size, extra);
>> + if (elem != -1)
>> + {
>> + /* We found the entry. */
>> + idx = symb_table[2 * elem + 1];
>> + /* Skip the name of collating element name. */
>> + idx += 1 + extra[idx];
>> + }
>> + else if (name_len == 1)
>> + {
>> + /* No valid character, treat it as a normal
>> + character. */
>> + bitset_set (sbcset, name[0]);
>> + return REG_NOERROR;
>> + }
>> + else
>> + return REG_ECOLLATE;
>> +
>> + /* Got valid collation sequence, add it as a new entry. */
>> + /* Check the space of the arrays. */
>> + if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
>> + {
>> + /* Not enough, realloc it. */
>> + /* +1 in case of mbcset->ncoll_syms is 0. */
>> + int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
>> + /* Use realloc since mbcset->coll_syms is NULL
>> + if *alloc == 0. */
>> + int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
>> + new_coll_sym_alloc);
>> + if (__glibc_unlikely (new_coll_syms == NULL))
>> + return REG_ESPACE;
>> + mbcset->coll_syms = new_coll_syms;
>> + *coll_sym_alloc = new_coll_sym_alloc;
>> + }
>> + mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
>> + return REG_NOERROR;
>> + }
>> + else
>> + {
>> + if (__glibc_unlikely (name_len != 1))
>> + return REG_ECOLLATE;
>> + else
>> + {
>> + bitset_set (sbcset, name[0]);
>> + return REG_NOERROR;
>> + }
>> + }
>> +}
>> +#endif /* _LIBC */
>> +
>> /* This function parse bracket expression like "[abc]", "[a-c]",
>> "[[.a-a.]]" etc. */
>>
>> @@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>> {
>> #ifdef _LIBC
>> const unsigned char *collseqmb;
>> - const char *collseqwc;
>> + const char *collseqwc = NULL;
>> uint32_t nrules;
>> - int32_t table_size;
>> - const int32_t *symb_table;
>> - const unsigned char *extra;
>> -
>> - /* Local function for parse_bracket_exp used in _LIBC environment.
>> - Seek the collating symbol entry corresponding to NAME.
>> - Return the index of the symbol in the SYMB_TABLE,
>> - or -1 if not found. */
>> -
>> - auto inline int32_t
>> - __attribute__ ((always_inline))
>> - seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
>> - {
>> - int32_t elem;
>> -
>> - for (elem = 0; elem < table_size; elem++)
>> - if (symb_table[2 * elem] != 0)
>> - {
>> - int32_t idx = symb_table[2 * elem + 1];
>> - /* Skip the name of collating element name. */
>> - idx += 1 + extra[idx];
>> - if (/* Compare the length of the name. */
>> - name_len == extra[idx]
>> - /* Compare the name. */
>> - && memcmp (name, &extra[idx + 1], name_len) == 0)
>> - /* Yep, this is the entry. */
>> - return elem;
>> - }
>> - return -1;
>> - }
>> -
>> - /* Local function for parse_bracket_exp used in _LIBC environment.
>> - Look up the collation sequence value of BR_ELEM.
>> - Return the value if succeeded, UINT_MAX otherwise. */
>> -
>> - auto inline unsigned int
>> - __attribute__ ((always_inline))
>> - lookup_collation_sequence_value (bracket_elem_t *br_elem)
>> - {
>> - if (br_elem->type == SB_CHAR)
>> - {
>> - /*
>> - if (MB_CUR_MAX == 1)
>> - */
>> - if (nrules == 0)
>> - return collseqmb[br_elem->opr.ch];
>> - else
>> - {
>> - wint_t wc = __btowc (br_elem->opr.ch);
>> - return __collseq_table_lookup (collseqwc, wc);
>> - }
>> - }
>> - else if (br_elem->type == MB_CHAR)
>> - {
>> - if (nrules != 0)
>> - return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
>> - }
>> - else if (br_elem->type == COLL_SYM)
>> - {
>> - size_t sym_name_len = strlen ((char *) br_elem->opr.name);
>> - if (nrules != 0)
>> - {
>> - int32_t elem, idx;
>> - elem = seek_collating_symbol_entry (br_elem->opr.name,
>> - sym_name_len);
>> - if (elem != -1)
>> - {
>> - /* We found the entry. */
>> - idx = symb_table[2 * elem + 1];
>> - /* Skip the name of collating element name. */
>> - idx += 1 + extra[idx];
>> - /* Skip the byte sequence of the collating element. */
>> - idx += 1 + extra[idx];
>> - /* Adjust for the alignment. */
>> - idx = (idx + 3) & ~3;
>> - /* Skip the multibyte collation sequence value. */
>> - idx += sizeof (unsigned int);
>> - /* Skip the wide char sequence of the collating element. */
>> - idx += sizeof (unsigned int) *
>> - (1 + *(unsigned int *) (extra + idx));
>> - /* Return the collation sequence value. */
>> - return *(unsigned int *) (extra + idx);
>> - }
>> - else if (sym_name_len == 1)
>> - {
>> - /* No valid character. Match it as a single byte
>> - character. */
>> - return collseqmb[br_elem->opr.name[0]];
>> - }
>> - }
>> - else if (sym_name_len == 1)
>> - return collseqmb[br_elem->opr.name[0]];
>> - }
>> - return UINT_MAX;
>> - }
>> -
>> - /* Local function for parse_bracket_exp used in _LIBC environment.
>> - Build the range expression which starts from START_ELEM, and ends
>> - at END_ELEM. The result are written to MBCSET and SBCSET.
>> - RANGE_ALLOC is the allocated size of mbcset->range_starts, and
>> - mbcset->range_ends, is a pointer argument since we may
>> - update it. */
>> -
>> - auto inline reg_errcode_t
>> - __attribute__ ((always_inline))
>> - build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
>> - bracket_elem_t *start_elem, bracket_elem_t *end_elem)
>> - {
>> - unsigned int ch;
>> - uint32_t start_collseq;
>> - uint32_t end_collseq;
>> -
>> - /* Equivalence Classes and Character Classes can't be a range
>> - start/end. */
>> - if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
>> - || start_elem->type == CHAR_CLASS
>> - || end_elem->type == EQUIV_CLASS
>> - || end_elem->type == CHAR_CLASS))
>> - return REG_ERANGE;
>> -
>> - /* FIXME: Implement rational ranges here, too. */
>> - start_collseq = lookup_collation_sequence_value (start_elem);
>> - end_collseq = lookup_collation_sequence_value (end_elem);
>> - /* Check start/end collation sequence values. */
>> - if (__glibc_unlikely (start_collseq == UINT_MAX
>> - || end_collseq == UINT_MAX))
>> - return REG_ECOLLATE;
>> - if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
>> - && start_collseq > end_collseq))
>> - return REG_ERANGE;
>> -
>> - /* Got valid collation sequence values, add them as a new entry.
>> - However, if we have no collation elements, and the character set
>> - is single byte, the single byte character set that we
>> - build below suffices. */
>> - if (nrules > 0 || dfa->mb_cur_max > 1)
>> - {
>> - /* Check the space of the arrays. */
>> - if (__glibc_unlikely (*range_alloc == mbcset->nranges))
>> - {
>> - /* There is not enough space, need realloc. */
>> - uint32_t *new_array_start;
>> - uint32_t *new_array_end;
>> - Idx new_nranges;
>> -
>> - /* +1 in case of mbcset->nranges is 0. */
>> - new_nranges = 2 * mbcset->nranges + 1;
>> - new_array_start = re_realloc (mbcset->range_starts, uint32_t,
>> - new_nranges);
>> - new_array_end = re_realloc (mbcset->range_ends, uint32_t,
>> - new_nranges);
>> -
>> - if (__glibc_unlikely (new_array_start == NULL
>> - || new_array_end == NULL))
>> - return REG_ESPACE;
>> -
>> - mbcset->range_starts = new_array_start;
>> - mbcset->range_ends = new_array_end;
>> - *range_alloc = new_nranges;
>> - }
>> -
>> - mbcset->range_starts[mbcset->nranges] = start_collseq;
>> - mbcset->range_ends[mbcset->nranges++] = end_collseq;
>> - }
>> -
>> - /* Build the table for single byte characters. */
>> - for (ch = 0; ch < SBC_MAX; ch++)
>> - {
>> - uint32_t ch_collseq;
>> - /*
>> - if (MB_CUR_MAX == 1)
>> - */
>> - if (nrules == 0)
>> - ch_collseq = collseqmb[ch];
>> - else
>> - ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
>> - if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
>> - bitset_set (sbcset, ch);
>> - }
>> - return REG_NOERROR;
>> - }
>> -
>> - /* Local function for parse_bracket_exp used in _LIBC environment.
>> - Build the collating element which is represented by NAME.
>> - The result are written to MBCSET and SBCSET.
>> - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
>> - pointer argument since we may update it. */
>> -
>> - auto inline reg_errcode_t
>> - __attribute__ ((always_inline))
>> - build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
>> - Idx *coll_sym_alloc, const unsigned char *name)
>> - {
>> - int32_t elem, idx;
>> - size_t name_len = strlen ((const char *) name);
>> - if (nrules != 0)
>> - {
>> - elem = seek_collating_symbol_entry (name, name_len);
>> - if (elem != -1)
>> - {
>> - /* We found the entry. */
>> - idx = symb_table[2 * elem + 1];
>> - /* Skip the name of collating element name. */
>> - idx += 1 + extra[idx];
>> - }
>> - else if (name_len == 1)
>> - {
>> - /* No valid character, treat it as a normal
>> - character. */
>> - bitset_set (sbcset, name[0]);
>> - return REG_NOERROR;
>> - }
>> - else
>> - return REG_ECOLLATE;
>> -
>> - /* Got valid collation sequence, add it as a new entry. */
>> - /* Check the space of the arrays. */
>> - if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
>> - {
>> - /* Not enough, realloc it. */
>> - /* +1 in case of mbcset->ncoll_syms is 0. */
>> - Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
>> - /* Use realloc since mbcset->coll_syms is NULL
>> - if *alloc == 0. */
>> - int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
>> - new_coll_sym_alloc);
>> - if (__glibc_unlikely (new_coll_syms == NULL))
>> - return REG_ESPACE;
>> - mbcset->coll_syms = new_coll_syms;
>> - *coll_sym_alloc = new_coll_sym_alloc;
>> - }
>> - mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
>> - return REG_NOERROR;
>> - }
>> - else
>> - {
>> - if (__glibc_unlikely (name_len != 1))
>> - return REG_ECOLLATE;
>> - else
>> - {
>> - bitset_set (sbcset, name[0]);
>> - return REG_NOERROR;
>> - }
>> - }
>> - }
>> + int32_t table_size = 0;
>> + const int32_t *symb_table = NULL;
>> + const unsigned char *extra = NULL;
>> #endif
>>
>> re_token_t br_token;
>> @@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>>
>> #ifdef _LIBC
>> *err = build_range_exp (sbcset, mbcset, &range_alloc,
>> - &start_elem, &end_elem);
>> + &start_elem, &end_elem,
>> + dfa, syntax, nrules, collseqmb, collseqwc,
>> + table_size, symb_table, extra);
>> #else
>> # ifdef RE_ENABLE_I18N
>> *err = build_range_exp (syntax, sbcset,
>> @@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>> #ifdef RE_ENABLE_I18N
>> mbcset, &coll_sym_alloc,
>> #endif /* RE_ENABLE_I18N */
>> - start_elem.opr.name);
>> + start_elem.opr.name,
>> + nrules, table_size, symb_table, extra);
>> if (__glibc_unlikely (*err != REG_NOERROR))
>> goto parse_bracket_exp_free_return;
>> break;
>>
>
@@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
}
#endif /* not _LIBC */
+#ifdef _LIBC
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Seek the collating symbol entry corresponding to NAME.
+ Return the index of the symbol in the SYMB_TABLE,
+ or -1 if not found. */
+
+static inline int32_t
+__attribute__ ((always_inline))
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+ const int32_t *symb_table, int32_t table_size,
+ const unsigned char *extra)
+{
+ int32_t elem;
+
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ int32_t idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ if (/* Compare the length of the name. */
+ name_len == extra[idx]
+ /* Compare the name. */
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
+ /* Yep, this is the entry. */
+ return elem;
+ }
+ return -1;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Look up the collation sequence value of BR_ELEM.
+ Return the value if succeeded, UINT_MAX otherwise. */
+
+static inline unsigned int
+__attribute__ ((always_inline))
+lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
+ const unsigned char *collseqmb,
+ const char *collseqwc, int32_t table_size,
+ const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ if (br_elem->type == SB_CHAR)
+ {
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ return collseqmb[br_elem->opr.ch];
+ else
+ {
+ wint_t wc = __btowc (br_elem->opr.ch);
+ return __collseq_table_lookup (collseqwc, wc);
+ }
+ }
+ else if (br_elem->type == MB_CHAR)
+ {
+ if (nrules != 0)
+ return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+ }
+ else if (br_elem->type == COLL_SYM)
+ {
+ size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+ if (nrules != 0)
+ {
+ int32_t elem, idx;
+ elem = seek_collating_symbol_entry (br_elem->opr.name,
+ sym_name_len,
+ symb_table, table_size,
+ extra);
+ if (elem != -1)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ /* Skip the byte sequence of the collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+ /* Skip the multibyte collation sequence value. */
+ idx += sizeof (unsigned int);
+ /* Skip the wide char sequence of the collating element. */
+ idx += sizeof (unsigned int) *
+ (1 + *(unsigned int *) (extra + idx));
+ /* Return the collation sequence value. */
+ return *(unsigned int *) (extra + idx);
+ }
+ else if (sym_name_len == 1)
+ {
+ /* No valid character. Match it as a single byte
+ character. */
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ }
+ else if (sym_name_len == 1)
+ return collseqmb[br_elem->opr.name[0]];
+ }
+ return UINT_MAX;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the range expression which starts from START_ELEM, and ends
+ at END_ELEM. The result are written to MBCSET and SBCSET.
+ RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+ mbcset->range_ends, is a pointer argument since we may
+ update it. */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+ re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
+ const unsigned char *collseqmb, const char *collseqwc,
+ int32_t table_size, const int32_t *symb_table,
+ const unsigned char *extra)
+{
+ unsigned int ch;
+ uint32_t start_collseq;
+ uint32_t end_collseq;
+
+ /* Equivalence Classes and Character Classes can't be a range
+ start/end. */
+ if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+ || start_elem->type == CHAR_CLASS
+ || end_elem->type == EQUIV_CLASS
+ || end_elem->type == CHAR_CLASS))
+ return REG_ERANGE;
+
+ /* FIXME: Implement rational ranges here, too. */
+ start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
+ end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
+ /* Check start/end collation sequence values. */
+ if (__glibc_unlikely (start_collseq == UINT_MAX
+ || end_collseq == UINT_MAX))
+ return REG_ECOLLATE;
+ if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+ && start_collseq > end_collseq))
+ return REG_ERANGE;
+
+ /* Got valid collation sequence values, add them as a new entry.
+ However, if we have no collation elements, and the character set
+ is single byte, the single byte character set that we
+ build below suffices. */
+ if (nrules > 0 || dfa->mb_cur_max > 1)
+ {
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+ {
+ /* There is not enough space, need realloc. */
+ uint32_t *new_array_start;
+ uint32_t *new_array_end;
+ int new_nranges;
+
+ /* +1 in case of mbcset->nranges is 0. */
+ new_nranges = 2 * mbcset->nranges + 1;
+ new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+ new_nranges);
+ new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+ new_nranges);
+
+ if (__glibc_unlikely (new_array_start == NULL
+ || new_array_end == NULL))
+ return REG_ESPACE;
+
+ mbcset->range_starts = new_array_start;
+ mbcset->range_ends = new_array_end;
+ *range_alloc = new_nranges;
+ }
+
+ mbcset->range_starts[mbcset->nranges] = start_collseq;
+ mbcset->range_ends[mbcset->nranges++] = end_collseq;
+ }
+
+ /* Build the table for single byte characters. */
+ for (ch = 0; ch < SBC_MAX; ch++)
+ {
+ uint32_t ch_collseq;
+ /* if (MB_CUR_MAX == 1) */
+ if (nrules == 0)
+ ch_collseq = collseqmb[ch];
+ else
+ ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+ if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+ bitset_set (sbcset, ch);
+ }
+ return REG_NOERROR;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+ Build the collating element which is represented by NAME.
+ The result are written to MBCSET and SBCSET.
+ COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+ pointer argument since we may update it. */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+ int *coll_sym_alloc, const unsigned char *name,
+ uint32_t nrules, int32_t table_size,
+ const int32_t *symb_table, const unsigned char *extra)
+{
+ int32_t elem, idx;
+ size_t name_len = strlen ((const char *) name);
+ if (nrules != 0)
+ {
+ elem = seek_collating_symbol_entry (name, name_len, symb_table,
+ table_size, extra);
+ if (elem != -1)
+ {
+ /* We found the entry. */
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element name. */
+ idx += 1 + extra[idx];
+ }
+ else if (name_len == 1)
+ {
+ /* No valid character, treat it as a normal
+ character. */
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ else
+ return REG_ECOLLATE;
+
+ /* Got valid collation sequence, add it as a new entry. */
+ /* Check the space of the arrays. */
+ if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
+ {
+ /* Not enough, realloc it. */
+ /* +1 in case of mbcset->ncoll_syms is 0. */
+ int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+ /* Use realloc since mbcset->coll_syms is NULL
+ if *alloc == 0. */
+ int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+ new_coll_sym_alloc);
+ if (__glibc_unlikely (new_coll_syms == NULL))
+ return REG_ESPACE;
+ mbcset->coll_syms = new_coll_syms;
+ *coll_sym_alloc = new_coll_sym_alloc;
+ }
+ mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+ return REG_NOERROR;
+ }
+ else
+ {
+ if (__glibc_unlikely (name_len != 1))
+ return REG_ECOLLATE;
+ else
+ {
+ bitset_set (sbcset, name[0]);
+ return REG_NOERROR;
+ }
+ }
+}
+#endif /* _LIBC */
+
/* This function parse bracket expression like "[abc]", "[a-c]",
"[[.a-a.]]" etc. */
@@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
{
#ifdef _LIBC
const unsigned char *collseqmb;
- const char *collseqwc;
+ const char *collseqwc = NULL;
uint32_t nrules;
- int32_t table_size;
- const int32_t *symb_table;
- const unsigned char *extra;
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Seek the collating symbol entry corresponding to NAME.
- Return the index of the symbol in the SYMB_TABLE,
- or -1 if not found. */
-
- auto inline int32_t
- __attribute__ ((always_inline))
- seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
- {
- int32_t elem;
-
- for (elem = 0; elem < table_size; elem++)
- if (symb_table[2 * elem] != 0)
- {
- int32_t idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- if (/* Compare the length of the name. */
- name_len == extra[idx]
- /* Compare the name. */
- && memcmp (name, &extra[idx + 1], name_len) == 0)
- /* Yep, this is the entry. */
- return elem;
- }
- return -1;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Look up the collation sequence value of BR_ELEM.
- Return the value if succeeded, UINT_MAX otherwise. */
-
- auto inline unsigned int
- __attribute__ ((always_inline))
- lookup_collation_sequence_value (bracket_elem_t *br_elem)
- {
- if (br_elem->type == SB_CHAR)
- {
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- return collseqmb[br_elem->opr.ch];
- else
- {
- wint_t wc = __btowc (br_elem->opr.ch);
- return __collseq_table_lookup (collseqwc, wc);
- }
- }
- else if (br_elem->type == MB_CHAR)
- {
- if (nrules != 0)
- return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
- }
- else if (br_elem->type == COLL_SYM)
- {
- size_t sym_name_len = strlen ((char *) br_elem->opr.name);
- if (nrules != 0)
- {
- int32_t elem, idx;
- elem = seek_collating_symbol_entry (br_elem->opr.name,
- sym_name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- /* Skip the byte sequence of the collating element. */
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
- /* Skip the multibyte collation sequence value. */
- idx += sizeof (unsigned int);
- /* Skip the wide char sequence of the collating element. */
- idx += sizeof (unsigned int) *
- (1 + *(unsigned int *) (extra + idx));
- /* Return the collation sequence value. */
- return *(unsigned int *) (extra + idx);
- }
- else if (sym_name_len == 1)
- {
- /* No valid character. Match it as a single byte
- character. */
- return collseqmb[br_elem->opr.name[0]];
- }
- }
- else if (sym_name_len == 1)
- return collseqmb[br_elem->opr.name[0]];
- }
- return UINT_MAX;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the range expression which starts from START_ELEM, and ends
- at END_ELEM. The result are written to MBCSET and SBCSET.
- RANGE_ALLOC is the allocated size of mbcset->range_starts, and
- mbcset->range_ends, is a pointer argument since we may
- update it. */
-
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
- bracket_elem_t *start_elem, bracket_elem_t *end_elem)
- {
- unsigned int ch;
- uint32_t start_collseq;
- uint32_t end_collseq;
-
- /* Equivalence Classes and Character Classes can't be a range
- start/end. */
- if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
- || start_elem->type == CHAR_CLASS
- || end_elem->type == EQUIV_CLASS
- || end_elem->type == CHAR_CLASS))
- return REG_ERANGE;
-
- /* FIXME: Implement rational ranges here, too. */
- start_collseq = lookup_collation_sequence_value (start_elem);
- end_collseq = lookup_collation_sequence_value (end_elem);
- /* Check start/end collation sequence values. */
- if (__glibc_unlikely (start_collseq == UINT_MAX
- || end_collseq == UINT_MAX))
- return REG_ECOLLATE;
- if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
- && start_collseq > end_collseq))
- return REG_ERANGE;
-
- /* Got valid collation sequence values, add them as a new entry.
- However, if we have no collation elements, and the character set
- is single byte, the single byte character set that we
- build below suffices. */
- if (nrules > 0 || dfa->mb_cur_max > 1)
- {
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*range_alloc == mbcset->nranges))
- {
- /* There is not enough space, need realloc. */
- uint32_t *new_array_start;
- uint32_t *new_array_end;
- Idx new_nranges;
-
- /* +1 in case of mbcset->nranges is 0. */
- new_nranges = 2 * mbcset->nranges + 1;
- new_array_start = re_realloc (mbcset->range_starts, uint32_t,
- new_nranges);
- new_array_end = re_realloc (mbcset->range_ends, uint32_t,
- new_nranges);
-
- if (__glibc_unlikely (new_array_start == NULL
- || new_array_end == NULL))
- return REG_ESPACE;
-
- mbcset->range_starts = new_array_start;
- mbcset->range_ends = new_array_end;
- *range_alloc = new_nranges;
- }
-
- mbcset->range_starts[mbcset->nranges] = start_collseq;
- mbcset->range_ends[mbcset->nranges++] = end_collseq;
- }
-
- /* Build the table for single byte characters. */
- for (ch = 0; ch < SBC_MAX; ch++)
- {
- uint32_t ch_collseq;
- /*
- if (MB_CUR_MAX == 1)
- */
- if (nrules == 0)
- ch_collseq = collseqmb[ch];
- else
- ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
- if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
- bitset_set (sbcset, ch);
- }
- return REG_NOERROR;
- }
-
- /* Local function for parse_bracket_exp used in _LIBC environment.
- Build the collating element which is represented by NAME.
- The result are written to MBCSET and SBCSET.
- COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
- pointer argument since we may update it. */
-
- auto inline reg_errcode_t
- __attribute__ ((always_inline))
- build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
- Idx *coll_sym_alloc, const unsigned char *name)
- {
- int32_t elem, idx;
- size_t name_len = strlen ((const char *) name);
- if (nrules != 0)
- {
- elem = seek_collating_symbol_entry (name, name_len);
- if (elem != -1)
- {
- /* We found the entry. */
- idx = symb_table[2 * elem + 1];
- /* Skip the name of collating element name. */
- idx += 1 + extra[idx];
- }
- else if (name_len == 1)
- {
- /* No valid character, treat it as a normal
- character. */
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- else
- return REG_ECOLLATE;
-
- /* Got valid collation sequence, add it as a new entry. */
- /* Check the space of the arrays. */
- if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
- {
- /* Not enough, realloc it. */
- /* +1 in case of mbcset->ncoll_syms is 0. */
- Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
- /* Use realloc since mbcset->coll_syms is NULL
- if *alloc == 0. */
- int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
- new_coll_sym_alloc);
- if (__glibc_unlikely (new_coll_syms == NULL))
- return REG_ESPACE;
- mbcset->coll_syms = new_coll_syms;
- *coll_sym_alloc = new_coll_sym_alloc;
- }
- mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
- return REG_NOERROR;
- }
- else
- {
- if (__glibc_unlikely (name_len != 1))
- return REG_ECOLLATE;
- else
- {
- bitset_set (sbcset, name[0]);
- return REG_NOERROR;
- }
- }
- }
+ int32_t table_size = 0;
+ const int32_t *symb_table = NULL;
+ const unsigned char *extra = NULL;
#endif
re_token_t br_token;
@@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef _LIBC
*err = build_range_exp (sbcset, mbcset, &range_alloc,
- &start_elem, &end_elem);
+ &start_elem, &end_elem,
+ dfa, syntax, nrules, collseqmb, collseqwc,
+ table_size, symb_table, extra);
#else
# ifdef RE_ENABLE_I18N
*err = build_range_exp (syntax, sbcset,
@@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
#ifdef RE_ENABLE_I18N
mbcset, &coll_sym_alloc,
#endif /* RE_ENABLE_I18N */
- start_elem.opr.name);
+ start_elem.opr.name,
+ nrules, table_size, symb_table, extra);
if (__glibc_unlikely (*err != REG_NOERROR))
goto parse_bracket_exp_free_return;
break;