diff mbox series

regex: Unnest nested functions in regcomp.c

Message ID 20211027052959.2549214-1-maskray@google.com
State Superseded
Headers show
Series regex: Unnest nested functions in regcomp.c | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Fāng-ruì Sòng Oct. 27, 2021, 5:29 a.m. UTC
If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
this patch removes the last `auto inline` usage and makes the file
buildable by Clang once we can resolve the "asm label after first use"
issue[1].

collseqwc, table_size, symb_table, extra are now initialized to appease
GCC -Werror=maybe-uninitialized false positive.

[1]: https://maskray.me/blog/2021-10-10-when-can-glibc-be-built-with-clang#asm-label-after-first-use
---
 posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
 1 file changed, 266 insertions(+), 248 deletions(-)

Comments

Paul Eggert Oct. 27, 2021, 9:35 a.m. UTC | #1
On 10/26/21 22:29, Fangrui Song wrote:
> collseqwc, table_size, symb_table, extra are now initialized to appease
> GCC -Werror=maybe-uninitialized false positive.

Are the diagnostics really false positives? As I understand it, the 
modified code would have undefined behavior if these variables were not 
initialized.

Can you comment on the efficiency of the modified code compared to the 
original, when compiled with recent GCC?
Joseph Myers Oct. 27, 2021, 4:10 p.m. UTC | #2
On Wed, 27 Oct 2021, Paul Eggert wrote:

> On 10/26/21 22:29, Fangrui Song wrote:
> > collseqwc, table_size, symb_table, extra are now initialized to appease
> > GCC -Werror=maybe-uninitialized false positive.
> 
> Are the diagnostics really false positives? As I understand it, the modified
> code would have undefined behavior if these variables were not initialized.

And if they are false positives, the glibc convention in such cases is 
generally to use the DIAG_* macros from libc-diag.h to suppress the 
warnings, with appropriate comments explaining why they are false 
positives, rather than adding an initialization that should not be 
necessary.  (Sometimes another approach is more appropriate to warning 
suppression - for example, a call to __builtin_unreachable that enables 
the compiler to see that certain paths through the code cannot actually 
occur.)
Fāng-ruì Sòng Oct. 28, 2021, 4:12 a.m. UTC | #3
On Wed, Oct 27, 2021 at 9:11 AM Joseph Myers <joseph@codesourcery.com> wrote:
>
> On Wed, 27 Oct 2021, Paul Eggert wrote:
>
> > On 10/26/21 22:29, Fangrui Song wrote:
> > > collseqwc, table_size, symb_table, extra are now initialized to appease
> > > GCC -Werror=maybe-uninitialized false positive.
> >
> > Are the diagnostics really false positives? As I understand it, the modified
> > code would have undefined behavior if these variables were not initialized.

Are sorry, the diagnostics are legitimate.

collseqwc, table_size, symb_table, extra are now passed as arguments
so they need to be initialized.
Previously they were accessed from `auto inline` nested functions.
With the delayed access GCC did not warn.

In file included from regex.c:74:
regcomp.c: In function ‘parse_expression’:
regcomp.c:3102:11: error: ‘table_size’ may be used uninitialized in
this function [-Werror=maybe-uninitialized]
 3102 |   int32_t table_size;
      |           ^~~~~~~~~~

I don't know how to compare performance difference but the new version
is just 16 bytes larger in .text on x86-64 (gcc -O2).

% ~/projects/bloaty/Release/bloaty /tmp/c/regex1.o -- /tmp/c/regex.o
    FILE SIZE        VM SIZE
 --------------  --------------
  +118%     +20  [ = ]       0    [Unmapped]
  +0.0%     +16  +0.0%     +16    .text
  -5.6%      -4  [ = ]       0    .data
  +0.0%     +32  +0.0%     +16    TOTAL

The code move apparently triggers large assembly move in GCC's code generation.
The 16 byte .text increase is likely due to the now needed zero
initialization for the four variables.

% diff -u =(llvm-objdump -d --symbolize-operands -M intel
--no-leading-addr --no-show-raw-insn /tmp/c/regex.o) =(llvm-objdump -d
--symbolize-operands -M intel --no-leading-addr --no-show-raw-insn
/tmp/c/regex1.o)
...
@@ -13236,7 +13236,11 @@
                        mov     r15d, ebx
                        test    ebx, ebx
                        jne      <L33>
-<L102>:
+                       mov     qword ptr [rsp + 96], 0
+                       mov     qword ptr [rsp + 120], 0
+                       mov     dword ptr [rsp + 116], 0
+                       mov     qword ptr [rsp + 88], 0
+<L94>:
                        mov     esi, 1
                        mov     edi, 32
                        call     <L34>

> And if they are false positives, the glibc convention in such cases is
> generally to use the DIAG_* macros from libc-diag.h to suppress the
> warnings, with appropriate comments explaining why they are false
> positives, rather than adding an initialization that should not be
> necessary.  (Sometimes another approach is more appropriate to warning
> suppression - for example, a call to __builtin_unreachable that enables
> the compiler to see that certain paths through the code cannot actually
> occur.)

Thanks for the tip.

> --
> Joseph S. Myers
> joseph@codesourcery.com
Carlos O'Donell Nov. 2, 2021, 2:40 a.m. UTC | #4
On 10/27/21 01:29, Fangrui Song via Libc-alpha wrote:
> If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
> this patch removes the last `auto inline` usage and makes the file
> buildable by Clang once we can resolve the "asm label after first use"
> issue[1].
> 
> collseqwc, table_size, symb_table, extra are now initialized to appease
> GCC -Werror=maybe-uninitialized false positive.

High-level: Overall the refactor looks correct and removes a nested function which we
have as a community agreed is OK.

Implementation: The refactor moves four functions out of nested scope and converts
them into static always_inline functions. Which overall is correct.

Details: The commit message needs an update and you answered Paul's performance question.

Please repost a v2 with an updated commit message indicating that the variables are now
initialized to zero because that is required by the refactoring (Paul's comment).

You also answered Paul's request to comment on the performance of the new code, and as
far as I can tell it just adds 4 stores in the hot path. This is nothing compared to
the amount of work that regcomp has to do looking up the collation weights, or other
functions IMO.

I'll review the v2 once posted.

Review inline below.
 
> [1]: https://maskray.me/blog/2021-10-10-when-can-glibc-be-built-with-clang#asm-label-after-first-use
> ---
>  posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
>  1 file changed, 266 insertions(+), 248 deletions(-)
> 
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index 887e5b5068..e2b5c2807a 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
>  }
>  #endif /* not _LIBC */
>  
> +#ifdef _LIBC
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Seek the collating symbol entry corresponding to NAME.
> +   Return the index of the symbol in the SYMB_TABLE,
> +   or -1 if not found.  */
> +
> +static inline int32_t
> +__attribute__ ((always_inline))

OK. Static inline and always inline.

> +seek_collating_symbol_entry (const unsigned char *name, size_t name_len,

OK. Add seek_collating_symbol_entry (1/4) matches original.

> +			     const int32_t *symb_table, int32_t table_size,
> +			     const unsigned char *extra)
> +{
> +  int32_t elem;
> +
> +  for (elem = 0; elem < table_size; elem++)
> +    if (symb_table[2 * elem] != 0)
> +      {
> +	int32_t idx = symb_table[2 * elem + 1];
> +	/* Skip the name of collating element name.  */
> +	idx += 1 + extra[idx];
> +	if (/* Compare the length of the name.  */
> +	    name_len == extra[idx]
> +	    /* Compare the name.  */
> +	    && memcmp (name, &extra[idx + 1], name_len) == 0)
> +	  /* Yep, this is the entry.  */
> +	  return elem;
> +      }
> +  return -1;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Look up the collation sequence value of BR_ELEM.
> +   Return the value if succeeded, UINT_MAX otherwise.  */
> +
> +static inline unsigned int
> +__attribute__ ((always_inline))

OK. Static inline and always inline.

> +lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,

OK. Add lookup_collation_sequence_value (2/4) matches original.

> +				 const unsigned char *collseqmb,
> +				 const char *collseqwc, int32_t table_size,
> +				 const int32_t *symb_table,
> +				 const unsigned char *extra)
> +{
> +  if (br_elem->type == SB_CHAR)
> +    {
> +      /* if (MB_CUR_MAX == 1) */
> +      if (nrules == 0)
> +	return collseqmb[br_elem->opr.ch];
> +      else
> +	{
> +	  wint_t wc = __btowc (br_elem->opr.ch);
> +	  return __collseq_table_lookup (collseqwc, wc);
> +	}
> +    }
> +  else if (br_elem->type == MB_CHAR)
> +    {
> +      if (nrules != 0)
> +	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> +    }
> +  else if (br_elem->type == COLL_SYM)
> +    {
> +      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> +      if (nrules != 0)
> +	{
> +	  int32_t elem, idx;
> +	  elem = seek_collating_symbol_entry (br_elem->opr.name,
> +					      sym_name_len,
> +					      symb_table, table_size,
> +					      extra);
> +	  if (elem != -1)
> +	    {
> +	      /* We found the entry.  */
> +	      idx = symb_table[2 * elem + 1];
> +	      /* Skip the name of collating element name.  */
> +	      idx += 1 + extra[idx];
> +	      /* Skip the byte sequence of the collating element.  */
> +	      idx += 1 + extra[idx];
> +	      /* Adjust for the alignment.  */
> +	      idx = (idx + 3) & ~3;
> +	      /* Skip the multibyte collation sequence value.  */
> +	      idx += sizeof (unsigned int);
> +	      /* Skip the wide char sequence of the collating element.  */
> +	      idx += sizeof (unsigned int) *
> +		(1 + *(unsigned int *) (extra + idx));
> +	      /* Return the collation sequence value.  */
> +	      return *(unsigned int *) (extra + idx);
> +	    }
> +	  else if (sym_name_len == 1)
> +	    {
> +	      /* No valid character.  Match it as a single byte
> +		 character.  */
> +	      return collseqmb[br_elem->opr.name[0]];
> +	    }
> +	}
> +      else if (sym_name_len == 1)
> +	return collseqmb[br_elem->opr.name[0]];
> +    }
> +  return UINT_MAX;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Build the range expression which starts from START_ELEM, and ends
> +   at END_ELEM.  The result are written to MBCSET and SBCSET.
> +   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> +   mbcset->range_ends, is a pointer argument since we may
> +   update it.  */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))

OK. Static inline and always inline.

> +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,

OK. Add build_range_exp (3/4) matches original.

> +		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
> +		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
> +		 const unsigned char *collseqmb, const char *collseqwc,
> +		 int32_t table_size, const int32_t *symb_table,
> +		 const unsigned char *extra)
> +{
> +  unsigned int ch;
> +  uint32_t start_collseq;
> +  uint32_t end_collseq;
> +
> +  /* Equivalence Classes and Character Classes can't be a range
> +     start/end.  */
> +  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> +                        || start_elem->type == CHAR_CLASS
> +                        || end_elem->type == EQUIV_CLASS
> +                        || end_elem->type == CHAR_CLASS))
> +    return REG_ERANGE;
> +
> +  /* FIXME: Implement rational ranges here, too.  */
> +  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
> +						   table_size, symb_table, extra);
> +  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
> +						 table_size, symb_table, extra);
> +  /* Check start/end collation sequence values.  */
> +  if (__glibc_unlikely (start_collseq == UINT_MAX
> +                        || end_collseq == UINT_MAX))
> +    return REG_ECOLLATE;
> +  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> +                        && start_collseq > end_collseq))
> +    return REG_ERANGE;
> +
> +  /* Got valid collation sequence values, add them as a new entry.
> +     However, if we have no collation elements, and the character set
> +     is single byte, the single byte character set that we
> +     build below suffices. */
> +  if (nrules > 0 || dfa->mb_cur_max > 1)
> +    {
> +      /* Check the space of the arrays.  */
> +      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> +	{
> +	  /* There is not enough space, need realloc.  */
> +	  uint32_t *new_array_start;
> +	  uint32_t *new_array_end;
> +	  int new_nranges;
> +
> +	  /* +1 in case of mbcset->nranges is 0.  */
> +	  new_nranges = 2 * mbcset->nranges + 1;
> +	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> +					new_nranges);
> +	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> +				      new_nranges);
> +
> +          if (__glibc_unlikely (new_array_start == NULL
> +                                || new_array_end == NULL))
> +	    return REG_ESPACE;
> +
> +	  mbcset->range_starts = new_array_start;
> +	  mbcset->range_ends = new_array_end;
> +	  *range_alloc = new_nranges;
> +	}
> +
> +      mbcset->range_starts[mbcset->nranges] = start_collseq;
> +      mbcset->range_ends[mbcset->nranges++] = end_collseq;
> +    }
> +
> +  /* Build the table for single byte characters.  */
> +  for (ch = 0; ch < SBC_MAX; ch++)
> +    {
> +      uint32_t ch_collseq;
> +      /* if (MB_CUR_MAX == 1) */
> +      if (nrules == 0)
> +	ch_collseq = collseqmb[ch];
> +      else
> +	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> +      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> +	bitset_set (sbcset, ch);
> +    }
> +  return REG_NOERROR;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Build the collating element which is represented by NAME.
> +   The result are written to MBCSET and SBCSET.
> +   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> +   pointer argument since we may update it.  */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))

OK. Static inline and always inline.

> +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,

OK. Add build_collating_symbol (4/4), matches original.

> +			int *coll_sym_alloc, const unsigned char *name,
> +			uint32_t nrules, int32_t table_size,
> +			const int32_t *symb_table, const unsigned char *extra)
> +{
> +  int32_t elem, idx;
> +  size_t name_len = strlen ((const char *) name);
> +  if (nrules != 0)
> +    {
> +      elem = seek_collating_symbol_entry (name, name_len, symb_table,
> +					  table_size, extra);
> +      if (elem != -1)
> +	{
> +	  /* We found the entry.  */
> +	  idx = symb_table[2 * elem + 1];
> +	  /* Skip the name of collating element name.  */
> +	  idx += 1 + extra[idx];
> +	}
> +      else if (name_len == 1)
> +	{
> +	  /* No valid character, treat it as a normal
> +	     character.  */
> +	  bitset_set (sbcset, name[0]);
> +	  return REG_NOERROR;
> +	}
> +      else
> +	return REG_ECOLLATE;
> +
> +      /* Got valid collation sequence, add it as a new entry.  */
> +      /* Check the space of the arrays.  */
> +      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> +	{
> +	  /* Not enough, realloc it.  */
> +	  /* +1 in case of mbcset->ncoll_syms is 0.  */
> +	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> +	  /* Use realloc since mbcset->coll_syms is NULL
> +	     if *alloc == 0.  */
> +	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> +					       new_coll_sym_alloc);
> +          if (__glibc_unlikely (new_coll_syms == NULL))
> +	    return REG_ESPACE;
> +	  mbcset->coll_syms = new_coll_syms;
> +	  *coll_sym_alloc = new_coll_sym_alloc;
> +	}
> +      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> +      return REG_NOERROR;
> +    }
> +  else
> +    {
> +      if (__glibc_unlikely (name_len != 1))
> +	return REG_ECOLLATE;
> +      else
> +	{
> +	  bitset_set (sbcset, name[0]);
> +	  return REG_NOERROR;
> +	}
> +    }
> +}
> +#endif /* _LIBC */
> +
>  /* This function parse bracket expression like "[abc]", "[a-c]",
>     "[[.a-a.]]" etc.  */
>  
> @@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  {
>  #ifdef _LIBC
>    const unsigned char *collseqmb;
> -  const char *collseqwc;
> +  const char *collseqwc = NULL;

OK. Set collseqwc to NULL.

>    uint32_t nrules;
> -  int32_t table_size;
> -  const int32_t *symb_table;
> -  const unsigned char *extra;
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Seek the collating symbol entry corresponding to NAME.
> -     Return the index of the symbol in the SYMB_TABLE,
> -     or -1 if not found.  */
> -
> -  auto inline int32_t
> -  __attribute__ ((always_inline))
> -  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
> -    {
> -      int32_t elem;
> -
> -      for (elem = 0; elem < table_size; elem++)
> -	if (symb_table[2 * elem] != 0)
> -	  {
> -	    int32_t idx = symb_table[2 * elem + 1];
> -	    /* Skip the name of collating element name.  */
> -	    idx += 1 + extra[idx];
> -	    if (/* Compare the length of the name.  */
> -		name_len == extra[idx]
> -		/* Compare the name.  */
> -		&& memcmp (name, &extra[idx + 1], name_len) == 0)
> -	      /* Yep, this is the entry.  */
> -	      return elem;
> -	  }
> -      return -1;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Look up the collation sequence value of BR_ELEM.
> -     Return the value if succeeded, UINT_MAX otherwise.  */
> -
> -  auto inline unsigned int
> -  __attribute__ ((always_inline))
> -  lookup_collation_sequence_value (bracket_elem_t *br_elem)
> -    {
> -      if (br_elem->type == SB_CHAR)
> -	{
> -	  /*
> -	  if (MB_CUR_MAX == 1)
> -	  */
> -	  if (nrules == 0)
> -	    return collseqmb[br_elem->opr.ch];
> -	  else
> -	    {
> -	      wint_t wc = __btowc (br_elem->opr.ch);
> -	      return __collseq_table_lookup (collseqwc, wc);
> -	    }
> -	}
> -      else if (br_elem->type == MB_CHAR)
> -	{
> -	  if (nrules != 0)
> -	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> -	}
> -      else if (br_elem->type == COLL_SYM)
> -	{
> -	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> -	  if (nrules != 0)
> -	    {
> -	      int32_t elem, idx;
> -	      elem = seek_collating_symbol_entry (br_elem->opr.name,
> -						  sym_name_len);
> -	      if (elem != -1)
> -		{
> -		  /* We found the entry.  */
> -		  idx = symb_table[2 * elem + 1];
> -		  /* Skip the name of collating element name.  */
> -		  idx += 1 + extra[idx];
> -		  /* Skip the byte sequence of the collating element.  */
> -		  idx += 1 + extra[idx];
> -		  /* Adjust for the alignment.  */
> -		  idx = (idx + 3) & ~3;
> -		  /* Skip the multibyte collation sequence value.  */
> -		  idx += sizeof (unsigned int);
> -		  /* Skip the wide char sequence of the collating element.  */
> -		  idx += sizeof (unsigned int) *
> -		    (1 + *(unsigned int *) (extra + idx));
> -		  /* Return the collation sequence value.  */
> -		  return *(unsigned int *) (extra + idx);
> -		}
> -	      else if (sym_name_len == 1)
> -		{
> -		  /* No valid character.  Match it as a single byte
> -		     character.  */
> -		  return collseqmb[br_elem->opr.name[0]];
> -		}
> -	    }
> -	  else if (sym_name_len == 1)
> -	    return collseqmb[br_elem->opr.name[0]];
> -	}
> -      return UINT_MAX;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Build the range expression which starts from START_ELEM, and ends
> -     at END_ELEM.  The result are written to MBCSET and SBCSET.
> -     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> -     mbcset->range_ends, is a pointer argument since we may
> -     update it.  */
> -
> -  auto inline reg_errcode_t
> -  __attribute__ ((always_inline))
> -  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
> -		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
> -    {
> -      unsigned int ch;
> -      uint32_t start_collseq;
> -      uint32_t end_collseq;
> -
> -      /* Equivalence Classes and Character Classes can't be a range
> -	 start/end.  */
> -      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> -			    || start_elem->type == CHAR_CLASS
> -			    || end_elem->type == EQUIV_CLASS
> -			    || end_elem->type == CHAR_CLASS))
> -	return REG_ERANGE;
> -
> -      /* FIXME: Implement rational ranges here, too.  */
> -      start_collseq = lookup_collation_sequence_value (start_elem);
> -      end_collseq = lookup_collation_sequence_value (end_elem);
> -      /* Check start/end collation sequence values.  */
> -      if (__glibc_unlikely (start_collseq == UINT_MAX
> -			    || end_collseq == UINT_MAX))
> -	return REG_ECOLLATE;
> -      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> -			    && start_collseq > end_collseq))
> -	return REG_ERANGE;
> -
> -      /* Got valid collation sequence values, add them as a new entry.
> -	 However, if we have no collation elements, and the character set
> -	 is single byte, the single byte character set that we
> -	 build below suffices. */
> -      if (nrules > 0 || dfa->mb_cur_max > 1)
> -	{
> -	  /* Check the space of the arrays.  */
> -	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> -	    {
> -	      /* There is not enough space, need realloc.  */
> -	      uint32_t *new_array_start;
> -	      uint32_t *new_array_end;
> -	      Idx new_nranges;
> -
> -	      /* +1 in case of mbcset->nranges is 0.  */
> -	      new_nranges = 2 * mbcset->nranges + 1;
> -	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> -					    new_nranges);
> -	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> -					  new_nranges);
> -
> -	      if (__glibc_unlikely (new_array_start == NULL
> -				    || new_array_end == NULL))
> -		return REG_ESPACE;
> -
> -	      mbcset->range_starts = new_array_start;
> -	      mbcset->range_ends = new_array_end;
> -	      *range_alloc = new_nranges;
> -	    }
> -
> -	  mbcset->range_starts[mbcset->nranges] = start_collseq;
> -	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
> -	}
> -
> -      /* Build the table for single byte characters.  */
> -      for (ch = 0; ch < SBC_MAX; ch++)
> -	{
> -	  uint32_t ch_collseq;
> -	  /*
> -	  if (MB_CUR_MAX == 1)
> -	  */
> -	  if (nrules == 0)
> -	    ch_collseq = collseqmb[ch];
> -	  else
> -	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> -	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> -	    bitset_set (sbcset, ch);
> -	}
> -      return REG_NOERROR;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Build the collating element which is represented by NAME.
> -     The result are written to MBCSET and SBCSET.
> -     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> -     pointer argument since we may update it.  */
> -
> -  auto inline reg_errcode_t
> -  __attribute__ ((always_inline))
> -  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
> -			  Idx *coll_sym_alloc, const unsigned char *name)
> -    {
> -      int32_t elem, idx;
> -      size_t name_len = strlen ((const char *) name);
> -      if (nrules != 0)
> -	{
> -	  elem = seek_collating_symbol_entry (name, name_len);
> -	  if (elem != -1)
> -	    {
> -	      /* We found the entry.  */
> -	      idx = symb_table[2 * elem + 1];
> -	      /* Skip the name of collating element name.  */
> -	      idx += 1 + extra[idx];
> -	    }
> -	  else if (name_len == 1)
> -	    {
> -	      /* No valid character, treat it as a normal
> -		 character.  */
> -	      bitset_set (sbcset, name[0]);
> -	      return REG_NOERROR;
> -	    }
> -	  else
> -	    return REG_ECOLLATE;
> -
> -	  /* Got valid collation sequence, add it as a new entry.  */
> -	  /* Check the space of the arrays.  */
> -	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> -	    {
> -	      /* Not enough, realloc it.  */
> -	      /* +1 in case of mbcset->ncoll_syms is 0.  */
> -	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> -	      /* Use realloc since mbcset->coll_syms is NULL
> -		 if *alloc == 0.  */
> -	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> -						   new_coll_sym_alloc);
> -	      if (__glibc_unlikely (new_coll_syms == NULL))
> -		return REG_ESPACE;
> -	      mbcset->coll_syms = new_coll_syms;
> -	      *coll_sym_alloc = new_coll_sym_alloc;
> -	    }
> -	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> -	  return REG_NOERROR;
> -	}
> -      else
> -	{
> -	  if (__glibc_unlikely (name_len != 1))
> -	    return REG_ECOLLATE;
> -	  else
> -	    {
> -	      bitset_set (sbcset, name[0]);
> -	      return REG_NOERROR;
> -	    }
> -	}
> -    }

OK. Remove 4 nested functions.

> +  int32_t table_size = 0;
> +  const int32_t *symb_table = NULL;
> +  const unsigned char *extra = NULL;

OK. Initialize table_size, symb_table and extra.

>  #endif
>  
>    re_token_t br_token;
> @@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  
>  #ifdef _LIBC
>  	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
> -				  &start_elem, &end_elem);
> +				  &start_elem, &end_elem,
> +				  dfa, syntax, nrules, collseqmb, collseqwc,
> +				  table_size, symb_table, extra);

OK. Pass new parameters required for the function to operate.

>  #else
>  # ifdef RE_ENABLE_I18N
>  	  *err = build_range_exp (syntax, sbcset,
> @@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  #ifdef RE_ENABLE_I18N
>  					     mbcset, &coll_sym_alloc,
>  #endif /* RE_ENABLE_I18N */
> -					     start_elem.opr.name);
> +					     start_elem.opr.name,
> +					     nrules, table_size, symb_table, extra);

OK. Likewise.

>  	      if (__glibc_unlikely (*err != REG_NOERROR))
>  		goto parse_bracket_exp_free_return;
>  	      break;
>
diff mbox series

Patch

diff --git a/posix/regcomp.c b/posix/regcomp.c
index 887e5b5068..e2b5c2807a 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -2831,6 +2831,263 @@  build_collating_symbol (bitset_t sbcset, const unsigned char *name)
 }
 #endif /* not _LIBC */
 
+#ifdef _LIBC
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Seek the collating symbol entry corresponding to NAME.
+   Return the index of the symbol in the SYMB_TABLE,
+   or -1 if not found.  */
+
+static inline int32_t
+__attribute__ ((always_inline))
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+			     const int32_t *symb_table, int32_t table_size,
+			     const unsigned char *extra)
+{
+  int32_t elem;
+
+  for (elem = 0; elem < table_size; elem++)
+    if (symb_table[2 * elem] != 0)
+      {
+	int32_t idx = symb_table[2 * elem + 1];
+	/* Skip the name of collating element name.  */
+	idx += 1 + extra[idx];
+	if (/* Compare the length of the name.  */
+	    name_len == extra[idx]
+	    /* Compare the name.  */
+	    && memcmp (name, &extra[idx + 1], name_len) == 0)
+	  /* Yep, this is the entry.  */
+	  return elem;
+      }
+  return -1;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Look up the collation sequence value of BR_ELEM.
+   Return the value if succeeded, UINT_MAX otherwise.  */
+
+static inline unsigned int
+__attribute__ ((always_inline))
+lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
+				 const unsigned char *collseqmb,
+				 const char *collseqwc, int32_t table_size,
+				 const int32_t *symb_table,
+				 const unsigned char *extra)
+{
+  if (br_elem->type == SB_CHAR)
+    {
+      /* if (MB_CUR_MAX == 1) */
+      if (nrules == 0)
+	return collseqmb[br_elem->opr.ch];
+      else
+	{
+	  wint_t wc = __btowc (br_elem->opr.ch);
+	  return __collseq_table_lookup (collseqwc, wc);
+	}
+    }
+  else if (br_elem->type == MB_CHAR)
+    {
+      if (nrules != 0)
+	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+    }
+  else if (br_elem->type == COLL_SYM)
+    {
+      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+      if (nrules != 0)
+	{
+	  int32_t elem, idx;
+	  elem = seek_collating_symbol_entry (br_elem->opr.name,
+					      sym_name_len,
+					      symb_table, table_size,
+					      extra);
+	  if (elem != -1)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	      /* Skip the byte sequence of the collating element.  */
+	      idx += 1 + extra[idx];
+	      /* Adjust for the alignment.  */
+	      idx = (idx + 3) & ~3;
+	      /* Skip the multibyte collation sequence value.  */
+	      idx += sizeof (unsigned int);
+	      /* Skip the wide char sequence of the collating element.  */
+	      idx += sizeof (unsigned int) *
+		(1 + *(unsigned int *) (extra + idx));
+	      /* Return the collation sequence value.  */
+	      return *(unsigned int *) (extra + idx);
+	    }
+	  else if (sym_name_len == 1)
+	    {
+	      /* No valid character.  Match it as a single byte
+		 character.  */
+	      return collseqmb[br_elem->opr.name[0]];
+	    }
+	}
+      else if (sym_name_len == 1)
+	return collseqmb[br_elem->opr.name[0]];
+    }
+  return UINT_MAX;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Build the range expression which starts from START_ELEM, and ends
+   at END_ELEM.  The result are written to MBCSET and SBCSET.
+   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+   mbcset->range_ends, is a pointer argument since we may
+   update it.  */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
+		 const unsigned char *collseqmb, const char *collseqwc,
+		 int32_t table_size, const int32_t *symb_table,
+		 const unsigned char *extra)
+{
+  unsigned int ch;
+  uint32_t start_collseq;
+  uint32_t end_collseq;
+
+  /* Equivalence Classes and Character Classes can't be a range
+     start/end.  */
+  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+                        || start_elem->type == CHAR_CLASS
+                        || end_elem->type == EQUIV_CLASS
+                        || end_elem->type == CHAR_CLASS))
+    return REG_ERANGE;
+
+  /* FIXME: Implement rational ranges here, too.  */
+  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
+						   table_size, symb_table, extra);
+  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
+						 table_size, symb_table, extra);
+  /* Check start/end collation sequence values.  */
+  if (__glibc_unlikely (start_collseq == UINT_MAX
+                        || end_collseq == UINT_MAX))
+    return REG_ECOLLATE;
+  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+                        && start_collseq > end_collseq))
+    return REG_ERANGE;
+
+  /* Got valid collation sequence values, add them as a new entry.
+     However, if we have no collation elements, and the character set
+     is single byte, the single byte character set that we
+     build below suffices. */
+  if (nrules > 0 || dfa->mb_cur_max > 1)
+    {
+      /* Check the space of the arrays.  */
+      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+	{
+	  /* There is not enough space, need realloc.  */
+	  uint32_t *new_array_start;
+	  uint32_t *new_array_end;
+	  int new_nranges;
+
+	  /* +1 in case of mbcset->nranges is 0.  */
+	  new_nranges = 2 * mbcset->nranges + 1;
+	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					new_nranges);
+	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+				      new_nranges);
+
+          if (__glibc_unlikely (new_array_start == NULL
+                                || new_array_end == NULL))
+	    return REG_ESPACE;
+
+	  mbcset->range_starts = new_array_start;
+	  mbcset->range_ends = new_array_end;
+	  *range_alloc = new_nranges;
+	}
+
+      mbcset->range_starts[mbcset->nranges] = start_collseq;
+      mbcset->range_ends[mbcset->nranges++] = end_collseq;
+    }
+
+  /* Build the table for single byte characters.  */
+  for (ch = 0; ch < SBC_MAX; ch++)
+    {
+      uint32_t ch_collseq;
+      /* if (MB_CUR_MAX == 1) */
+      if (nrules == 0)
+	ch_collseq = collseqmb[ch];
+      else
+	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	bitset_set (sbcset, ch);
+    }
+  return REG_NOERROR;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Build the collating element which is represented by NAME.
+   The result are written to MBCSET and SBCSET.
+   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+   pointer argument since we may update it.  */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name,
+			uint32_t nrules, int32_t table_size,
+			const int32_t *symb_table, const unsigned char *extra)
+{
+  int32_t elem, idx;
+  size_t name_len = strlen ((const char *) name);
+  if (nrules != 0)
+    {
+      elem = seek_collating_symbol_entry (name, name_len, symb_table,
+					  table_size, extra);
+      if (elem != -1)
+	{
+	  /* We found the entry.  */
+	  idx = symb_table[2 * elem + 1];
+	  /* Skip the name of collating element name.  */
+	  idx += 1 + extra[idx];
+	}
+      else if (name_len == 1)
+	{
+	  /* No valid character, treat it as a normal
+	     character.  */
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+      else
+	return REG_ECOLLATE;
+
+      /* Got valid collation sequence, add it as a new entry.  */
+      /* Check the space of the arrays.  */
+      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->ncoll_syms is 0.  */
+	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	  /* Use realloc since mbcset->coll_syms is NULL
+	     if *alloc == 0.  */
+	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+					       new_coll_sym_alloc);
+          if (__glibc_unlikely (new_coll_syms == NULL))
+	    return REG_ESPACE;
+	  mbcset->coll_syms = new_coll_syms;
+	  *coll_sym_alloc = new_coll_sym_alloc;
+	}
+      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+      return REG_NOERROR;
+    }
+  else
+    {
+      if (__glibc_unlikely (name_len != 1))
+	return REG_ECOLLATE;
+      else
+	{
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+    }
+}
+#endif /* _LIBC */
+
 /* This function parse bracket expression like "[abc]", "[a-c]",
    "[[.a-a.]]" etc.  */
 
@@ -2840,253 +3097,11 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 {
 #ifdef _LIBC
   const unsigned char *collseqmb;
-  const char *collseqwc;
+  const char *collseqwc = NULL;
   uint32_t nrules;
-  int32_t table_size;
-  const int32_t *symb_table;
-  const unsigned char *extra;
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Seek the collating symbol entry corresponding to NAME.
-     Return the index of the symbol in the SYMB_TABLE,
-     or -1 if not found.  */
-
-  auto inline int32_t
-  __attribute__ ((always_inline))
-  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
-    {
-      int32_t elem;
-
-      for (elem = 0; elem < table_size; elem++)
-	if (symb_table[2 * elem] != 0)
-	  {
-	    int32_t idx = symb_table[2 * elem + 1];
-	    /* Skip the name of collating element name.  */
-	    idx += 1 + extra[idx];
-	    if (/* Compare the length of the name.  */
-		name_len == extra[idx]
-		/* Compare the name.  */
-		&& memcmp (name, &extra[idx + 1], name_len) == 0)
-	      /* Yep, this is the entry.  */
-	      return elem;
-	  }
-      return -1;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Look up the collation sequence value of BR_ELEM.
-     Return the value if succeeded, UINT_MAX otherwise.  */
-
-  auto inline unsigned int
-  __attribute__ ((always_inline))
-  lookup_collation_sequence_value (bracket_elem_t *br_elem)
-    {
-      if (br_elem->type == SB_CHAR)
-	{
-	  /*
-	  if (MB_CUR_MAX == 1)
-	  */
-	  if (nrules == 0)
-	    return collseqmb[br_elem->opr.ch];
-	  else
-	    {
-	      wint_t wc = __btowc (br_elem->opr.ch);
-	      return __collseq_table_lookup (collseqwc, wc);
-	    }
-	}
-      else if (br_elem->type == MB_CHAR)
-	{
-	  if (nrules != 0)
-	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
-	}
-      else if (br_elem->type == COLL_SYM)
-	{
-	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
-	  if (nrules != 0)
-	    {
-	      int32_t elem, idx;
-	      elem = seek_collating_symbol_entry (br_elem->opr.name,
-						  sym_name_len);
-	      if (elem != -1)
-		{
-		  /* We found the entry.  */
-		  idx = symb_table[2 * elem + 1];
-		  /* Skip the name of collating element name.  */
-		  idx += 1 + extra[idx];
-		  /* Skip the byte sequence of the collating element.  */
-		  idx += 1 + extra[idx];
-		  /* Adjust for the alignment.  */
-		  idx = (idx + 3) & ~3;
-		  /* Skip the multibyte collation sequence value.  */
-		  idx += sizeof (unsigned int);
-		  /* Skip the wide char sequence of the collating element.  */
-		  idx += sizeof (unsigned int) *
-		    (1 + *(unsigned int *) (extra + idx));
-		  /* Return the collation sequence value.  */
-		  return *(unsigned int *) (extra + idx);
-		}
-	      else if (sym_name_len == 1)
-		{
-		  /* No valid character.  Match it as a single byte
-		     character.  */
-		  return collseqmb[br_elem->opr.name[0]];
-		}
-	    }
-	  else if (sym_name_len == 1)
-	    return collseqmb[br_elem->opr.name[0]];
-	}
-      return UINT_MAX;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Build the range expression which starts from START_ELEM, and ends
-     at END_ELEM.  The result are written to MBCSET and SBCSET.
-     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
-     mbcset->range_ends, is a pointer argument since we may
-     update it.  */
-
-  auto inline reg_errcode_t
-  __attribute__ ((always_inline))
-  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
-		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
-    {
-      unsigned int ch;
-      uint32_t start_collseq;
-      uint32_t end_collseq;
-
-      /* Equivalence Classes and Character Classes can't be a range
-	 start/end.  */
-      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
-			    || start_elem->type == CHAR_CLASS
-			    || end_elem->type == EQUIV_CLASS
-			    || end_elem->type == CHAR_CLASS))
-	return REG_ERANGE;
-
-      /* FIXME: Implement rational ranges here, too.  */
-      start_collseq = lookup_collation_sequence_value (start_elem);
-      end_collseq = lookup_collation_sequence_value (end_elem);
-      /* Check start/end collation sequence values.  */
-      if (__glibc_unlikely (start_collseq == UINT_MAX
-			    || end_collseq == UINT_MAX))
-	return REG_ECOLLATE;
-      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
-			    && start_collseq > end_collseq))
-	return REG_ERANGE;
-
-      /* Got valid collation sequence values, add them as a new entry.
-	 However, if we have no collation elements, and the character set
-	 is single byte, the single byte character set that we
-	 build below suffices. */
-      if (nrules > 0 || dfa->mb_cur_max > 1)
-	{
-	  /* Check the space of the arrays.  */
-	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
-	    {
-	      /* There is not enough space, need realloc.  */
-	      uint32_t *new_array_start;
-	      uint32_t *new_array_end;
-	      Idx new_nranges;
-
-	      /* +1 in case of mbcset->nranges is 0.  */
-	      new_nranges = 2 * mbcset->nranges + 1;
-	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
-					    new_nranges);
-	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
-					  new_nranges);
-
-	      if (__glibc_unlikely (new_array_start == NULL
-				    || new_array_end == NULL))
-		return REG_ESPACE;
-
-	      mbcset->range_starts = new_array_start;
-	      mbcset->range_ends = new_array_end;
-	      *range_alloc = new_nranges;
-	    }
-
-	  mbcset->range_starts[mbcset->nranges] = start_collseq;
-	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
-	}
-
-      /* Build the table for single byte characters.  */
-      for (ch = 0; ch < SBC_MAX; ch++)
-	{
-	  uint32_t ch_collseq;
-	  /*
-	  if (MB_CUR_MAX == 1)
-	  */
-	  if (nrules == 0)
-	    ch_collseq = collseqmb[ch];
-	  else
-	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
-	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
-	    bitset_set (sbcset, ch);
-	}
-      return REG_NOERROR;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Build the collating element which is represented by NAME.
-     The result are written to MBCSET and SBCSET.
-     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
-     pointer argument since we may update it.  */
-
-  auto inline reg_errcode_t
-  __attribute__ ((always_inline))
-  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
-			  Idx *coll_sym_alloc, const unsigned char *name)
-    {
-      int32_t elem, idx;
-      size_t name_len = strlen ((const char *) name);
-      if (nrules != 0)
-	{
-	  elem = seek_collating_symbol_entry (name, name_len);
-	  if (elem != -1)
-	    {
-	      /* We found the entry.  */
-	      idx = symb_table[2 * elem + 1];
-	      /* Skip the name of collating element name.  */
-	      idx += 1 + extra[idx];
-	    }
-	  else if (name_len == 1)
-	    {
-	      /* No valid character, treat it as a normal
-		 character.  */
-	      bitset_set (sbcset, name[0]);
-	      return REG_NOERROR;
-	    }
-	  else
-	    return REG_ECOLLATE;
-
-	  /* Got valid collation sequence, add it as a new entry.  */
-	  /* Check the space of the arrays.  */
-	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
-	    {
-	      /* Not enough, realloc it.  */
-	      /* +1 in case of mbcset->ncoll_syms is 0.  */
-	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
-	      /* Use realloc since mbcset->coll_syms is NULL
-		 if *alloc == 0.  */
-	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
-						   new_coll_sym_alloc);
-	      if (__glibc_unlikely (new_coll_syms == NULL))
-		return REG_ESPACE;
-	      mbcset->coll_syms = new_coll_syms;
-	      *coll_sym_alloc = new_coll_sym_alloc;
-	    }
-	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
-	  return REG_NOERROR;
-	}
-      else
-	{
-	  if (__glibc_unlikely (name_len != 1))
-	    return REG_ECOLLATE;
-	  else
-	    {
-	      bitset_set (sbcset, name[0]);
-	      return REG_NOERROR;
-	    }
-	}
-    }
+  int32_t table_size = 0;
+  const int32_t *symb_table = NULL;
+  const unsigned char *extra = NULL;
 #endif
 
   re_token_t br_token;
@@ -3230,7 +3245,9 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 
 #ifdef _LIBC
 	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
-				  &start_elem, &end_elem);
+				  &start_elem, &end_elem,
+				  dfa, syntax, nrules, collseqmb, collseqwc,
+				  table_size, symb_table, extra);
 #else
 # ifdef RE_ENABLE_I18N
 	  *err = build_range_exp (syntax, sbcset,
@@ -3283,7 +3300,8 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 #ifdef RE_ENABLE_I18N
 					     mbcset, &coll_sym_alloc,
 #endif /* RE_ENABLE_I18N */
-					     start_elem.opr.name);
+					     start_elem.opr.name,
+					     nrules, table_size, symb_table, extra);
 	      if (__glibc_unlikely (*err != REG_NOERROR))
 		goto parse_bracket_exp_free_return;
 	      break;