[v2] regex: Unnest nested functions in regcomp.c

Message ID 20211102054537.1137247-1-maskray@google.com
State Committed
Headers
Series [v2] regex: Unnest nested functions in regcomp.c |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Fangrui Song Nov. 2, 2021, 5:45 a.m. UTC
  This refactor moves four functions out of a nested scope and converts
them into static always_inline functions. collseqwc, table_size,
symb_table, extra are now initialized to zero because they are passed as
function arguments.

On x86-64, .text is 16 byte larger likely due to the 4 stores.
This is nothing compared to the amount of work that regcomp has to do
looking up the collation weights, or other functions.

If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
this patch removes the last `auto inline` usage from glibc.
---
v1 -> v2:
* Update commit message
---
 posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
 1 file changed, 266 insertions(+), 248 deletions(-)
  

Comments

Adhemerval Zanella Nov. 2, 2021, 12:15 p.m. UTC | #1
On 02/11/2021 02:45, Fangrui Song via Libc-alpha wrote:
> This refactor moves four functions out of a nested scope and converts
> them into static always_inline functions. collseqwc, table_size,
> symb_table, extra are now initialized to zero because they are passed as
> function arguments.
> 
> On x86-64, .text is 16 byte larger likely due to the 4 stores.
> This is nothing compared to the amount of work that regcomp has to do
> looking up the collation weights, or other functions.
> 
> If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
> this patch removes the last `auto inline` usage from glibc.

LGTM, thanks.  Carlos might want to ack as well.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
> v1 -> v2:
> * Update commit message
> ---
>  posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
>  1 file changed, 266 insertions(+), 248 deletions(-)
> 
> diff --git a/posix/regcomp.c b/posix/regcomp.c
> index 887e5b5068..e2b5c2807a 100644
> --- a/posix/regcomp.c
> +++ b/posix/regcomp.c
> @@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
>  }
>  #endif /* not _LIBC */
>  
> +#ifdef _LIBC
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Seek the collating symbol entry corresponding to NAME.
> +   Return the index of the symbol in the SYMB_TABLE,
> +   or -1 if not found.  */
> +
> +static inline int32_t
> +__attribute__ ((always_inline))
> +seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
> +			     const int32_t *symb_table, int32_t table_size,
> +			     const unsigned char *extra)
> +{
> +  int32_t elem;
> +
> +  for (elem = 0; elem < table_size; elem++)
> +    if (symb_table[2 * elem] != 0)
> +      {
> +	int32_t idx = symb_table[2 * elem + 1];
> +	/* Skip the name of collating element name.  */
> +	idx += 1 + extra[idx];
> +	if (/* Compare the length of the name.  */
> +	    name_len == extra[idx]
> +	    /* Compare the name.  */
> +	    && memcmp (name, &extra[idx + 1], name_len) == 0)
> +	  /* Yep, this is the entry.  */
> +	  return elem;
> +      }
> +  return -1;
> +}
> +

Ok (I would let the compiler optimize it, but __attribute__ ((always_inline))
at least tries to mimic what nested function does).

> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Look up the collation sequence value of BR_ELEM.
> +   Return the value if succeeded, UINT_MAX otherwise.  */
> +
> +static inline unsigned int
> +__attribute__ ((always_inline))
> +lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
> +				 const unsigned char *collseqmb,
> +				 const char *collseqwc, int32_t table_size,
> +				 const int32_t *symb_table,
> +				 const unsigned char *extra)
> +{
> +  if (br_elem->type == SB_CHAR)
> +    {
> +      /* if (MB_CUR_MAX == 1) */
> +      if (nrules == 0)
> +	return collseqmb[br_elem->opr.ch];
> +      else
> +	{
> +	  wint_t wc = __btowc (br_elem->opr.ch);
> +	  return __collseq_table_lookup (collseqwc, wc);
> +	}
> +    }
> +  else if (br_elem->type == MB_CHAR)
> +    {
> +      if (nrules != 0)
> +	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> +    }
> +  else if (br_elem->type == COLL_SYM)
> +    {
> +      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> +      if (nrules != 0)
> +	{
> +	  int32_t elem, idx;
> +	  elem = seek_collating_symbol_entry (br_elem->opr.name,
> +					      sym_name_len,
> +					      symb_table, table_size,
> +					      extra);
> +	  if (elem != -1)
> +	    {
> +	      /* We found the entry.  */
> +	      idx = symb_table[2 * elem + 1];
> +	      /* Skip the name of collating element name.  */
> +	      idx += 1 + extra[idx];
> +	      /* Skip the byte sequence of the collating element.  */
> +	      idx += 1 + extra[idx];
> +	      /* Adjust for the alignment.  */
> +	      idx = (idx + 3) & ~3;
> +	      /* Skip the multibyte collation sequence value.  */
> +	      idx += sizeof (unsigned int);
> +	      /* Skip the wide char sequence of the collating element.  */
> +	      idx += sizeof (unsigned int) *
> +		(1 + *(unsigned int *) (extra + idx));
> +	      /* Return the collation sequence value.  */
> +	      return *(unsigned int *) (extra + idx);
> +	    }
> +	  else if (sym_name_len == 1)
> +	    {
> +	      /* No valid character.  Match it as a single byte
> +		 character.  */
> +	      return collseqmb[br_elem->opr.name[0]];
> +	    }
> +	}
> +      else if (sym_name_len == 1)
> +	return collseqmb[br_elem->opr.name[0]];
> +    }
> +  return UINT_MAX;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Build the range expression which starts from START_ELEM, and ends
> +   at END_ELEM.  The result are written to MBCSET and SBCSET.
> +   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> +   mbcset->range_ends, is a pointer argument since we may
> +   update it.  */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))
> +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
> +		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
> +		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
> +		 const unsigned char *collseqmb, const char *collseqwc,
> +		 int32_t table_size, const int32_t *symb_table,
> +		 const unsigned char *extra)
> +{
> +  unsigned int ch;
> +  uint32_t start_collseq;
> +  uint32_t end_collseq;
> +
> +  /* Equivalence Classes and Character Classes can't be a range
> +     start/end.  */
> +  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> +                        || start_elem->type == CHAR_CLASS
> +                        || end_elem->type == EQUIV_CLASS
> +                        || end_elem->type == CHAR_CLASS))
> +    return REG_ERANGE;
> +
> +  /* FIXME: Implement rational ranges here, too.  */
> +  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
> +						   table_size, symb_table, extra);
> +  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
> +						 table_size, symb_table, extra);
> +  /* Check start/end collation sequence values.  */
> +  if (__glibc_unlikely (start_collseq == UINT_MAX
> +                        || end_collseq == UINT_MAX))
> +    return REG_ECOLLATE;
> +  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> +                        && start_collseq > end_collseq))
> +    return REG_ERANGE;
> +
> +  /* Got valid collation sequence values, add them as a new entry.
> +     However, if we have no collation elements, and the character set
> +     is single byte, the single byte character set that we
> +     build below suffices. */
> +  if (nrules > 0 || dfa->mb_cur_max > 1)
> +    {
> +      /* Check the space of the arrays.  */
> +      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> +	{
> +	  /* There is not enough space, need realloc.  */
> +	  uint32_t *new_array_start;
> +	  uint32_t *new_array_end;
> +	  int new_nranges;
> +
> +	  /* +1 in case of mbcset->nranges is 0.  */
> +	  new_nranges = 2 * mbcset->nranges + 1;
> +	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> +					new_nranges);
> +	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> +				      new_nranges);
> +
> +          if (__glibc_unlikely (new_array_start == NULL
> +                                || new_array_end == NULL))
> +	    return REG_ESPACE;
> +
> +	  mbcset->range_starts = new_array_start;
> +	  mbcset->range_ends = new_array_end;
> +	  *range_alloc = new_nranges;
> +	}
> +
> +      mbcset->range_starts[mbcset->nranges] = start_collseq;
> +      mbcset->range_ends[mbcset->nranges++] = end_collseq;
> +    }
> +
> +  /* Build the table for single byte characters.  */
> +  for (ch = 0; ch < SBC_MAX; ch++)
> +    {
> +      uint32_t ch_collseq;
> +      /* if (MB_CUR_MAX == 1) */
> +      if (nrules == 0)
> +	ch_collseq = collseqmb[ch];
> +      else
> +	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> +      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> +	bitset_set (sbcset, ch);
> +    }
> +  return REG_NOERROR;
> +}
> +
> +/* Local function for parse_bracket_exp used in _LIBC environment.
> +   Build the collating element which is represented by NAME.
> +   The result are written to MBCSET and SBCSET.
> +   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> +   pointer argument since we may update it.  */
> +
> +static inline reg_errcode_t
> +__attribute__ ((always_inline))
> +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
> +			int *coll_sym_alloc, const unsigned char *name,
> +			uint32_t nrules, int32_t table_size,
> +			const int32_t *symb_table, const unsigned char *extra)
> +{
> +  int32_t elem, idx;
> +  size_t name_len = strlen ((const char *) name);
> +  if (nrules != 0)
> +    {
> +      elem = seek_collating_symbol_entry (name, name_len, symb_table,
> +					  table_size, extra);
> +      if (elem != -1)
> +	{
> +	  /* We found the entry.  */
> +	  idx = symb_table[2 * elem + 1];
> +	  /* Skip the name of collating element name.  */
> +	  idx += 1 + extra[idx];
> +	}
> +      else if (name_len == 1)
> +	{
> +	  /* No valid character, treat it as a normal
> +	     character.  */
> +	  bitset_set (sbcset, name[0]);
> +	  return REG_NOERROR;
> +	}
> +      else
> +	return REG_ECOLLATE;
> +
> +      /* Got valid collation sequence, add it as a new entry.  */
> +      /* Check the space of the arrays.  */
> +      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> +	{
> +	  /* Not enough, realloc it.  */
> +	  /* +1 in case of mbcset->ncoll_syms is 0.  */
> +	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> +	  /* Use realloc since mbcset->coll_syms is NULL
> +	     if *alloc == 0.  */
> +	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> +					       new_coll_sym_alloc);
> +          if (__glibc_unlikely (new_coll_syms == NULL))
> +	    return REG_ESPACE;
> +	  mbcset->coll_syms = new_coll_syms;
> +	  *coll_sym_alloc = new_coll_sym_alloc;
> +	}
> +      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> +      return REG_NOERROR;
> +    }
> +  else
> +    {
> +      if (__glibc_unlikely (name_len != 1))
> +	return REG_ECOLLATE;
> +      else
> +	{
> +	  bitset_set (sbcset, name[0]);
> +	  return REG_NOERROR;
> +	}
> +    }
> +}
> +#endif /* _LIBC */
> +
>  /* This function parse bracket expression like "[abc]", "[a-c]",
>     "[[.a-a.]]" etc.  */
>  
> @@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  {
>  #ifdef _LIBC
>    const unsigned char *collseqmb;
> -  const char *collseqwc;
> +  const char *collseqwc = NULL;
>    uint32_t nrules;
> -  int32_t table_size;
> -  const int32_t *symb_table;
> -  const unsigned char *extra;
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Seek the collating symbol entry corresponding to NAME.
> -     Return the index of the symbol in the SYMB_TABLE,
> -     or -1 if not found.  */
> -
> -  auto inline int32_t
> -  __attribute__ ((always_inline))
> -  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
> -    {
> -      int32_t elem;
> -
> -      for (elem = 0; elem < table_size; elem++)
> -	if (symb_table[2 * elem] != 0)
> -	  {
> -	    int32_t idx = symb_table[2 * elem + 1];
> -	    /* Skip the name of collating element name.  */
> -	    idx += 1 + extra[idx];
> -	    if (/* Compare the length of the name.  */
> -		name_len == extra[idx]
> -		/* Compare the name.  */
> -		&& memcmp (name, &extra[idx + 1], name_len) == 0)
> -	      /* Yep, this is the entry.  */
> -	      return elem;
> -	  }
> -      return -1;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Look up the collation sequence value of BR_ELEM.
> -     Return the value if succeeded, UINT_MAX otherwise.  */
> -
> -  auto inline unsigned int
> -  __attribute__ ((always_inline))
> -  lookup_collation_sequence_value (bracket_elem_t *br_elem)
> -    {
> -      if (br_elem->type == SB_CHAR)
> -	{
> -	  /*
> -	  if (MB_CUR_MAX == 1)
> -	  */
> -	  if (nrules == 0)
> -	    return collseqmb[br_elem->opr.ch];
> -	  else
> -	    {
> -	      wint_t wc = __btowc (br_elem->opr.ch);
> -	      return __collseq_table_lookup (collseqwc, wc);
> -	    }
> -	}
> -      else if (br_elem->type == MB_CHAR)
> -	{
> -	  if (nrules != 0)
> -	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
> -	}
> -      else if (br_elem->type == COLL_SYM)
> -	{
> -	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
> -	  if (nrules != 0)
> -	    {
> -	      int32_t elem, idx;
> -	      elem = seek_collating_symbol_entry (br_elem->opr.name,
> -						  sym_name_len);
> -	      if (elem != -1)
> -		{
> -		  /* We found the entry.  */
> -		  idx = symb_table[2 * elem + 1];
> -		  /* Skip the name of collating element name.  */
> -		  idx += 1 + extra[idx];
> -		  /* Skip the byte sequence of the collating element.  */
> -		  idx += 1 + extra[idx];
> -		  /* Adjust for the alignment.  */
> -		  idx = (idx + 3) & ~3;
> -		  /* Skip the multibyte collation sequence value.  */
> -		  idx += sizeof (unsigned int);
> -		  /* Skip the wide char sequence of the collating element.  */
> -		  idx += sizeof (unsigned int) *
> -		    (1 + *(unsigned int *) (extra + idx));
> -		  /* Return the collation sequence value.  */
> -		  return *(unsigned int *) (extra + idx);
> -		}
> -	      else if (sym_name_len == 1)
> -		{
> -		  /* No valid character.  Match it as a single byte
> -		     character.  */
> -		  return collseqmb[br_elem->opr.name[0]];
> -		}
> -	    }
> -	  else if (sym_name_len == 1)
> -	    return collseqmb[br_elem->opr.name[0]];
> -	}
> -      return UINT_MAX;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Build the range expression which starts from START_ELEM, and ends
> -     at END_ELEM.  The result are written to MBCSET and SBCSET.
> -     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
> -     mbcset->range_ends, is a pointer argument since we may
> -     update it.  */
> -
> -  auto inline reg_errcode_t
> -  __attribute__ ((always_inline))
> -  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
> -		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
> -    {
> -      unsigned int ch;
> -      uint32_t start_collseq;
> -      uint32_t end_collseq;
> -
> -      /* Equivalence Classes and Character Classes can't be a range
> -	 start/end.  */
> -      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
> -			    || start_elem->type == CHAR_CLASS
> -			    || end_elem->type == EQUIV_CLASS
> -			    || end_elem->type == CHAR_CLASS))
> -	return REG_ERANGE;
> -
> -      /* FIXME: Implement rational ranges here, too.  */
> -      start_collseq = lookup_collation_sequence_value (start_elem);
> -      end_collseq = lookup_collation_sequence_value (end_elem);
> -      /* Check start/end collation sequence values.  */
> -      if (__glibc_unlikely (start_collseq == UINT_MAX
> -			    || end_collseq == UINT_MAX))
> -	return REG_ECOLLATE;
> -      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
> -			    && start_collseq > end_collseq))
> -	return REG_ERANGE;
> -
> -      /* Got valid collation sequence values, add them as a new entry.
> -	 However, if we have no collation elements, and the character set
> -	 is single byte, the single byte character set that we
> -	 build below suffices. */
> -      if (nrules > 0 || dfa->mb_cur_max > 1)
> -	{
> -	  /* Check the space of the arrays.  */
> -	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
> -	    {
> -	      /* There is not enough space, need realloc.  */
> -	      uint32_t *new_array_start;
> -	      uint32_t *new_array_end;
> -	      Idx new_nranges;
> -
> -	      /* +1 in case of mbcset->nranges is 0.  */
> -	      new_nranges = 2 * mbcset->nranges + 1;
> -	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
> -					    new_nranges);
> -	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
> -					  new_nranges);
> -
> -	      if (__glibc_unlikely (new_array_start == NULL
> -				    || new_array_end == NULL))
> -		return REG_ESPACE;
> -
> -	      mbcset->range_starts = new_array_start;
> -	      mbcset->range_ends = new_array_end;
> -	      *range_alloc = new_nranges;
> -	    }
> -
> -	  mbcset->range_starts[mbcset->nranges] = start_collseq;
> -	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
> -	}
> -
> -      /* Build the table for single byte characters.  */
> -      for (ch = 0; ch < SBC_MAX; ch++)
> -	{
> -	  uint32_t ch_collseq;
> -	  /*
> -	  if (MB_CUR_MAX == 1)
> -	  */
> -	  if (nrules == 0)
> -	    ch_collseq = collseqmb[ch];
> -	  else
> -	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
> -	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
> -	    bitset_set (sbcset, ch);
> -	}
> -      return REG_NOERROR;
> -    }
> -
> -  /* Local function for parse_bracket_exp used in _LIBC environment.
> -     Build the collating element which is represented by NAME.
> -     The result are written to MBCSET and SBCSET.
> -     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
> -     pointer argument since we may update it.  */
> -
> -  auto inline reg_errcode_t
> -  __attribute__ ((always_inline))
> -  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
> -			  Idx *coll_sym_alloc, const unsigned char *name)
> -    {
> -      int32_t elem, idx;
> -      size_t name_len = strlen ((const char *) name);
> -      if (nrules != 0)
> -	{
> -	  elem = seek_collating_symbol_entry (name, name_len);
> -	  if (elem != -1)
> -	    {
> -	      /* We found the entry.  */
> -	      idx = symb_table[2 * elem + 1];
> -	      /* Skip the name of collating element name.  */
> -	      idx += 1 + extra[idx];
> -	    }
> -	  else if (name_len == 1)
> -	    {
> -	      /* No valid character, treat it as a normal
> -		 character.  */
> -	      bitset_set (sbcset, name[0]);
> -	      return REG_NOERROR;
> -	    }
> -	  else
> -	    return REG_ECOLLATE;
> -
> -	  /* Got valid collation sequence, add it as a new entry.  */
> -	  /* Check the space of the arrays.  */
> -	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
> -	    {
> -	      /* Not enough, realloc it.  */
> -	      /* +1 in case of mbcset->ncoll_syms is 0.  */
> -	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
> -	      /* Use realloc since mbcset->coll_syms is NULL
> -		 if *alloc == 0.  */
> -	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
> -						   new_coll_sym_alloc);
> -	      if (__glibc_unlikely (new_coll_syms == NULL))
> -		return REG_ESPACE;
> -	      mbcset->coll_syms = new_coll_syms;
> -	      *coll_sym_alloc = new_coll_sym_alloc;
> -	    }
> -	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
> -	  return REG_NOERROR;
> -	}
> -      else
> -	{
> -	  if (__glibc_unlikely (name_len != 1))
> -	    return REG_ECOLLATE;
> -	  else
> -	    {
> -	      bitset_set (sbcset, name[0]);
> -	      return REG_NOERROR;
> -	    }
> -	}
> -    }
> +  int32_t table_size = 0;
> +  const int32_t *symb_table = NULL;
> +  const unsigned char *extra = NULL;
>  #endif
>  
>    re_token_t br_token;
> @@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  
>  #ifdef _LIBC
>  	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
> -				  &start_elem, &end_elem);
> +				  &start_elem, &end_elem,
> +				  dfa, syntax, nrules, collseqmb, collseqwc,
> +				  table_size, symb_table, extra);
>  #else
>  # ifdef RE_ENABLE_I18N
>  	  *err = build_range_exp (syntax, sbcset,
> @@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>  #ifdef RE_ENABLE_I18N
>  					     mbcset, &coll_sym_alloc,
>  #endif /* RE_ENABLE_I18N */
> -					     start_elem.opr.name);
> +					     start_elem.opr.name,
> +					     nrules, table_size, symb_table, extra);
>  	      if (__glibc_unlikely (*err != REG_NOERROR))
>  		goto parse_bracket_exp_free_return;
>  	      break;
>
  
Carlos O'Donell Nov. 2, 2021, 4:21 p.m. UTC | #2
On 11/2/21 08:15, Adhemerval Zanella wrote:
> 
> 
> On 02/11/2021 02:45, Fangrui Song via Libc-alpha wrote:
>> This refactor moves four functions out of a nested scope and converts
>> them into static always_inline functions. collseqwc, table_size,
>> symb_table, extra are now initialized to zero because they are passed as
>> function arguments.
>>
>> On x86-64, .text is 16 byte larger likely due to the 4 stores.
>> This is nothing compared to the amount of work that regcomp has to do
>> looking up the collation weights, or other functions.
>>
>> If the non-buildable `sysdeps/generic/dl-machine.h` doesn't count,
>> this patch removes the last `auto inline` usage from glibc.
> 
> LGTM, thanks.  Carlos might want to ack as well.
> 
> Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

LGTM. Thanks!

I think that keeping always_inline is the right thing to do in the context of
a refactor.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>

 
>> ---
>> v1 -> v2:
>> * Update commit message
>> ---
>>  posix/regcomp.c | 514 +++++++++++++++++++++++++-----------------------
>>  1 file changed, 266 insertions(+), 248 deletions(-)
>>
>> diff --git a/posix/regcomp.c b/posix/regcomp.c
>> index 887e5b5068..e2b5c2807a 100644
>> --- a/posix/regcomp.c
>> +++ b/posix/regcomp.c
>> @@ -2831,6 +2831,263 @@ build_collating_symbol (bitset_t sbcset, const unsigned char *name)
>>  }
>>  #endif /* not _LIBC */
>>  
>> +#ifdef _LIBC
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> +   Seek the collating symbol entry corresponding to NAME.
>> +   Return the index of the symbol in the SYMB_TABLE,
>> +   or -1 if not found.  */
>> +
>> +static inline int32_t
>> +__attribute__ ((always_inline))
>> +seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
>> +			     const int32_t *symb_table, int32_t table_size,
>> +			     const unsigned char *extra)
>> +{
>> +  int32_t elem;
>> +
>> +  for (elem = 0; elem < table_size; elem++)
>> +    if (symb_table[2 * elem] != 0)
>> +      {
>> +	int32_t idx = symb_table[2 * elem + 1];
>> +	/* Skip the name of collating element name.  */
>> +	idx += 1 + extra[idx];
>> +	if (/* Compare the length of the name.  */
>> +	    name_len == extra[idx]
>> +	    /* Compare the name.  */
>> +	    && memcmp (name, &extra[idx + 1], name_len) == 0)
>> +	  /* Yep, this is the entry.  */
>> +	  return elem;
>> +      }
>> +  return -1;
>> +}
>> +
> 
> Ok (I would let the compiler optimize it, but __attribute__ ((always_inline))
> at least tries to mimic what nested function does).
> 
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> +   Look up the collation sequence value of BR_ELEM.
>> +   Return the value if succeeded, UINT_MAX otherwise.  */
>> +
>> +static inline unsigned int
>> +__attribute__ ((always_inline))
>> +lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
>> +				 const unsigned char *collseqmb,
>> +				 const char *collseqwc, int32_t table_size,
>> +				 const int32_t *symb_table,
>> +				 const unsigned char *extra)
>> +{
>> +  if (br_elem->type == SB_CHAR)
>> +    {
>> +      /* if (MB_CUR_MAX == 1) */
>> +      if (nrules == 0)
>> +	return collseqmb[br_elem->opr.ch];
>> +      else
>> +	{
>> +	  wint_t wc = __btowc (br_elem->opr.ch);
>> +	  return __collseq_table_lookup (collseqwc, wc);
>> +	}
>> +    }
>> +  else if (br_elem->type == MB_CHAR)
>> +    {
>> +      if (nrules != 0)
>> +	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
>> +    }
>> +  else if (br_elem->type == COLL_SYM)
>> +    {
>> +      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
>> +      if (nrules != 0)
>> +	{
>> +	  int32_t elem, idx;
>> +	  elem = seek_collating_symbol_entry (br_elem->opr.name,
>> +					      sym_name_len,
>> +					      symb_table, table_size,
>> +					      extra);
>> +	  if (elem != -1)
>> +	    {
>> +	      /* We found the entry.  */
>> +	      idx = symb_table[2 * elem + 1];
>> +	      /* Skip the name of collating element name.  */
>> +	      idx += 1 + extra[idx];
>> +	      /* Skip the byte sequence of the collating element.  */
>> +	      idx += 1 + extra[idx];
>> +	      /* Adjust for the alignment.  */
>> +	      idx = (idx + 3) & ~3;
>> +	      /* Skip the multibyte collation sequence value.  */
>> +	      idx += sizeof (unsigned int);
>> +	      /* Skip the wide char sequence of the collating element.  */
>> +	      idx += sizeof (unsigned int) *
>> +		(1 + *(unsigned int *) (extra + idx));
>> +	      /* Return the collation sequence value.  */
>> +	      return *(unsigned int *) (extra + idx);
>> +	    }
>> +	  else if (sym_name_len == 1)
>> +	    {
>> +	      /* No valid character.  Match it as a single byte
>> +		 character.  */
>> +	      return collseqmb[br_elem->opr.name[0]];
>> +	    }
>> +	}
>> +      else if (sym_name_len == 1)
>> +	return collseqmb[br_elem->opr.name[0]];
>> +    }
>> +  return UINT_MAX;
>> +}
>> +
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> +   Build the range expression which starts from START_ELEM, and ends
>> +   at END_ELEM.  The result are written to MBCSET and SBCSET.
>> +   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
>> +   mbcset->range_ends, is a pointer argument since we may
>> +   update it.  */
>> +
>> +static inline reg_errcode_t
>> +__attribute__ ((always_inline))
>> +build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
>> +		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
>> +		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
>> +		 const unsigned char *collseqmb, const char *collseqwc,
>> +		 int32_t table_size, const int32_t *symb_table,
>> +		 const unsigned char *extra)
>> +{
>> +  unsigned int ch;
>> +  uint32_t start_collseq;
>> +  uint32_t end_collseq;
>> +
>> +  /* Equivalence Classes and Character Classes can't be a range
>> +     start/end.  */
>> +  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
>> +                        || start_elem->type == CHAR_CLASS
>> +                        || end_elem->type == EQUIV_CLASS
>> +                        || end_elem->type == CHAR_CLASS))
>> +    return REG_ERANGE;
>> +
>> +  /* FIXME: Implement rational ranges here, too.  */
>> +  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
>> +						   table_size, symb_table, extra);
>> +  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
>> +						 table_size, symb_table, extra);
>> +  /* Check start/end collation sequence values.  */
>> +  if (__glibc_unlikely (start_collseq == UINT_MAX
>> +                        || end_collseq == UINT_MAX))
>> +    return REG_ECOLLATE;
>> +  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
>> +                        && start_collseq > end_collseq))
>> +    return REG_ERANGE;
>> +
>> +  /* Got valid collation sequence values, add them as a new entry.
>> +     However, if we have no collation elements, and the character set
>> +     is single byte, the single byte character set that we
>> +     build below suffices. */
>> +  if (nrules > 0 || dfa->mb_cur_max > 1)
>> +    {
>> +      /* Check the space of the arrays.  */
>> +      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
>> +	{
>> +	  /* There is not enough space, need realloc.  */
>> +	  uint32_t *new_array_start;
>> +	  uint32_t *new_array_end;
>> +	  int new_nranges;
>> +
>> +	  /* +1 in case of mbcset->nranges is 0.  */
>> +	  new_nranges = 2 * mbcset->nranges + 1;
>> +	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
>> +					new_nranges);
>> +	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
>> +				      new_nranges);
>> +
>> +          if (__glibc_unlikely (new_array_start == NULL
>> +                                || new_array_end == NULL))
>> +	    return REG_ESPACE;
>> +
>> +	  mbcset->range_starts = new_array_start;
>> +	  mbcset->range_ends = new_array_end;
>> +	  *range_alloc = new_nranges;
>> +	}
>> +
>> +      mbcset->range_starts[mbcset->nranges] = start_collseq;
>> +      mbcset->range_ends[mbcset->nranges++] = end_collseq;
>> +    }
>> +
>> +  /* Build the table for single byte characters.  */
>> +  for (ch = 0; ch < SBC_MAX; ch++)
>> +    {
>> +      uint32_t ch_collseq;
>> +      /* if (MB_CUR_MAX == 1) */
>> +      if (nrules == 0)
>> +	ch_collseq = collseqmb[ch];
>> +      else
>> +	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
>> +      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
>> +	bitset_set (sbcset, ch);
>> +    }
>> +  return REG_NOERROR;
>> +}
>> +
>> +/* Local function for parse_bracket_exp used in _LIBC environment.
>> +   Build the collating element which is represented by NAME.
>> +   The result are written to MBCSET and SBCSET.
>> +   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
>> +   pointer argument since we may update it.  */
>> +
>> +static inline reg_errcode_t
>> +__attribute__ ((always_inline))
>> +build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
>> +			int *coll_sym_alloc, const unsigned char *name,
>> +			uint32_t nrules, int32_t table_size,
>> +			const int32_t *symb_table, const unsigned char *extra)
>> +{
>> +  int32_t elem, idx;
>> +  size_t name_len = strlen ((const char *) name);
>> +  if (nrules != 0)
>> +    {
>> +      elem = seek_collating_symbol_entry (name, name_len, symb_table,
>> +					  table_size, extra);
>> +      if (elem != -1)
>> +	{
>> +	  /* We found the entry.  */
>> +	  idx = symb_table[2 * elem + 1];
>> +	  /* Skip the name of collating element name.  */
>> +	  idx += 1 + extra[idx];
>> +	}
>> +      else if (name_len == 1)
>> +	{
>> +	  /* No valid character, treat it as a normal
>> +	     character.  */
>> +	  bitset_set (sbcset, name[0]);
>> +	  return REG_NOERROR;
>> +	}
>> +      else
>> +	return REG_ECOLLATE;
>> +
>> +      /* Got valid collation sequence, add it as a new entry.  */
>> +      /* Check the space of the arrays.  */
>> +      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
>> +	{
>> +	  /* Not enough, realloc it.  */
>> +	  /* +1 in case of mbcset->ncoll_syms is 0.  */
>> +	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
>> +	  /* Use realloc since mbcset->coll_syms is NULL
>> +	     if *alloc == 0.  */
>> +	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
>> +					       new_coll_sym_alloc);
>> +          if (__glibc_unlikely (new_coll_syms == NULL))
>> +	    return REG_ESPACE;
>> +	  mbcset->coll_syms = new_coll_syms;
>> +	  *coll_sym_alloc = new_coll_sym_alloc;
>> +	}
>> +      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
>> +      return REG_NOERROR;
>> +    }
>> +  else
>> +    {
>> +      if (__glibc_unlikely (name_len != 1))
>> +	return REG_ECOLLATE;
>> +      else
>> +	{
>> +	  bitset_set (sbcset, name[0]);
>> +	  return REG_NOERROR;
>> +	}
>> +    }
>> +}
>> +#endif /* _LIBC */
>> +
>>  /* This function parse bracket expression like "[abc]", "[a-c]",
>>     "[[.a-a.]]" etc.  */
>>  
>> @@ -2840,253 +3097,11 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>>  {
>>  #ifdef _LIBC
>>    const unsigned char *collseqmb;
>> -  const char *collseqwc;
>> +  const char *collseqwc = NULL;
>>    uint32_t nrules;
>> -  int32_t table_size;
>> -  const int32_t *symb_table;
>> -  const unsigned char *extra;
>> -
>> -  /* Local function for parse_bracket_exp used in _LIBC environment.
>> -     Seek the collating symbol entry corresponding to NAME.
>> -     Return the index of the symbol in the SYMB_TABLE,
>> -     or -1 if not found.  */
>> -
>> -  auto inline int32_t
>> -  __attribute__ ((always_inline))
>> -  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
>> -    {
>> -      int32_t elem;
>> -
>> -      for (elem = 0; elem < table_size; elem++)
>> -	if (symb_table[2 * elem] != 0)
>> -	  {
>> -	    int32_t idx = symb_table[2 * elem + 1];
>> -	    /* Skip the name of collating element name.  */
>> -	    idx += 1 + extra[idx];
>> -	    if (/* Compare the length of the name.  */
>> -		name_len == extra[idx]
>> -		/* Compare the name.  */
>> -		&& memcmp (name, &extra[idx + 1], name_len) == 0)
>> -	      /* Yep, this is the entry.  */
>> -	      return elem;
>> -	  }
>> -      return -1;
>> -    }
>> -
>> -  /* Local function for parse_bracket_exp used in _LIBC environment.
>> -     Look up the collation sequence value of BR_ELEM.
>> -     Return the value if succeeded, UINT_MAX otherwise.  */
>> -
>> -  auto inline unsigned int
>> -  __attribute__ ((always_inline))
>> -  lookup_collation_sequence_value (bracket_elem_t *br_elem)
>> -    {
>> -      if (br_elem->type == SB_CHAR)
>> -	{
>> -	  /*
>> -	  if (MB_CUR_MAX == 1)
>> -	  */
>> -	  if (nrules == 0)
>> -	    return collseqmb[br_elem->opr.ch];
>> -	  else
>> -	    {
>> -	      wint_t wc = __btowc (br_elem->opr.ch);
>> -	      return __collseq_table_lookup (collseqwc, wc);
>> -	    }
>> -	}
>> -      else if (br_elem->type == MB_CHAR)
>> -	{
>> -	  if (nrules != 0)
>> -	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
>> -	}
>> -      else if (br_elem->type == COLL_SYM)
>> -	{
>> -	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
>> -	  if (nrules != 0)
>> -	    {
>> -	      int32_t elem, idx;
>> -	      elem = seek_collating_symbol_entry (br_elem->opr.name,
>> -						  sym_name_len);
>> -	      if (elem != -1)
>> -		{
>> -		  /* We found the entry.  */
>> -		  idx = symb_table[2 * elem + 1];
>> -		  /* Skip the name of collating element name.  */
>> -		  idx += 1 + extra[idx];
>> -		  /* Skip the byte sequence of the collating element.  */
>> -		  idx += 1 + extra[idx];
>> -		  /* Adjust for the alignment.  */
>> -		  idx = (idx + 3) & ~3;
>> -		  /* Skip the multibyte collation sequence value.  */
>> -		  idx += sizeof (unsigned int);
>> -		  /* Skip the wide char sequence of the collating element.  */
>> -		  idx += sizeof (unsigned int) *
>> -		    (1 + *(unsigned int *) (extra + idx));
>> -		  /* Return the collation sequence value.  */
>> -		  return *(unsigned int *) (extra + idx);
>> -		}
>> -	      else if (sym_name_len == 1)
>> -		{
>> -		  /* No valid character.  Match it as a single byte
>> -		     character.  */
>> -		  return collseqmb[br_elem->opr.name[0]];
>> -		}
>> -	    }
>> -	  else if (sym_name_len == 1)
>> -	    return collseqmb[br_elem->opr.name[0]];
>> -	}
>> -      return UINT_MAX;
>> -    }
>> -
>> -  /* Local function for parse_bracket_exp used in _LIBC environment.
>> -     Build the range expression which starts from START_ELEM, and ends
>> -     at END_ELEM.  The result are written to MBCSET and SBCSET.
>> -     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
>> -     mbcset->range_ends, is a pointer argument since we may
>> -     update it.  */
>> -
>> -  auto inline reg_errcode_t
>> -  __attribute__ ((always_inline))
>> -  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
>> -		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
>> -    {
>> -      unsigned int ch;
>> -      uint32_t start_collseq;
>> -      uint32_t end_collseq;
>> -
>> -      /* Equivalence Classes and Character Classes can't be a range
>> -	 start/end.  */
>> -      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
>> -			    || start_elem->type == CHAR_CLASS
>> -			    || end_elem->type == EQUIV_CLASS
>> -			    || end_elem->type == CHAR_CLASS))
>> -	return REG_ERANGE;
>> -
>> -      /* FIXME: Implement rational ranges here, too.  */
>> -      start_collseq = lookup_collation_sequence_value (start_elem);
>> -      end_collseq = lookup_collation_sequence_value (end_elem);
>> -      /* Check start/end collation sequence values.  */
>> -      if (__glibc_unlikely (start_collseq == UINT_MAX
>> -			    || end_collseq == UINT_MAX))
>> -	return REG_ECOLLATE;
>> -      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
>> -			    && start_collseq > end_collseq))
>> -	return REG_ERANGE;
>> -
>> -      /* Got valid collation sequence values, add them as a new entry.
>> -	 However, if we have no collation elements, and the character set
>> -	 is single byte, the single byte character set that we
>> -	 build below suffices. */
>> -      if (nrules > 0 || dfa->mb_cur_max > 1)
>> -	{
>> -	  /* Check the space of the arrays.  */
>> -	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
>> -	    {
>> -	      /* There is not enough space, need realloc.  */
>> -	      uint32_t *new_array_start;
>> -	      uint32_t *new_array_end;
>> -	      Idx new_nranges;
>> -
>> -	      /* +1 in case of mbcset->nranges is 0.  */
>> -	      new_nranges = 2 * mbcset->nranges + 1;
>> -	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
>> -					    new_nranges);
>> -	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
>> -					  new_nranges);
>> -
>> -	      if (__glibc_unlikely (new_array_start == NULL
>> -				    || new_array_end == NULL))
>> -		return REG_ESPACE;
>> -
>> -	      mbcset->range_starts = new_array_start;
>> -	      mbcset->range_ends = new_array_end;
>> -	      *range_alloc = new_nranges;
>> -	    }
>> -
>> -	  mbcset->range_starts[mbcset->nranges] = start_collseq;
>> -	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
>> -	}
>> -
>> -      /* Build the table for single byte characters.  */
>> -      for (ch = 0; ch < SBC_MAX; ch++)
>> -	{
>> -	  uint32_t ch_collseq;
>> -	  /*
>> -	  if (MB_CUR_MAX == 1)
>> -	  */
>> -	  if (nrules == 0)
>> -	    ch_collseq = collseqmb[ch];
>> -	  else
>> -	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
>> -	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
>> -	    bitset_set (sbcset, ch);
>> -	}
>> -      return REG_NOERROR;
>> -    }
>> -
>> -  /* Local function for parse_bracket_exp used in _LIBC environment.
>> -     Build the collating element which is represented by NAME.
>> -     The result are written to MBCSET and SBCSET.
>> -     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
>> -     pointer argument since we may update it.  */
>> -
>> -  auto inline reg_errcode_t
>> -  __attribute__ ((always_inline))
>> -  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
>> -			  Idx *coll_sym_alloc, const unsigned char *name)
>> -    {
>> -      int32_t elem, idx;
>> -      size_t name_len = strlen ((const char *) name);
>> -      if (nrules != 0)
>> -	{
>> -	  elem = seek_collating_symbol_entry (name, name_len);
>> -	  if (elem != -1)
>> -	    {
>> -	      /* We found the entry.  */
>> -	      idx = symb_table[2 * elem + 1];
>> -	      /* Skip the name of collating element name.  */
>> -	      idx += 1 + extra[idx];
>> -	    }
>> -	  else if (name_len == 1)
>> -	    {
>> -	      /* No valid character, treat it as a normal
>> -		 character.  */
>> -	      bitset_set (sbcset, name[0]);
>> -	      return REG_NOERROR;
>> -	    }
>> -	  else
>> -	    return REG_ECOLLATE;
>> -
>> -	  /* Got valid collation sequence, add it as a new entry.  */
>> -	  /* Check the space of the arrays.  */
>> -	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
>> -	    {
>> -	      /* Not enough, realloc it.  */
>> -	      /* +1 in case of mbcset->ncoll_syms is 0.  */
>> -	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
>> -	      /* Use realloc since mbcset->coll_syms is NULL
>> -		 if *alloc == 0.  */
>> -	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
>> -						   new_coll_sym_alloc);
>> -	      if (__glibc_unlikely (new_coll_syms == NULL))
>> -		return REG_ESPACE;
>> -	      mbcset->coll_syms = new_coll_syms;
>> -	      *coll_sym_alloc = new_coll_sym_alloc;
>> -	    }
>> -	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
>> -	  return REG_NOERROR;
>> -	}
>> -      else
>> -	{
>> -	  if (__glibc_unlikely (name_len != 1))
>> -	    return REG_ECOLLATE;
>> -	  else
>> -	    {
>> -	      bitset_set (sbcset, name[0]);
>> -	      return REG_NOERROR;
>> -	    }
>> -	}
>> -    }
>> +  int32_t table_size = 0;
>> +  const int32_t *symb_table = NULL;
>> +  const unsigned char *extra = NULL;
>>  #endif
>>  
>>    re_token_t br_token;
>> @@ -3230,7 +3245,9 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>>  
>>  #ifdef _LIBC
>>  	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
>> -				  &start_elem, &end_elem);
>> +				  &start_elem, &end_elem,
>> +				  dfa, syntax, nrules, collseqmb, collseqwc,
>> +				  table_size, symb_table, extra);
>>  #else
>>  # ifdef RE_ENABLE_I18N
>>  	  *err = build_range_exp (syntax, sbcset,
>> @@ -3283,7 +3300,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
>>  #ifdef RE_ENABLE_I18N
>>  					     mbcset, &coll_sym_alloc,
>>  #endif /* RE_ENABLE_I18N */
>> -					     start_elem.opr.name);
>> +					     start_elem.opr.name,
>> +					     nrules, table_size, symb_table, extra);
>>  	      if (__glibc_unlikely (*err != REG_NOERROR))
>>  		goto parse_bracket_exp_free_return;
>>  	      break;
>>
>
  

Patch

diff --git a/posix/regcomp.c b/posix/regcomp.c
index 887e5b5068..e2b5c2807a 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -2831,6 +2831,263 @@  build_collating_symbol (bitset_t sbcset, const unsigned char *name)
 }
 #endif /* not _LIBC */
 
+#ifdef _LIBC
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Seek the collating symbol entry corresponding to NAME.
+   Return the index of the symbol in the SYMB_TABLE,
+   or -1 if not found.  */
+
+static inline int32_t
+__attribute__ ((always_inline))
+seek_collating_symbol_entry (const unsigned char *name, size_t name_len,
+			     const int32_t *symb_table, int32_t table_size,
+			     const unsigned char *extra)
+{
+  int32_t elem;
+
+  for (elem = 0; elem < table_size; elem++)
+    if (symb_table[2 * elem] != 0)
+      {
+	int32_t idx = symb_table[2 * elem + 1];
+	/* Skip the name of collating element name.  */
+	idx += 1 + extra[idx];
+	if (/* Compare the length of the name.  */
+	    name_len == extra[idx]
+	    /* Compare the name.  */
+	    && memcmp (name, &extra[idx + 1], name_len) == 0)
+	  /* Yep, this is the entry.  */
+	  return elem;
+      }
+  return -1;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Look up the collation sequence value of BR_ELEM.
+   Return the value if succeeded, UINT_MAX otherwise.  */
+
+static inline unsigned int
+__attribute__ ((always_inline))
+lookup_collation_sequence_value (bracket_elem_t *br_elem, uint32_t nrules,
+				 const unsigned char *collseqmb,
+				 const char *collseqwc, int32_t table_size,
+				 const int32_t *symb_table,
+				 const unsigned char *extra)
+{
+  if (br_elem->type == SB_CHAR)
+    {
+      /* if (MB_CUR_MAX == 1) */
+      if (nrules == 0)
+	return collseqmb[br_elem->opr.ch];
+      else
+	{
+	  wint_t wc = __btowc (br_elem->opr.ch);
+	  return __collseq_table_lookup (collseqwc, wc);
+	}
+    }
+  else if (br_elem->type == MB_CHAR)
+    {
+      if (nrules != 0)
+	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
+    }
+  else if (br_elem->type == COLL_SYM)
+    {
+      size_t sym_name_len = strlen ((char *) br_elem->opr.name);
+      if (nrules != 0)
+	{
+	  int32_t elem, idx;
+	  elem = seek_collating_symbol_entry (br_elem->opr.name,
+					      sym_name_len,
+					      symb_table, table_size,
+					      extra);
+	  if (elem != -1)
+	    {
+	      /* We found the entry.  */
+	      idx = symb_table[2 * elem + 1];
+	      /* Skip the name of collating element name.  */
+	      idx += 1 + extra[idx];
+	      /* Skip the byte sequence of the collating element.  */
+	      idx += 1 + extra[idx];
+	      /* Adjust for the alignment.  */
+	      idx = (idx + 3) & ~3;
+	      /* Skip the multibyte collation sequence value.  */
+	      idx += sizeof (unsigned int);
+	      /* Skip the wide char sequence of the collating element.  */
+	      idx += sizeof (unsigned int) *
+		(1 + *(unsigned int *) (extra + idx));
+	      /* Return the collation sequence value.  */
+	      return *(unsigned int *) (extra + idx);
+	    }
+	  else if (sym_name_len == 1)
+	    {
+	      /* No valid character.  Match it as a single byte
+		 character.  */
+	      return collseqmb[br_elem->opr.name[0]];
+	    }
+	}
+      else if (sym_name_len == 1)
+	return collseqmb[br_elem->opr.name[0]];
+    }
+  return UINT_MAX;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Build the range expression which starts from START_ELEM, and ends
+   at END_ELEM.  The result are written to MBCSET and SBCSET.
+   RANGE_ALLOC is the allocated size of mbcset->range_starts, and
+   mbcset->range_ends, is a pointer argument since we may
+   update it.  */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
+		 bracket_elem_t *start_elem, bracket_elem_t *end_elem,
+		 re_dfa_t *dfa, reg_syntax_t syntax, uint32_t nrules,
+		 const unsigned char *collseqmb, const char *collseqwc,
+		 int32_t table_size, const int32_t *symb_table,
+		 const unsigned char *extra)
+{
+  unsigned int ch;
+  uint32_t start_collseq;
+  uint32_t end_collseq;
+
+  /* Equivalence Classes and Character Classes can't be a range
+     start/end.  */
+  if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
+                        || start_elem->type == CHAR_CLASS
+                        || end_elem->type == EQUIV_CLASS
+                        || end_elem->type == CHAR_CLASS))
+    return REG_ERANGE;
+
+  /* FIXME: Implement rational ranges here, too.  */
+  start_collseq = lookup_collation_sequence_value (start_elem, nrules, collseqmb, collseqwc,
+						   table_size, symb_table, extra);
+  end_collseq = lookup_collation_sequence_value (end_elem, nrules, collseqmb, collseqwc,
+						 table_size, symb_table, extra);
+  /* Check start/end collation sequence values.  */
+  if (__glibc_unlikely (start_collseq == UINT_MAX
+                        || end_collseq == UINT_MAX))
+    return REG_ECOLLATE;
+  if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
+                        && start_collseq > end_collseq))
+    return REG_ERANGE;
+
+  /* Got valid collation sequence values, add them as a new entry.
+     However, if we have no collation elements, and the character set
+     is single byte, the single byte character set that we
+     build below suffices. */
+  if (nrules > 0 || dfa->mb_cur_max > 1)
+    {
+      /* Check the space of the arrays.  */
+      if (__glibc_unlikely (*range_alloc == mbcset->nranges))
+	{
+	  /* There is not enough space, need realloc.  */
+	  uint32_t *new_array_start;
+	  uint32_t *new_array_end;
+	  int new_nranges;
+
+	  /* +1 in case of mbcset->nranges is 0.  */
+	  new_nranges = 2 * mbcset->nranges + 1;
+	  new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+					new_nranges);
+	  new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+				      new_nranges);
+
+          if (__glibc_unlikely (new_array_start == NULL
+                                || new_array_end == NULL))
+	    return REG_ESPACE;
+
+	  mbcset->range_starts = new_array_start;
+	  mbcset->range_ends = new_array_end;
+	  *range_alloc = new_nranges;
+	}
+
+      mbcset->range_starts[mbcset->nranges] = start_collseq;
+      mbcset->range_ends[mbcset->nranges++] = end_collseq;
+    }
+
+  /* Build the table for single byte characters.  */
+  for (ch = 0; ch < SBC_MAX; ch++)
+    {
+      uint32_t ch_collseq;
+      /* if (MB_CUR_MAX == 1) */
+      if (nrules == 0)
+	ch_collseq = collseqmb[ch];
+      else
+	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
+      if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
+	bitset_set (sbcset, ch);
+    }
+  return REG_NOERROR;
+}
+
+/* Local function for parse_bracket_exp used in _LIBC environment.
+   Build the collating element which is represented by NAME.
+   The result are written to MBCSET and SBCSET.
+   COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
+   pointer argument since we may update it.  */
+
+static inline reg_errcode_t
+__attribute__ ((always_inline))
+build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
+			int *coll_sym_alloc, const unsigned char *name,
+			uint32_t nrules, int32_t table_size,
+			const int32_t *symb_table, const unsigned char *extra)
+{
+  int32_t elem, idx;
+  size_t name_len = strlen ((const char *) name);
+  if (nrules != 0)
+    {
+      elem = seek_collating_symbol_entry (name, name_len, symb_table,
+					  table_size, extra);
+      if (elem != -1)
+	{
+	  /* We found the entry.  */
+	  idx = symb_table[2 * elem + 1];
+	  /* Skip the name of collating element name.  */
+	  idx += 1 + extra[idx];
+	}
+      else if (name_len == 1)
+	{
+	  /* No valid character, treat it as a normal
+	     character.  */
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+      else
+	return REG_ECOLLATE;
+
+      /* Got valid collation sequence, add it as a new entry.  */
+      /* Check the space of the arrays.  */
+      if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
+	{
+	  /* Not enough, realloc it.  */
+	  /* +1 in case of mbcset->ncoll_syms is 0.  */
+	  int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+	  /* Use realloc since mbcset->coll_syms is NULL
+	     if *alloc == 0.  */
+	  int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+					       new_coll_sym_alloc);
+          if (__glibc_unlikely (new_coll_syms == NULL))
+	    return REG_ESPACE;
+	  mbcset->coll_syms = new_coll_syms;
+	  *coll_sym_alloc = new_coll_sym_alloc;
+	}
+      mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
+      return REG_NOERROR;
+    }
+  else
+    {
+      if (__glibc_unlikely (name_len != 1))
+	return REG_ECOLLATE;
+      else
+	{
+	  bitset_set (sbcset, name[0]);
+	  return REG_NOERROR;
+	}
+    }
+}
+#endif /* _LIBC */
+
 /* This function parse bracket expression like "[abc]", "[a-c]",
    "[[.a-a.]]" etc.  */
 
@@ -2840,253 +3097,11 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 {
 #ifdef _LIBC
   const unsigned char *collseqmb;
-  const char *collseqwc;
+  const char *collseqwc = NULL;
   uint32_t nrules;
-  int32_t table_size;
-  const int32_t *symb_table;
-  const unsigned char *extra;
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Seek the collating symbol entry corresponding to NAME.
-     Return the index of the symbol in the SYMB_TABLE,
-     or -1 if not found.  */
-
-  auto inline int32_t
-  __attribute__ ((always_inline))
-  seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
-    {
-      int32_t elem;
-
-      for (elem = 0; elem < table_size; elem++)
-	if (symb_table[2 * elem] != 0)
-	  {
-	    int32_t idx = symb_table[2 * elem + 1];
-	    /* Skip the name of collating element name.  */
-	    idx += 1 + extra[idx];
-	    if (/* Compare the length of the name.  */
-		name_len == extra[idx]
-		/* Compare the name.  */
-		&& memcmp (name, &extra[idx + 1], name_len) == 0)
-	      /* Yep, this is the entry.  */
-	      return elem;
-	  }
-      return -1;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Look up the collation sequence value of BR_ELEM.
-     Return the value if succeeded, UINT_MAX otherwise.  */
-
-  auto inline unsigned int
-  __attribute__ ((always_inline))
-  lookup_collation_sequence_value (bracket_elem_t *br_elem)
-    {
-      if (br_elem->type == SB_CHAR)
-	{
-	  /*
-	  if (MB_CUR_MAX == 1)
-	  */
-	  if (nrules == 0)
-	    return collseqmb[br_elem->opr.ch];
-	  else
-	    {
-	      wint_t wc = __btowc (br_elem->opr.ch);
-	      return __collseq_table_lookup (collseqwc, wc);
-	    }
-	}
-      else if (br_elem->type == MB_CHAR)
-	{
-	  if (nrules != 0)
-	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
-	}
-      else if (br_elem->type == COLL_SYM)
-	{
-	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);
-	  if (nrules != 0)
-	    {
-	      int32_t elem, idx;
-	      elem = seek_collating_symbol_entry (br_elem->opr.name,
-						  sym_name_len);
-	      if (elem != -1)
-		{
-		  /* We found the entry.  */
-		  idx = symb_table[2 * elem + 1];
-		  /* Skip the name of collating element name.  */
-		  idx += 1 + extra[idx];
-		  /* Skip the byte sequence of the collating element.  */
-		  idx += 1 + extra[idx];
-		  /* Adjust for the alignment.  */
-		  idx = (idx + 3) & ~3;
-		  /* Skip the multibyte collation sequence value.  */
-		  idx += sizeof (unsigned int);
-		  /* Skip the wide char sequence of the collating element.  */
-		  idx += sizeof (unsigned int) *
-		    (1 + *(unsigned int *) (extra + idx));
-		  /* Return the collation sequence value.  */
-		  return *(unsigned int *) (extra + idx);
-		}
-	      else if (sym_name_len == 1)
-		{
-		  /* No valid character.  Match it as a single byte
-		     character.  */
-		  return collseqmb[br_elem->opr.name[0]];
-		}
-	    }
-	  else if (sym_name_len == 1)
-	    return collseqmb[br_elem->opr.name[0]];
-	}
-      return UINT_MAX;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Build the range expression which starts from START_ELEM, and ends
-     at END_ELEM.  The result are written to MBCSET and SBCSET.
-     RANGE_ALLOC is the allocated size of mbcset->range_starts, and
-     mbcset->range_ends, is a pointer argument since we may
-     update it.  */
-
-  auto inline reg_errcode_t
-  __attribute__ ((always_inline))
-  build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
-		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)
-    {
-      unsigned int ch;
-      uint32_t start_collseq;
-      uint32_t end_collseq;
-
-      /* Equivalence Classes and Character Classes can't be a range
-	 start/end.  */
-      if (__glibc_unlikely (start_elem->type == EQUIV_CLASS
-			    || start_elem->type == CHAR_CLASS
-			    || end_elem->type == EQUIV_CLASS
-			    || end_elem->type == CHAR_CLASS))
-	return REG_ERANGE;
-
-      /* FIXME: Implement rational ranges here, too.  */
-      start_collseq = lookup_collation_sequence_value (start_elem);
-      end_collseq = lookup_collation_sequence_value (end_elem);
-      /* Check start/end collation sequence values.  */
-      if (__glibc_unlikely (start_collseq == UINT_MAX
-			    || end_collseq == UINT_MAX))
-	return REG_ECOLLATE;
-      if (__glibc_unlikely ((syntax & RE_NO_EMPTY_RANGES)
-			    && start_collseq > end_collseq))
-	return REG_ERANGE;
-
-      /* Got valid collation sequence values, add them as a new entry.
-	 However, if we have no collation elements, and the character set
-	 is single byte, the single byte character set that we
-	 build below suffices. */
-      if (nrules > 0 || dfa->mb_cur_max > 1)
-	{
-	  /* Check the space of the arrays.  */
-	  if (__glibc_unlikely (*range_alloc == mbcset->nranges))
-	    {
-	      /* There is not enough space, need realloc.  */
-	      uint32_t *new_array_start;
-	      uint32_t *new_array_end;
-	      Idx new_nranges;
-
-	      /* +1 in case of mbcset->nranges is 0.  */
-	      new_nranges = 2 * mbcset->nranges + 1;
-	      new_array_start = re_realloc (mbcset->range_starts, uint32_t,
-					    new_nranges);
-	      new_array_end = re_realloc (mbcset->range_ends, uint32_t,
-					  new_nranges);
-
-	      if (__glibc_unlikely (new_array_start == NULL
-				    || new_array_end == NULL))
-		return REG_ESPACE;
-
-	      mbcset->range_starts = new_array_start;
-	      mbcset->range_ends = new_array_end;
-	      *range_alloc = new_nranges;
-	    }
-
-	  mbcset->range_starts[mbcset->nranges] = start_collseq;
-	  mbcset->range_ends[mbcset->nranges++] = end_collseq;
-	}
-
-      /* Build the table for single byte characters.  */
-      for (ch = 0; ch < SBC_MAX; ch++)
-	{
-	  uint32_t ch_collseq;
-	  /*
-	  if (MB_CUR_MAX == 1)
-	  */
-	  if (nrules == 0)
-	    ch_collseq = collseqmb[ch];
-	  else
-	    ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
-	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
-	    bitset_set (sbcset, ch);
-	}
-      return REG_NOERROR;
-    }
-
-  /* Local function for parse_bracket_exp used in _LIBC environment.
-     Build the collating element which is represented by NAME.
-     The result are written to MBCSET and SBCSET.
-     COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
-     pointer argument since we may update it.  */
-
-  auto inline reg_errcode_t
-  __attribute__ ((always_inline))
-  build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
-			  Idx *coll_sym_alloc, const unsigned char *name)
-    {
-      int32_t elem, idx;
-      size_t name_len = strlen ((const char *) name);
-      if (nrules != 0)
-	{
-	  elem = seek_collating_symbol_entry (name, name_len);
-	  if (elem != -1)
-	    {
-	      /* We found the entry.  */
-	      idx = symb_table[2 * elem + 1];
-	      /* Skip the name of collating element name.  */
-	      idx += 1 + extra[idx];
-	    }
-	  else if (name_len == 1)
-	    {
-	      /* No valid character, treat it as a normal
-		 character.  */
-	      bitset_set (sbcset, name[0]);
-	      return REG_NOERROR;
-	    }
-	  else
-	    return REG_ECOLLATE;
-
-	  /* Got valid collation sequence, add it as a new entry.  */
-	  /* Check the space of the arrays.  */
-	  if (__glibc_unlikely (*coll_sym_alloc == mbcset->ncoll_syms))
-	    {
-	      /* Not enough, realloc it.  */
-	      /* +1 in case of mbcset->ncoll_syms is 0.  */
-	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
-	      /* Use realloc since mbcset->coll_syms is NULL
-		 if *alloc == 0.  */
-	      int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
-						   new_coll_sym_alloc);
-	      if (__glibc_unlikely (new_coll_syms == NULL))
-		return REG_ESPACE;
-	      mbcset->coll_syms = new_coll_syms;
-	      *coll_sym_alloc = new_coll_sym_alloc;
-	    }
-	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
-	  return REG_NOERROR;
-	}
-      else
-	{
-	  if (__glibc_unlikely (name_len != 1))
-	    return REG_ECOLLATE;
-	  else
-	    {
-	      bitset_set (sbcset, name[0]);
-	      return REG_NOERROR;
-	    }
-	}
-    }
+  int32_t table_size = 0;
+  const int32_t *symb_table = NULL;
+  const unsigned char *extra = NULL;
 #endif
 
   re_token_t br_token;
@@ -3230,7 +3245,9 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 
 #ifdef _LIBC
 	  *err = build_range_exp (sbcset, mbcset, &range_alloc,
-				  &start_elem, &end_elem);
+				  &start_elem, &end_elem,
+				  dfa, syntax, nrules, collseqmb, collseqwc,
+				  table_size, symb_table, extra);
 #else
 # ifdef RE_ENABLE_I18N
 	  *err = build_range_exp (syntax, sbcset,
@@ -3283,7 +3300,8 @@  parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
 #ifdef RE_ENABLE_I18N
 					     mbcset, &coll_sym_alloc,
 #endif /* RE_ENABLE_I18N */
-					     start_elem.opr.name);
+					     start_elem.opr.name,
+					     nrules, table_size, symb_table, extra);
 	      if (__glibc_unlikely (*err != REG_NOERROR))
 		goto parse_bracket_exp_free_return;
 	      break;