From patchwork Wed May 13 17:26:57 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Ondrej Bilka X-Patchwork-Id: 6703 Received: (qmail 14991 invoked by alias); 13 May 2015 17:27:28 -0000 Mailing-List: contact libc-alpha-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: libc-alpha-owner@sourceware.org Delivered-To: mailing list libc-alpha@sourceware.org Received: (qmail 14982 invoked by uid 89); 13 May 2015 17:27:27 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=2.1 required=5.0 tests=AWL, BAYES_99, BAYES_999, FREEMAIL_FROM, SPF_NEUTRAL autolearn=no version=3.3.2 X-HELO: popelka.ms.mff.cuni.cz Date: Wed, 13 May 2015 19:26:57 +0200 From: =?utf-8?B?T25kxZllaiBCw61sa2E=?= To: Joseph Myers Cc: libc-alpha@sourceware.org Subject: Re: [RFC v2] Improve wordexp performance. Message-ID: <20150513172657.GA21390@domone> References: <20150513124945.GA4067@domone> MIME-Version: 1.0 Content-Disposition: inline In-Reply-To: User-Agent: Mutt/1.5.20 (2009-06-14) On Wed, May 13, 2015 at 03:59:20PM +0000, Joseph Myers wrote: > On Wed, 13 May 2015, Ondřej Bílka wrote: > Anything like this using possibly signed char seems suspect; I'd expect it > to use unsigned char unless there is a clearly documented reason why > negative values aren't possible. > Sure, was bit sleepy and forgot about that. Here is v2 which intended cast to unsigned. diff --git a/posix/wordexp.c b/posix/wordexp.c index e711d43..40021c5 100644 --- a/posix/wordexp.c +++ b/posix/wordexp.c @@ -45,6 +45,37 @@ #include #include <_itoa.h> + +static int +in_charset (char _c, uint32_t *charset) +{ + unsigned char c = _c; + return charset[c / 32] & (1 << (c % 32)); +} + +static void +make_charset (char *_a, uint32_t *b) +{ + unsigned char *a = (unsigned char *) _a; + int i; + for (i = 0; i < 8; i++) + b[i] = 0; + while (*a) + { + b[*a / 32] |= 1 << (*a % 32); + a++; + } +} + +struct ifss { + char *ifs; + char *ifs_white; + uint32_t ifs_table[8]; + uint32_t ifs_white_table[8]; +}; + +struct ifss nulifs; + /* Undefine the following line for the production version. */ /* #define NDEBUG 1 */ #include @@ -63,18 +94,16 @@ extern char **__libc_argv attribute_hidden; /* Some forward declarations */ static int parse_dollars (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char *ifs, - const char *ifs_white, int quoted) + wordexp_t *pwordexp, struct ifss *ifsdata, int quoted) internal_function; static int parse_backtick (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, wordexp_t *pwordexp, - const char *ifs, const char *ifs_white) + struct ifss *ifsdata) internal_function; static int parse_dquote (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char *ifs, - const char *ifs_white) + wordexp_t *pwordexp, struct ifss *ifsdata) internal_function; static int eval_expr (char *expr, long int *result) internal_function; @@ -381,8 +410,7 @@ parse_tilde (char **word, size_t *word_length, size_t *max_length, static int internal_function do_parse_glob (const char *glob_word, char **word, size_t *word_length, - size_t *max_length, wordexp_t *pwordexp, const char *ifs, - const char *ifs_white) + size_t *max_length, wordexp_t *pwordexp, struct ifss *ifsdata) { int error; unsigned int match; @@ -397,7 +425,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length, return WRDE_NOSPACE; } - if (ifs && !*ifs) + if (ifsdata->ifs && !*(ifsdata->ifs)) { /* No field splitting allowed. */ assert (globbuf.gl_pathv[0] != NULL); @@ -414,7 +442,7 @@ do_parse_glob (const char *glob_word, char **word, size_t *word_length, return *word ? 0 : WRDE_NOSPACE; } - assert (ifs == NULL || *ifs != '\0'); + assert (ifsdata->ifs == NULL || *(ifsdata->ifs) != '\0'); if (*word != NULL) { free (*word); @@ -439,7 +467,7 @@ static int internal_function parse_glob (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char *ifs, const char *ifs_white) + wordexp_t *pwordexp, struct ifss *ifsdata) { /* We are poised just after a '*', a '[' or a '?'. */ int error = WRDE_NOSPACE; @@ -452,7 +480,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length, glob_list.we_offs = 0; for (; words[*offset] != '\0'; ++*offset) { - if (strchr (ifs, words[*offset]) != NULL) + if (in_charset (words[*offset], ifsdata->ifs_table)) /* Reached IFS */ break; @@ -488,7 +516,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length, if (quoted != 1 && words[*offset] == '$') { error = parse_dollars (word, word_length, max_length, words, - offset, flags, &glob_list, ifs, ifs_white, + offset, flags, &glob_list, ifsdata, quoted == 2); if (error) goto tidy_up; @@ -523,7 +551,7 @@ parse_glob (char **word, size_t *word_length, size_t *max_length, *word = w_newword (word_length, max_length); for (i = 0; error == 0 && i < glob_list.we_wordc; i++) error = do_parse_glob (glob_list.we_wordv[i], word, word_length, - max_length, pwordexp, ifs, ifs_white); + max_length, pwordexp, ifsdata); /* Now tidy up */ tidy_up: @@ -685,7 +713,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length, { case '$': error = parse_dollars (&expr, &expr_length, &expr_maxlen, - words, offset, flags, NULL, NULL, NULL, 1); + words, offset, flags, NULL, &nulifs, 1); /* The ``1'' here is to tell parse_dollars not to * split the fields. */ @@ -699,7 +727,7 @@ parse_arith (char **word, size_t *word_length, size_t *max_length, case '`': (*offset)++; error = parse_backtick (&expr, &expr_length, &expr_maxlen, - words, offset, flags, NULL, NULL, NULL); + words, offset, flags, NULL, &nulifs); /* The first NULL here is to tell parse_backtick not to * split the fields. */ @@ -884,8 +912,7 @@ exec_comm_child (char *comm, int *fildes, int showerr, int noexec) static int internal_function exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length, - int flags, wordexp_t *pwordexp, const char *ifs, - const char *ifs_white) + int flags, wordexp_t *pwordexp, struct ifss *ifsdata) { int fildes[2]; #define bufsize 128 @@ -1010,10 +1037,10 @@ exec_comm (char *comm, char **word, size_t *word_length, size_t *max_length, for (i = 0; i < buflen; ++i) { - if (strchr (ifs, buffer[i]) != NULL) + if (in_charset (buffer[i], ifsdata->ifs_table)) { /* Current character is IFS */ - if (strchr (ifs_white, buffer[i]) == NULL) + if (!in_charset (buffer[i], ifsdata->ifs_white_table)) { /* Current character is IFS but not whitespace */ if (copying == 2) @@ -1142,7 +1169,7 @@ static int internal_function parse_comm (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, wordexp_t *pwordexp, - const char *ifs, const char *ifs_white) + struct ifss *ifsdata) { /* We are poised just after "$(" */ int paren_depth = 1; @@ -1191,7 +1218,7 @@ parse_comm (char **word, size_t *word_length, size_t *max_length, #endif error = exec_comm (comm, word, word_length, max_length, - flags, pwordexp, ifs, ifs_white); + flags, pwordexp, ifsdata); #ifdef __libc_ptf_call __libc_ptf_call (pthread_setcancelstate, (state, NULL), 0); @@ -1222,14 +1249,12 @@ parse_comm (char **word, size_t *word_length, size_t *max_length, return WRDE_SYNTAX; } -#define CHAR_IN_SET(ch, char_set) \ - (memchr (char_set "", ch, sizeof (char_set) - 1) != NULL) static int internal_function parse_param (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, wordexp_t *pwordexp, - const char *ifs, const char *ifs_white, int quoted) + struct ifss *ifsdata, int quoted) { /* We are poised just after "$" */ enum action @@ -1269,6 +1294,8 @@ parse_param (char **word, size_t *word_length, size_t *max_length, if (brace) ++*offset; + uint32_t set1[8] = {0x0, 0x410, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0}; /* *@$ */ + /* First collect the parameter name. */ if (words[*offset] == '#') @@ -1306,7 +1333,7 @@ parse_param (char **word, size_t *word_length, size_t *max_length, } while (isdigit(words[++*offset])); } - else if (CHAR_IN_SET (words[*offset], "*@$")) + else if (in_charset (words[*offset], set1)) { /* Special parameter. */ special = 1; @@ -1349,8 +1376,9 @@ parse_param (char **word, size_t *word_length, size_t *max_length, } break; - case ':': - if (!CHAR_IN_SET (words[1 + *offset], "-=?+")) + case ':':; + uint32_t set2[8] = {0x0, 0xa0002800, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* -=?+ */ + if (!in_charset (words[1 + *offset], set2)) goto syntax; colon_seen = 1; @@ -1648,7 +1676,7 @@ envsubst: case '$': offset = 0; error = parse_dollars (&expanded, &exp_len, &exp_maxl, p, - &offset, flags, NULL, NULL, NULL, 1); + &offset, flags, NULL, &nulifs, 1); if (error) { if (free_value) @@ -1991,22 +2019,22 @@ envsubst: } /* Skip IFS whitespace before the field */ - field_begin += strspn (field_begin, ifs_white); + field_begin += strspn (field_begin, ifsdata->ifs_white); if (!seen_nonws_ifs && *field_begin == 0) /* Nothing but whitespace */ break; /* Search for the end of the field */ - field_end = field_begin + strcspn (field_begin, ifs); + field_end = field_begin + strcspn (field_begin, ifsdata->ifs); /* Set up pointer to the character after end of field and skip whitespace IFS after it. */ - next_field = field_end + strspn (field_end, ifs_white); + next_field = field_end + strspn (field_end, ifsdata->ifs_white); /* Skip at most one non-whitespace IFS character after the field */ seen_nonws_ifs = 0; - if (*next_field && strchr (ifs, *next_field)) + if (*next_field && in_charset (*next_field, ifsdata->ifs_table)) { seen_nonws_ifs = 1; next_field++; @@ -2052,13 +2080,12 @@ do_error: return error; } -#undef CHAR_IN_SET static int internal_function parse_dollars (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char *ifs, const char *ifs_white, + wordexp_t *pwordexp, struct ifss *ifsdata, int quoted) { /* We are poised _at_ "$" */ @@ -2097,7 +2124,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length, (*offset) += 2; return parse_comm (word, word_length, max_length, words, offset, flags, - quoted? NULL : pwordexp, ifs, ifs_white); + quoted? NULL : pwordexp, ifsdata); case '[': (*offset) += 2; @@ -2109,7 +2136,7 @@ parse_dollars (char **word, size_t *word_length, size_t *max_length, default: ++(*offset); /* parse_param needs to know if "{" is there */ return parse_param (word, word_length, max_length, words, offset, flags, - pwordexp, ifs, ifs_white, quoted); + pwordexp, ifsdata, quoted); } } @@ -2117,7 +2144,7 @@ static int internal_function parse_backtick (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char *ifs, const char *ifs_white) + wordexp_t *pwordexp, struct ifss *ifsdata) { /* We are poised just after "`" */ int error; @@ -2133,7 +2160,7 @@ parse_backtick (char **word, size_t *word_length, size_t *max_length, case '`': /* Go -- give the script to the shell */ error = exec_comm (comm, word, word_length, max_length, flags, - pwordexp, ifs, ifs_white); + pwordexp, ifsdata); free (comm); return error; @@ -2181,7 +2208,7 @@ static int internal_function parse_dquote (char **word, size_t *word_length, size_t *max_length, const char *words, size_t *offset, int flags, - wordexp_t *pwordexp, const char * ifs, const char * ifs_white) + wordexp_t *pwordexp, struct ifss *ifsdata) { /* We are poised just after a double-quote */ int error; @@ -2195,7 +2222,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length, case '$': error = parse_dollars (word, word_length, max_length, words, offset, - flags, pwordexp, ifs, ifs_white, 1); + flags, pwordexp, ifsdata, 1); /* The ``1'' here is to tell parse_dollars not to * split the fields. It may need to, however ("$@"). */ @@ -2207,7 +2234,7 @@ parse_dquote (char **word, size_t *word_length, size_t *max_length, case '`': ++(*offset); error = parse_backtick (word, word_length, max_length, words, - offset, flags, NULL, NULL, NULL); + offset, flags, NULL, &nulifs); /* The first NULL here is to tell parse_backtick not to * split the fields. */ @@ -2340,6 +2367,13 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) *whch = '\0'; } + struct ifss ifsdata; + ifsdata.ifs = ifs; + ifsdata.ifs_white = ifs_white; + make_charset (ifs, ifsdata.ifs_table); + make_charset (ifs_white, ifsdata.ifs_white_table); + + for (words_offset = 0 ; words[words_offset] ; ++words_offset) switch (words[words_offset]) { @@ -2354,7 +2388,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) case '$': error = parse_dollars (&word, &word_length, &max_length, words, - &words_offset, flags, pwordexp, ifs, ifs_white, + &words_offset, flags, pwordexp, &ifsdata, 0); if (error) @@ -2365,8 +2399,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) case '`': ++words_offset; error = parse_backtick (&word, &word_length, &max_length, words, - &words_offset, flags, pwordexp, ifs, - ifs_white); + &words_offset, flags, pwordexp, &ifsdata); if (error) goto do_error; @@ -2376,7 +2409,7 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) case '"': ++words_offset; error = parse_dquote (&word, &word_length, &max_length, words, - &words_offset, flags, pwordexp, ifs, ifs_white); + &words_offset, flags, pwordexp, &ifsdata); if (error) goto do_error; @@ -2422,21 +2455,24 @@ wordexp (const char *words, wordexp_t *pwordexp, int flags) case '[': case '?': error = parse_glob (&word, &word_length, &max_length, words, - &words_offset, flags, pwordexp, ifs, ifs_white); + &words_offset, flags, pwordexp, &ifsdata); if (error) goto do_error; break; - default: + default:; /* Is it a word separator? */ - if (strchr (" \t", words[words_offset]) == NULL) - { - char ch = words[words_offset]; + char ch = words[words_offset]; + uint32_t set3[8] = {0x200, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; /* \t */ + if (!in_charset (ch, set3)) + { /* Not a word separator -- but is it a valid word char? */ - if (strchr ("\n|&;<>(){}", ch)) + uint32_t set4[8] = {0x400, 0x58000340, 0x0, 0x38000000, 0x0, 0x0, + 0x0, 0x0}; /* \n|&;<>(){} */ + if (in_charset (ch, set4)) { /* Fail */ error = WRDE_BADCHAR;