[1/3] posix: Remove alloca usage on regex set_regs
Commit Message
It replaces the regmatch_t with a dynarray list.
Checked on x86_64-linux-gnu.
---
posix/regexec.c | 62 ++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 31 deletions(-)
Comments
On 1/6/21 10:17 AM, Adhemerval Zanella wrote:
> It replaces the regmatch_t with a dynarray list.
regexec.c is shared with Gnulib, so some work needed to be done on the
Gnulib side for this patch since Gnulib didn't have dynarray. Dynarray
is something I've been meaning to add to Gnulib for some time, so I did
that by installing the first attached patch into Gnulib. Could you
please propagate the new Gnulib dynarray sources into glibc so that they
stay in sync? As near as I can make out, the glibc dynarray files can
now be identical to the new Gnulib files; if not, please let me know.
> posix/regexec.c | 62 ++++++++++++++++++++++++-------------------------
> 1 file changed, 31 insertions(+), 31 deletions(-)
> ...
> @@ -1355,6 +1352,16 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
> return fs->stack[num].node;
> }
>
> +
> +#define DYNARRAY_STRUCT regmatch_list
> +#define DYNARRAY_ELEMENT regmatch_t
> +#define DYNARRAY_PREFIX regmatch_list_
> +#include <malloc/dynarray-skeleton.c>
> +
> +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
> + struct regmatch_list *prev_idx_match, Idx cur_node,
> + Idx cur_idx, Idx nmatch);
> +
> /* Set the positions where the subexpressions are starts/ends to registers
> PMATCH.
> Note: We assume that pmatch[0] is already set, and
> @@ -1370,8 +1377,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> re_node_set eps_via_nodes;
> struct re_fail_stack_t *fs;
> struct re_fail_stack_t fs_body = { 0, 2, NULL };
> - regmatch_t *prev_idx_match;
> - bool prev_idx_match_malloced = false;
> + struct regmatch_list prev_idx_match;
> + regmatch_list_init (&prev_idx_match);
>
> DEBUG_ASSERT (nmatch > 1);
> DEBUG_ASSERT (mctx->state_log != NULL);
> @@ -1388,23 +1395,18 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> cur_node = dfa->init_node;
> re_node_set_init_empty (&eps_via_nodes);
>
> - if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
> - prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
> - else
> + if (!regmatch_list_resize (&prev_idx_match, nmatch))
> {
> - prev_idx_match = re_malloc (regmatch_t, nmatch);
> - if (prev_idx_match == NULL)
> - {
> - free_fail_stack_return (fs);
> - return REG_ESPACE;
> - }
> - prev_idx_match_malloced = true;
> + regmatch_list_free (&prev_idx_match);
> + free_fail_stack_return (fs);
> + return REG_ESPACE;
> }
These three hunks are good, but you can omit most of the other hunks
(and improve performance a bit) by inserting the following line after
the 3rd hunk:
+ regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
since the dynarray doesn't grow after that and this means you don't need
to change the rest of the code to use prev_match rather than
prev_idx_match. The only other hunks you need to retain are the ones
replacing re_free with regmastch_list_free.
I've made this improvement to Gnulib by installing the second attached
patch, so you should be able to copy Gnulib regexec.c to glibc without
changing it.
Hi,
Since this patch has been applied, GNU Wget fails during the linking
stage due to regmatch_list_free not being available:
/usr/bin/ld: ../lib/libgnu.a(regex.o): in function `set_regs':
/home/rincewind/Programming/wget/lib/regexec.c:1444: undefined reference
to `regmatch_list_free'
/usr/bin/ld: /home/rincewind/Programming/wget/lib/regexec.c:1421:
undefined reference to `regmatch_list_free'
/usr/bin/ld: /home/rincewind/Programming/wget/lib/regexec.c:1454:
undefined reference to `regmatch_list_free'
/usr/bin/ld: /home/rincewind/Programming/wget/lib/regexec.c:1444:
undefined reference to `regmatch_list_free'
/usr/bin/ld: /home/rincewind/Programming/wget/lib/regexec.c:1430:
undefined reference to `regmatch_list_free'
/usr/bin/ld:
../lib/libgnu.a(regex.o):/home/rincewind/Programming/wget/lib/regexec.c:1460:
more undefined references to `regmatch_list_free' follow
On 06.01.21 19:17, Adhemerval Zanella wrote:
> It replaces the regmatch_t with a dynarray list.
>
> Checked on x86_64-linux-gnu.
> ---
> posix/regexec.c | 62 ++++++++++++++++++++++++-------------------------
> 1 file changed, 31 insertions(+), 31 deletions(-)
>
> diff --git a/posix/regexec.c b/posix/regexec.c
> index b083342f77..5e22f90842 100644
> --- a/posix/regexec.c
> +++ b/posix/regexec.c
> @@ -54,9 +54,6 @@ static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
> Idx *p_match_first);
> static Idx check_halt_state_context (const re_match_context_t *mctx,
> const re_dfastate_t *state, Idx idx);
> -static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
> - regmatch_t *prev_idx_match, Idx cur_node,
> - Idx cur_idx, Idx nmatch);
> static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
> Idx str_idx, Idx dest_node, Idx nregs,
> regmatch_t *regs,
> @@ -1355,6 +1352,16 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
> return fs->stack[num].node;
> }
>
> +
> +#define DYNARRAY_STRUCT regmatch_list
> +#define DYNARRAY_ELEMENT regmatch_t
> +#define DYNARRAY_PREFIX regmatch_list_
> +#include <malloc/dynarray-skeleton.c>
> +
> +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
> + struct regmatch_list *prev_idx_match, Idx cur_node,
> + Idx cur_idx, Idx nmatch);
> +
> /* Set the positions where the subexpressions are starts/ends to registers
> PMATCH.
> Note: We assume that pmatch[0] is already set, and
> @@ -1370,8 +1377,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> re_node_set eps_via_nodes;
> struct re_fail_stack_t *fs;
> struct re_fail_stack_t fs_body = { 0, 2, NULL };
> - regmatch_t *prev_idx_match;
> - bool prev_idx_match_malloced = false;
> + struct regmatch_list prev_idx_match;
> + regmatch_list_init (&prev_idx_match);
>
> DEBUG_ASSERT (nmatch > 1);
> DEBUG_ASSERT (mctx->state_log != NULL);
> @@ -1388,23 +1395,18 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> cur_node = dfa->init_node;
> re_node_set_init_empty (&eps_via_nodes);
>
> - if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
> - prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
> - else
> + if (!regmatch_list_resize (&prev_idx_match, nmatch))
> {
> - prev_idx_match = re_malloc (regmatch_t, nmatch);
> - if (prev_idx_match == NULL)
> - {
> - free_fail_stack_return (fs);
> - return REG_ESPACE;
> - }
> - prev_idx_match_malloced = true;
> + regmatch_list_free (&prev_idx_match);
> + free_fail_stack_return (fs);
> + return REG_ESPACE;
> }
> - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
> + for (size_t i = 0; i < nmatch; i++)
> + *regmatch_list_at (&prev_idx_match, i) = pmatch[i];
>
> for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
> {
> - update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
> + update_regs (dfa, pmatch, &prev_idx_match, cur_node, idx, nmatch);
>
> if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
> {
> @@ -1417,8 +1419,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> if (reg_idx == nmatch)
> {
> re_node_set_free (&eps_via_nodes);
> - if (prev_idx_match_malloced)
> - re_free (prev_idx_match);
> + regmatch_list_free (&prev_idx_match);
> return free_fail_stack_return (fs);
> }
> cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
> @@ -1427,8 +1428,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> else
> {
> re_node_set_free (&eps_via_nodes);
> - if (prev_idx_match_malloced)
> - re_free (prev_idx_match);
> + regmatch_list_free (&prev_idx_match);
> return REG_NOERROR;
> }
> }
> @@ -1442,8 +1442,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> if (__glibc_unlikely (cur_node == -2))
> {
> re_node_set_free (&eps_via_nodes);
> - if (prev_idx_match_malloced)
> - re_free (prev_idx_match);
> + regmatch_list_free (&prev_idx_match);
> free_fail_stack_return (fs);
> return REG_ESPACE;
> }
> @@ -1453,15 +1452,13 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
> else
> {
> re_node_set_free (&eps_via_nodes);
> - if (prev_idx_match_malloced)
> - re_free (prev_idx_match);
> + regmatch_list_free (&prev_idx_match);
> return REG_NOMATCH;
> }
> }
> }
> re_node_set_free (&eps_via_nodes);
> - if (prev_idx_match_malloced)
> - re_free (prev_idx_match);
> + regmatch_list_free (&prev_idx_match);
> return free_fail_stack_return (fs);
> }
>
> @@ -1483,7 +1480,8 @@ free_fail_stack_return (struct re_fail_stack_t *fs)
>
> static void
> update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
> - regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
> + struct regmatch_list *prev_idx_match, Idx cur_node, Idx cur_idx,
> + Idx nmatch)
> {
> int type = dfa->nodes[cur_node].type;
> if (type == OP_OPEN_SUBEXP)
> @@ -1508,18 +1506,20 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
> pmatch[reg_num].rm_eo = cur_idx;
> /* This is a non-empty match or we are not inside an optional
> subexpression. Accept this right away. */
> - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
> + for (size_t i = 0; i < nmatch; i++)
> + *regmatch_list_at (prev_idx_match, i) = pmatch[i];
> }
> else
> {
> if (dfa->nodes[cur_node].opt_subexp
> - && prev_idx_match[reg_num].rm_so != -1)
> + && regmatch_list_at (prev_idx_match, reg_num)->rm_so != -1)
> /* We transited through an empty match for an optional
> subexpression, like (a?)*, and this is not the subexp's
> first match. Copy back the old content of the registers
> so that matches of an inner subexpression are undone as
> well, like in ((a?))*. */
> - memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
> + memcpy (pmatch, regmatch_list_begin (prev_idx_match),
> + sizeof (regmatch_t) * nmatch);
> else
> /* We completed a subexpression, but it may be part of
> an optional one, so do not update PREV_IDX_MATCH. */
>
On 1/8/21 5:24 PM, Darshit Shah wrote:
> /home/rincewind/Programming/wget/lib/regexec.c:1444: undefined reference
> to `regmatch_list_free'
Oof, it's because Gnulib's replacement of 'free' with 'rpl_free' caused
the function definition to be named 'regmatch_list_rpl_free'.
Thanks for reporting it. I installed the attached hack into Gnulib to
work around the problem. This should work with glibc as well.
On 08/01/2021 17:14, Paul Eggert wrote:
> On 1/6/21 10:17 AM, Adhemerval Zanella wrote:
>> It replaces the regmatch_t with a dynarray list.
>
> regexec.c is shared with Gnulib, so some work needed to be done on the Gnulib side for this patch since Gnulib didn't have dynarray. Dynarray is something I've been meaning to add to Gnulib for some time, so I did that by installing the first attached patch into Gnulib. Could you please propagate the new Gnulib dynarray sources into glibc so that they stay in sync? As near as I can make out, the glibc dynarray files can now be identical to the new Gnulib files; if not, please let me know.
I will check and sync the differences.
>
>
>> posix/regexec.c | 62 ++++++++++++++++++++++++-------------------------
>> 1 file changed, 31 insertions(+), 31 deletions(-)
>> ...
>> @@ -1355,6 +1352,16 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
>> return fs->stack[num].node;
>> }
>> +
>> +#define DYNARRAY_STRUCT regmatch_list
>> +#define DYNARRAY_ELEMENT regmatch_t
>> +#define DYNARRAY_PREFIX regmatch_list_
>> +#include <malloc/dynarray-skeleton.c>
>> +
>> +static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
>> + struct regmatch_list *prev_idx_match, Idx cur_node,
>> + Idx cur_idx, Idx nmatch);
>> +
>> /* Set the positions where the subexpressions are starts/ends to registers
>> PMATCH.
>> Note: We assume that pmatch[0] is already set, and
>> @@ -1370,8 +1377,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
>> re_node_set eps_via_nodes;
>> struct re_fail_stack_t *fs;
>> struct re_fail_stack_t fs_body = { 0, 2, NULL };
>> - regmatch_t *prev_idx_match;
>> - bool prev_idx_match_malloced = false;
>> + struct regmatch_list prev_idx_match;
>> + regmatch_list_init (&prev_idx_match);
>> DEBUG_ASSERT (nmatch > 1);
>> DEBUG_ASSERT (mctx->state_log != NULL);
>> @@ -1388,23 +1395,18 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
>> cur_node = dfa->init_node;
>> re_node_set_init_empty (&eps_via_nodes);
>> - if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
>> - prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
>> - else
>> + if (!regmatch_list_resize (&prev_idx_match, nmatch))
>> {
>> - prev_idx_match = re_malloc (regmatch_t, nmatch);
>> - if (prev_idx_match == NULL)
>> - {
>> - free_fail_stack_return (fs);
>> - return REG_ESPACE;
>> - }
>> - prev_idx_match_malloced = true;
>> + regmatch_list_free (&prev_idx_match);
>> + free_fail_stack_return (fs);
>> + return REG_ESPACE;
>> }
>
> These three hunks are good, but you can omit most of the other hunks (and improve performance a bit) by inserting the following line after the 3rd hunk:
>
> + regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
>
> since the dynarray doesn't grow after that and this means you don't need to change the rest of the code to use prev_match rather than prev_idx_match. The only other hunks you need to retain are the ones replacing re_free with regmastch_list_free.
>
> I've made this improvement to Gnulib by installing the second attached patch, so you should be able to copy Gnulib regexec.c to glibc without changing it.
Ok, I will check and sync with gnulib.
@@ -54,9 +54,6 @@ static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
Idx *p_match_first);
static Idx check_halt_state_context (const re_match_context_t *mctx,
const re_dfastate_t *state, Idx idx);
-static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
- regmatch_t *prev_idx_match, Idx cur_node,
- Idx cur_idx, Idx nmatch);
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
Idx str_idx, Idx dest_node, Idx nregs,
regmatch_t *regs,
@@ -1355,6 +1352,16 @@ pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
return fs->stack[num].node;
}
+
+#define DYNARRAY_STRUCT regmatch_list
+#define DYNARRAY_ELEMENT regmatch_t
+#define DYNARRAY_PREFIX regmatch_list_
+#include <malloc/dynarray-skeleton.c>
+
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ struct regmatch_list *prev_idx_match, Idx cur_node,
+ Idx cur_idx, Idx nmatch);
+
/* Set the positions where the subexpressions are starts/ends to registers
PMATCH.
Note: We assume that pmatch[0] is already set, and
@@ -1370,8 +1377,8 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
struct re_fail_stack_t fs_body = { 0, 2, NULL };
- regmatch_t *prev_idx_match;
- bool prev_idx_match_malloced = false;
+ struct regmatch_list prev_idx_match;
+ regmatch_list_init (&prev_idx_match);
DEBUG_ASSERT (nmatch > 1);
DEBUG_ASSERT (mctx->state_log != NULL);
@@ -1388,23 +1395,18 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
cur_node = dfa->init_node;
re_node_set_init_empty (&eps_via_nodes);
- if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
- prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
- else
+ if (!regmatch_list_resize (&prev_idx_match, nmatch))
{
- prev_idx_match = re_malloc (regmatch_t, nmatch);
- if (prev_idx_match == NULL)
- {
- free_fail_stack_return (fs);
- return REG_ESPACE;
- }
- prev_idx_match_malloced = true;
+ regmatch_list_free (&prev_idx_match);
+ free_fail_stack_return (fs);
+ return REG_ESPACE;
}
- memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+ for (size_t i = 0; i < nmatch; i++)
+ *regmatch_list_at (&prev_idx_match, i) = pmatch[i];
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
{
- update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+ update_regs (dfa, pmatch, &prev_idx_match, cur_node, idx, nmatch);
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
{
@@ -1417,8 +1419,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
if (reg_idx == nmatch)
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_idx_match);
return free_fail_stack_return (fs);
}
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
@@ -1427,8 +1428,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
else
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_idx_match);
return REG_NOERROR;
}
}
@@ -1442,8 +1442,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
if (__glibc_unlikely (cur_node == -2))
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_idx_match);
free_fail_stack_return (fs);
return REG_ESPACE;
}
@@ -1453,15 +1452,13 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
else
{
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_idx_match);
return REG_NOMATCH;
}
}
}
re_node_set_free (&eps_via_nodes);
- if (prev_idx_match_malloced)
- re_free (prev_idx_match);
+ regmatch_list_free (&prev_idx_match);
return free_fail_stack_return (fs);
}
@@ -1483,7 +1480,8 @@ free_fail_stack_return (struct re_fail_stack_t *fs)
static void
update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
- regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
+ struct regmatch_list *prev_idx_match, Idx cur_node, Idx cur_idx,
+ Idx nmatch)
{
int type = dfa->nodes[cur_node].type;
if (type == OP_OPEN_SUBEXP)
@@ -1508,18 +1506,20 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
pmatch[reg_num].rm_eo = cur_idx;
/* This is a non-empty match or we are not inside an optional
subexpression. Accept this right away. */
- memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+ for (size_t i = 0; i < nmatch; i++)
+ *regmatch_list_at (prev_idx_match, i) = pmatch[i];
}
else
{
if (dfa->nodes[cur_node].opt_subexp
- && prev_idx_match[reg_num].rm_so != -1)
+ && regmatch_list_at (prev_idx_match, reg_num)->rm_so != -1)
/* We transited through an empty match for an optional
subexpression, like (a?)*, and this is not the subexp's
first match. Copy back the old content of the registers
so that matches of an inner subexpression are undone as
well, like in ((a?))*. */
- memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+ memcpy (pmatch, regmatch_list_begin (prev_idx_match),
+ sizeof (regmatch_t) * nmatch);
else
/* We completed a subexpression, but it may be part of
an optional one, so do not update PREV_IDX_MATCH. */