libdep plugin: fix bugs in parser and improve escaping
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_binutils_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 |
success
|
Test passed
|
Commit Message
Fix bugs in the __.LIBDEP string to argument list parser in the libdep
plugin.
PR ld/31906
* libdep_plugin.c (st2vec): fix bug where null byte was not copied
on memmove, repeating the last character. Allow literal \ by escaping
it with \\. Allow escaping of quotes inside quoted strings. Fix out
of bounds errors leading to segfaults on input strings like
`-L/'a\'b'`.
Signed-off-by: Harmen Stoppels <me@harmenstoppels.nl>
---
ld/libdep_plugin.c | 63 +++++++++++++++++++---------------------------
1 file changed, 26 insertions(+), 37 deletions(-)
Comments
There's another buffer overflow bug if the input is tab-separated.
The char **res array is allocated based on number of literal ` ` spaces,
but should use `isspace`.
I have another fix that works allocation free and in linear time in the
input string, it just counts the number of arguments and packs them as
contiguous C-strings. On the call site it advances with
`arg = strchr(arg, '\0') + 1`.
I would also like feedback on character escaping. The current implementation
is different from how a shell behaves. How should it behave?
On Wednesday, June 19th, 2024 at 12:03 PM, Harmen Stoppels <me@harmenstoppels.nl> wrote:
>
>
> Fix bugs in the __.LIBDEP string to argument list parser in the libdep
> plugin.
>
> PR ld/31906
> * libdep_plugin.c (st2vec): fix bug where null byte was not copied
> on memmove, repeating the last character. Allow literal \ by escaping
> it with \\. Allow escaping of quotes inside quoted strings. Fix out
> of bounds errors leading to segfaults on input strings like
> `-L/'a\\'b'`.
>
> Signed-off-by: Harmen Stoppels me@harmenstoppels.nl
>
> ---
> ld/libdep_plugin.c | 63 +++++++++++++++++++---------------------------
> 1 file changed, 26 insertions(+), 37 deletions(-)
>
> diff --git a/ld/libdep_plugin.c b/ld/libdep_plugin.c
> index 414f3cffe12..44fb7cbd588 100644
> --- a/ld/libdep_plugin.c
> +++ b/ld/libdep_plugin.c
> @@ -138,7 +138,7 @@ str2vec (char *in)
> {
> char **res;
> char *s, *first, *end;
> - char *sq, *dq;
> + char *quote;
> int i;
>
> end = in + strlen (in);
> @@ -157,55 +157,44 @@ str2vec (char *in)
> return res;
>
> i = 0;
> - sq = NULL;
> - dq = NULL;
> + quote = NULL;
> res[0] = first;
> - for (s = first; *s; s++)
> + for (s = first; *s;)
> {
> if (s == '\\')
> {
> - memmove (s, s+1, end-s-1);
> - end--;
> + / trailing backslash is not an escape character */
> + if (s + 1 == end)
> + break;
> + memmove (s, s + 1, end - s);
> + --end;
> + ++s;
> }
> - if (isspace ((unsigned char) *s))
> + else if (*s == '\'' || *s == '\"')
> {
> - if (sq || dq)
> - continue;
> - *s++ = '\0';
> - while (isspace ((unsigned char) *s)) s++;
> - if (*s)
> - res[++i] = s;
> - }
> - if (*s == '\'' && !dq)
> - {
> - if (sq)
> + if (!quote)
> + quote = s++;
> + else if (*s == *quote)
> {
> - memmove (sq, sq+1, s-sq-1);
> - memmove (s-2, s+1, end-s-1);
> + memmove (quote, quote + 1, s - quote - 1);
> + memmove (s - 1, s + 1, end - s);
> end -= 2;
> - s--;
> - sq = NULL;
> + --s;
> + quote = NULL;
> }
> else
> - {
> - sq = s;
> - }
> + ++s;
> }
> - if (*s == '"' && !sq)
> + else if (!quote && isspace ((unsigned char) *s))
> {
> - if (dq)
> - {
> - memmove (dq, dq+1, s-dq-1);
> - memmove (s-2, s+1, end-s-1);
> - end -= 2;
> - s--;
> - dq = NULL;
> - }
> - else
> - {
> - dq = s;
> - }
> + *s++ = '\0';
> + while (isspace ((unsigned char) *s))
> + s++;
> + if (*s)
> + res[++i] = s;
> }
> + else
> + ++s;
> }
> res[++i] = NULL;
> return res;
> --
> 2.40.1
@@ -138,7 +138,7 @@ str2vec (char *in)
{
char **res;
char *s, *first, *end;
- char *sq, *dq;
+ char *quote;
int i;
end = in + strlen (in);
@@ -157,55 +157,44 @@ str2vec (char *in)
return res;
i = 0;
- sq = NULL;
- dq = NULL;
+ quote = NULL;
res[0] = first;
- for (s = first; *s; s++)
+ for (s = first; *s;)
{
if (*s == '\\')
{
- memmove (s, s+1, end-s-1);
- end--;
+ /* trailing backslash is not an escape character */
+ if (s + 1 == end)
+ break;
+ memmove (s, s + 1, end - s);
+ --end;
+ ++s;
}
- if (isspace ((unsigned char) *s))
+ else if (*s == '\'' || *s == '\"')
{
- if (sq || dq)
- continue;
- *s++ = '\0';
- while (isspace ((unsigned char) *s)) s++;
- if (*s)
- res[++i] = s;
- }
- if (*s == '\'' && !dq)
- {
- if (sq)
+ if (!quote)
+ quote = s++;
+ else if (*s == *quote)
{
- memmove (sq, sq+1, s-sq-1);
- memmove (s-2, s+1, end-s-1);
+ memmove (quote, quote + 1, s - quote - 1);
+ memmove (s - 1, s + 1, end - s);
end -= 2;
- s--;
- sq = NULL;
+ --s;
+ quote = NULL;
}
else
- {
- sq = s;
- }
+ ++s;
}
- if (*s == '"' && !sq)
+ else if (!quote && isspace ((unsigned char) *s))
{
- if (dq)
- {
- memmove (dq, dq+1, s-dq-1);
- memmove (s-2, s+1, end-s-1);
- end -= 2;
- s--;
- dq = NULL;
- }
- else
- {
- dq = s;
- }
+ *s++ = '\0';
+ while (isspace ((unsigned char) *s))
+ s++;
+ if (*s)
+ res[++i] = s;
}
+ else
+ ++s;
}
res[++i] = NULL;
return res;