libdep plugin: fix bugs in parser and improve escaping

Message ID Ldqb0QEln1fWhzH9Ln9YR5hk7syEtZwKzisxvb3CUbiVfxQPQBjHL0jt0iZRd8irUv-Y9QmFeQPUXaOUPSbqzA1sWVYB5qAO2cl-L7DwX0Y=@harmenstoppels.nl
State New
Headers
Series libdep plugin: fix bugs in parser and improve escaping |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed

Commit Message

Harmen Stoppels June 19, 2024, 10:03 a.m. UTC
  Fix bugs in the __.LIBDEP string to argument list parser in the libdep
plugin.

	PR ld/31906
	* libdep_plugin.c (st2vec): fix bug where null byte was not copied
	  on memmove, repeating the last character.  Allow literal \ by escaping
	  it with \\.  Allow escaping of quotes inside quoted strings.  Fix out
	  of bounds errors leading to segfaults on input strings like
	  `-L/'a\'b'`.

Signed-off-by: Harmen Stoppels <me@harmenstoppels.nl>
---
 ld/libdep_plugin.c | 63 +++++++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 37 deletions(-)
  

Comments

Harmen Stoppels June 19, 2024, 3:07 p.m. UTC | #1
There's another buffer overflow bug if the input is tab-separated.

The char **res array is allocated based on number of literal ` ` spaces,
but should use `isspace`.

I have another fix that works allocation free and in linear time in the
input string, it just counts the number of arguments and packs them as
contiguous C-strings. On the call site it advances with
`arg = strchr(arg, '\0') + 1`.

I would also like feedback on character escaping. The current implementation
is different from how a shell behaves. How should it behave?


On Wednesday, June 19th, 2024 at 12:03 PM, Harmen Stoppels <me@harmenstoppels.nl> wrote:

> 
> 
> Fix bugs in the __.LIBDEP string to argument list parser in the libdep
> plugin.
> 
> PR ld/31906
> * libdep_plugin.c (st2vec): fix bug where null byte was not copied
> on memmove, repeating the last character. Allow literal \ by escaping
> it with \\. Allow escaping of quotes inside quoted strings. Fix out
> of bounds errors leading to segfaults on input strings like
> `-L/'a\\'b'`.
> 
> Signed-off-by: Harmen Stoppels me@harmenstoppels.nl
> 
> ---
> ld/libdep_plugin.c | 63 +++++++++++++++++++---------------------------
> 1 file changed, 26 insertions(+), 37 deletions(-)
> 
> diff --git a/ld/libdep_plugin.c b/ld/libdep_plugin.c
> index 414f3cffe12..44fb7cbd588 100644
> --- a/ld/libdep_plugin.c
> +++ b/ld/libdep_plugin.c
> @@ -138,7 +138,7 @@ str2vec (char *in)
> {
> char **res;
> char *s, *first, *end;
> - char *sq, *dq;
> + char *quote;
> int i;
> 
> end = in + strlen (in);
> @@ -157,55 +157,44 @@ str2vec (char *in)
> return res;
> 
> i = 0;
> - sq = NULL;
> - dq = NULL;
> + quote = NULL;
> res[0] = first;
> - for (s = first; *s; s++)
> + for (s = first; *s;)
> {
> if (s == '\\')
> {
> - memmove (s, s+1, end-s-1);
> - end--;
> + / trailing backslash is not an escape character */
> + if (s + 1 == end)
> + break;
> + memmove (s, s + 1, end - s);
> + --end;
> + ++s;
> }
> - if (isspace ((unsigned char) *s))
> + else if (*s == '\'' || *s == '\"')
> {
> - if (sq || dq)
> - continue;
> - *s++ = '\0';
> - while (isspace ((unsigned char) *s)) s++;
> - if (*s)
> - res[++i] = s;
> - }
> - if (*s == '\'' && !dq)
> - {
> - if (sq)
> + if (!quote)
> + quote = s++;
> + else if (*s == *quote)
> {
> - memmove (sq, sq+1, s-sq-1);
> - memmove (s-2, s+1, end-s-1);
> + memmove (quote, quote + 1, s - quote - 1);
> + memmove (s - 1, s + 1, end - s);
> end -= 2;
> - s--;
> - sq = NULL;
> + --s;
> + quote = NULL;
> }
> else
> - {
> - sq = s;
> - }
> + ++s;
> }
> - if (*s == '"' && !sq)
> + else if (!quote && isspace ((unsigned char) *s))
> {
> - if (dq)
> - {
> - memmove (dq, dq+1, s-dq-1);
> - memmove (s-2, s+1, end-s-1);
> - end -= 2;
> - s--;
> - dq = NULL;
> - }
> - else
> - {
> - dq = s;
> - }
> + *s++ = '\0';
> + while (isspace ((unsigned char) *s))
> + s++;
> + if (*s)
> + res[++i] = s;
> }
> + else
> + ++s;
> }
> res[++i] = NULL;
> return res;
> --
> 2.40.1
  

Patch

diff --git a/ld/libdep_plugin.c b/ld/libdep_plugin.c
index 414f3cffe12..44fb7cbd588 100644
--- a/ld/libdep_plugin.c
+++ b/ld/libdep_plugin.c
@@ -138,7 +138,7 @@  str2vec (char *in)
 {
   char **res;
   char *s, *first, *end;
-  char *sq, *dq;
+  char *quote;
   int i;
 
   end = in + strlen (in);
@@ -157,55 +157,44 @@  str2vec (char *in)
     return res;
 
   i = 0;
-  sq = NULL;
-  dq = NULL;
+  quote = NULL;
   res[0] = first;
-  for (s = first; *s; s++)
+  for (s = first; *s;)
     {
       if (*s == '\\')
 	{
-	  memmove (s, s+1, end-s-1);
-	  end--;
+	  /* trailing backslash is not an escape character */
+	  if (s + 1 == end)
+	    break;
+	  memmove (s, s + 1, end - s);
+	  --end;
+	  ++s;
 	}
-      if (isspace ((unsigned char) *s))
+      else if (*s == '\'' || *s == '\"')
 	{
-	  if (sq || dq)
-	    continue;
-	  *s++ = '\0';
-	  while (isspace ((unsigned char) *s)) s++;
-	  if (*s)
-	    res[++i] = s;
-	}
-      if (*s == '\'' && !dq)
-	{
-	  if (sq)
+	  if (!quote)
+	    quote = s++;
+	  else if (*s == *quote)
 	    {
-	      memmove (sq, sq+1, s-sq-1);
-	      memmove (s-2, s+1, end-s-1);
+	      memmove (quote, quote + 1, s - quote - 1);
+	      memmove (s - 1, s + 1, end - s);
 	      end -= 2;
-	      s--;
-	      sq = NULL;
+	      --s;
+	      quote = NULL;
 	    }
 	  else
-	    {
-	      sq = s;
-	    }
+	    ++s;
 	}
-      if (*s == '"' && !sq)
+      else if (!quote && isspace ((unsigned char) *s))
 	{
-	  if (dq)
-	    {
-	      memmove (dq, dq+1, s-dq-1);
-	      memmove (s-2, s+1, end-s-1);
-	      end -= 2;
-	      s--;
-	      dq = NULL;
-	    }
-	  else
-	    {
-	      dq = s;
-	    }
+	  *s++ = '\0';
+	  while (isspace ((unsigned char) *s))
+	    s++;
+	  if (*s)
+	    res[++i] = s;
 	}
+      else
+	++s;
     }
   res[++i] = NULL;
   return res;