[1/6] gdb: improve escaping when completing filenames

Message ID 0543026b57b831d545d99e325d6e6c3ed9c4c968.1711712401.git.aburgess@redhat.com
State New
Headers
Series Further filename completion improvements |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Testing passed

Commit Message

Andrew Burgess March 29, 2024, 11:42 a.m. UTC
  This commit is a step towards improving filename quoting when
completing filenames.

I've struggled a bit trying to split this series into chunks.  There's
a lot of dependencies between different parts of the completion
system, and trying to get this working correctly is pretty messy.

This first step is really about implementing 3 readline hooks:

  rl_char_is_quoted_p
    - Is a particular character quoted within readline's input buffer?
  rl_filename_dequoting_function
    - Remove quoting characters from a filename.
  rl_filename_quoting_function
    - Add quoting characters to a filename.

See 'info readline' for more details.

There's still a bunch of stuff that doesn't work after this commit,
mostly around the 'complete' command which of course doesn't go
through readline, so doesn't benefit from all of these new functions
yet, I'll add some of this in a later commit.

Tab completion is now slightly improved though, it is possible to
tab-complete a filename that includes a double or single quote, either
in an unquoted string or within a string surrounded by single or
double quotes, backslash escaping is used when necessary.

There are some additional tests to cover the new functionality.
---
 gdb/completer.c                               | 163 +++++++++++++++++-
 .../gdb.base/filename-completion.exp          |  34 ++++
 2 files changed, 194 insertions(+), 3 deletions(-)
  

Comments

Lancelot SIX March 30, 2024, 11:48 p.m. UTC | #1
Hi Andrew,

Thanks for working on that!

I have a couple of really minor comments below.

On Fri, Mar 29, 2024 at 11:42:27AM +0000, Andrew Burgess wrote:
> --- a/gdb/completer.c
> +++ b/gdb/completer.c
> @@ -204,6 +204,153 @@ noop_completer (struct cmd_list_element *ignore,
>  {
>  }
>  
> +/* Return 1 if the character at EINDEX in STRING is quoted (there is an
> +   unclosed quoted string), or if the character at EINDEX is quoted by a
> +   backslash.  */
> +
> +static int
> +gdb_completer_file_name_char_is_quoted (char *string, int eindex)
> +{
> +  for (int i = 0; i <= eindex && string[i] != '\0'; )
> +    {
> +      char c = string[i];
> +
> +      if (c == '\\')
> +	{
> +	  /* The backslash itself is not quoted.  */
> +	  if (i >= eindex)
> +	    return 0;
> +	  ++i;
> +	  /* But the next character is.  */
> +	  if (i >= eindex)
> +	    return 1;
> +	  if (string[i] == '\0')
> +	    return 0;
> +	  ++i;
> +	  continue;
> +	}
> +      else if (strchr (rl_completer_quote_characters, c) != nullptr)
> +	{
> +	  /* This assumes that extract_string_maybe_quoted can handle a
> +	     string quoted with character C.  Currently this is true as the
> +	     only characters we put in rl_completer_quote_characters are
> +	     single and/or double quotes, both of which
> +	     extract_string_maybe_quoted can handle.  */

Maybe it is worth asserting that assumption

    gdb_assert (c == '"' || c == '\'');

> +	  const char *tmp = &string[i];
> +	  (void) extract_string_maybe_quoted (&tmp);
> +	  i = tmp - string;
> +
> +	  /* Consider any character within the string we just skipped over
> +	     as quoted, though this might not be completely correct; the
> +	     opening and closing quotes are not themselves quoted.  But so
> +	     far this doesn't seem to have caused any issues.  */
> +	  if (i >= eindex)
> +	    return 1;
> +	}
> +      else
> +	++i;
> +    }
> +
> +  return 0;
> +}
> +
> +/* Removing character escaping from FILENAME.  QUOTE_CHAR is the quote
> +   character around FILENAME or the null-character if there is no quoting
> +   around FILENAME.  */
> +
> +static char *
> +gdb_completer_file_name_dequote (char *filename, int quote_char)
> +{
> +  std::string tmp;
> +
> +  if (quote_char == '\'')
> +    {
> +      /* There is no backslash escaping within a single quoted string.  In
> +	 this case we can just return the input string.  */
> +      tmp = filename;
> +    }
> +  else if (quote_char == '"')
> +    {
> +      /* Remove escaping from a double quoted string.  */
> +      for (const char *input = filename;
> +	   *input != '\0';
> +	   ++input)
> +	{
> +	  if (input[0] == '\\'
> +	      && input[1] != '\0'
> +	      && strchr ("\"\\", input[1]) != nullptr)
> +	    ++input;
> +	  tmp += *input;
> +	}
> +    }
> +  else
> +    {

It might be overkill, but I would probably add "gdb_assert (quote_char ==
'\0')".

> +      /* Remove escaping from an unquoted string.  */
> +      for (const char *input = filename;
> +	   *input != '\0';
> +	   ++input)
> +	{
> +	  /* We allow anything to be escaped in an unquoted string.  */
> +	  if (*input == '\\')
> +	    {
> +	      ++input;
> +	      if (*input == '\0')
> +		break;
> +	    }
> +
> +	  tmp += *input;
> +	}
> +    }
> +
> +  return strdup (tmp.c_str ());
> +}
> +
> +/* Apply character escaping to the file name in TEXT.  QUOTE_PTR points to
> +   the quote character surrounding TEXT, or points to the null-character if
> +   there are no quotes around TEXT.  MATCH_TYPE will be one of the readline
> +   constants SINGLE_MATCH or MULTI_MATCH depending on if there is one or
> +   many completions.  */
> +
> +static char *
> +gdb_completer_file_name_quote (char *text, int match_type, char *quote_ptr)

The match_type argument is never used, maybe mark it with
[[maybe_unused]]?

> +{
> +  std::string str;
> +
> +  if (*quote_ptr == '\'')
> +    {
> +      /* There is no backslash escaping permitted within a single quoted
> +	 string, so in this case we can just return the input sting.  */
> +      str = text;
> +    }
> +  else if (*quote_ptr == '"')
> +    {
> +      /* Add escaping for a double quoted filename.  */
> +      for (const char *input = text;
> +	   *input != '\0';
> +	   ++input)
> +	{
> +	  if (strchr ("\"\\", *input) != nullptr)
> +	    str += '\\';
> +	  str += *input;
> +	}
> +    }
> +  else
> +    {

Similarly, I’d add a "gdb_assert (*quote_ptr == '\0')".

> +      /* Add escaping for an unquoted filename.  */
> +      for (const char *input = text;
> +	   *input != '\0';
> +	   ++input)
> +	{
> +	  if (strchr (" \t\n\\\"'", *input)
> +	      != nullptr)
> +	    str += '\\';
> +	  str += *input;
> +	}
> +    }
> +
> +  return strdup (str.c_str ());
> +}
> +

Best,
Lancelot.
  

Patch

diff --git a/gdb/completer.c b/gdb/completer.c
index 8e34e30f46b..4cda5f3a383 100644
--- a/gdb/completer.c
+++ b/gdb/completer.c
@@ -204,6 +204,153 @@  noop_completer (struct cmd_list_element *ignore,
 {
 }
 
+/* Return 1 if the character at EINDEX in STRING is quoted (there is an
+   unclosed quoted string), or if the character at EINDEX is quoted by a
+   backslash.  */
+
+static int
+gdb_completer_file_name_char_is_quoted (char *string, int eindex)
+{
+  for (int i = 0; i <= eindex && string[i] != '\0'; )
+    {
+      char c = string[i];
+
+      if (c == '\\')
+	{
+	  /* The backslash itself is not quoted.  */
+	  if (i >= eindex)
+	    return 0;
+	  ++i;
+	  /* But the next character is.  */
+	  if (i >= eindex)
+	    return 1;
+	  if (string[i] == '\0')
+	    return 0;
+	  ++i;
+	  continue;
+	}
+      else if (strchr (rl_completer_quote_characters, c) != nullptr)
+	{
+	  /* This assumes that extract_string_maybe_quoted can handle a
+	     string quoted with character C.  Currently this is true as the
+	     only characters we put in rl_completer_quote_characters are
+	     single and/or double quotes, both of which
+	     extract_string_maybe_quoted can handle.  */
+	  const char *tmp = &string[i];
+	  (void) extract_string_maybe_quoted (&tmp);
+	  i = tmp - string;
+
+	  /* Consider any character within the string we just skipped over
+	     as quoted, though this might not be completely correct; the
+	     opening and closing quotes are not themselves quoted.  But so
+	     far this doesn't seem to have caused any issues.  */
+	  if (i >= eindex)
+	    return 1;
+	}
+      else
+	++i;
+    }
+
+  return 0;
+}
+
+/* Removing character escaping from FILENAME.  QUOTE_CHAR is the quote
+   character around FILENAME or the null-character if there is no quoting
+   around FILENAME.  */
+
+static char *
+gdb_completer_file_name_dequote (char *filename, int quote_char)
+{
+  std::string tmp;
+
+  if (quote_char == '\'')
+    {
+      /* There is no backslash escaping within a single quoted string.  In
+	 this case we can just return the input string.  */
+      tmp = filename;
+    }
+  else if (quote_char == '"')
+    {
+      /* Remove escaping from a double quoted string.  */
+      for (const char *input = filename;
+	   *input != '\0';
+	   ++input)
+	{
+	  if (input[0] == '\\'
+	      && input[1] != '\0'
+	      && strchr ("\"\\", input[1]) != nullptr)
+	    ++input;
+	  tmp += *input;
+	}
+    }
+  else
+    {
+      /* Remove escaping from an unquoted string.  */
+      for (const char *input = filename;
+	   *input != '\0';
+	   ++input)
+	{
+	  /* We allow anything to be escaped in an unquoted string.  */
+	  if (*input == '\\')
+	    {
+	      ++input;
+	      if (*input == '\0')
+		break;
+	    }
+
+	  tmp += *input;
+	}
+    }
+
+  return strdup (tmp.c_str ());
+}
+
+/* Apply character escaping to the file name in TEXT.  QUOTE_PTR points to
+   the quote character surrounding TEXT, or points to the null-character if
+   there are no quotes around TEXT.  MATCH_TYPE will be one of the readline
+   constants SINGLE_MATCH or MULTI_MATCH depending on if there is one or
+   many completions.  */
+
+static char *
+gdb_completer_file_name_quote (char *text, int match_type, char *quote_ptr)
+{
+  std::string str;
+
+  if (*quote_ptr == '\'')
+    {
+      /* There is no backslash escaping permitted within a single quoted
+	 string, so in this case we can just return the input sting.  */
+      str = text;
+    }
+  else if (*quote_ptr == '"')
+    {
+      /* Add escaping for a double quoted filename.  */
+      for (const char *input = text;
+	   *input != '\0';
+	   ++input)
+	{
+	  if (strchr ("\"\\", *input) != nullptr)
+	    str += '\\';
+	  str += *input;
+	}
+    }
+  else
+    {
+      /* Add escaping for an unquoted filename.  */
+      for (const char *input = text;
+	   *input != '\0';
+	   ++input)
+	{
+	  if (strchr (" \t\n\\\"'", *input)
+	      != nullptr)
+	    str += '\\';
+	  str += *input;
+	}
+    }
+
+  return strdup (str.c_str ());
+}
+
 /* Complete on filenames.  */
 
 void
@@ -211,6 +358,7 @@  filename_completer (struct cmd_list_element *ignore,
 		    completion_tracker &tracker,
 		    const char *text, const char *word)
 {
+  rl_char_is_quoted_p = gdb_completer_file_name_char_is_quoted;
   rl_completer_quote_characters = gdb_completer_file_name_quote_characters;
 
   int subsequent_name = 0;
@@ -262,6 +410,7 @@  filename_completer_handle_brkchars (struct cmd_list_element *ignore,
     (gdb_completer_file_name_break_characters);
 
   rl_completer_quote_characters = gdb_completer_file_name_quote_characters;
+  rl_char_is_quoted_p = gdb_completer_file_name_char_is_quoted;
 }
 
 /* Find the bounds of the current word for completion purposes, and
@@ -1261,6 +1410,7 @@  complete_line_internal_1 (completion_tracker &tracker,
      completing file names then we can switch to the file name quote
      character set (i.e., both single- and double-quotes).  */
   rl_completer_quote_characters = gdb_completer_expression_quote_characters;
+  rl_char_is_quoted_p = nullptr;
 
   /* Decide whether to complete on a list of gdb commands or on
      symbols.  */
@@ -2153,9 +2303,11 @@  completion_tracker::build_completion_result (const char *text,
   /* Build replacement word, based on the LCD.  */
 
   recompute_lowest_common_denominator ();
-  match_list[0]
-    = expand_preserving_ws (text, end - start,
-			    m_lowest_common_denominator);
+  if (rl_filename_completion_desired)
+    match_list[0] = xstrdup (m_lowest_common_denominator);
+  else
+    match_list[0]
+      = expand_preserving_ws (text, end - start, m_lowest_common_denominator);
 
   if (m_lowest_common_denominator_unique)
     {
@@ -3018,6 +3170,11 @@  _initialize_completer ()
   rl_attempted_completion_function = gdb_rl_attempted_completion_function;
   set_rl_completer_word_break_characters (default_word_break_characters ());
 
+  /* Setup readline globals relating to filename completion.  */
+  rl_filename_quote_characters = " \t\n\\\"'";
+  rl_filename_dequoting_function = gdb_completer_file_name_dequote;
+  rl_filename_quoting_function = gdb_completer_file_name_quote;
+
   add_setshow_zuinteger_unlimited_cmd ("max-completions", no_class,
 				       &max_completions, _("\
 Set maximum number of completion candidates."), _("\
diff --git a/gdb/testsuite/gdb.base/filename-completion.exp b/gdb/testsuite/gdb.base/filename-completion.exp
index b700977cec5..f23e8671f40 100644
--- a/gdb/testsuite/gdb.base/filename-completion.exp
+++ b/gdb/testsuite/gdb.base/filename-completion.exp
@@ -40,6 +40,9 @@  proc setup_directory_tree {} {
     remote_exec host "touch \"${root}/aaa/aa bb\""
     remote_exec host "touch \"${root}/aaa/aa cc\""
 
+    remote_exec host "touch \"${root}/bb1/aa\\\"bb\""
+    remote_exec host "touch \"${root}/bb1/aa'bb\""
+
     return $root
 }
 
@@ -89,6 +92,37 @@  proc run_tests { root } {
 		    "aa cc"
 		} "" "${qc}" false \
 		"expand filenames containing spaces"
+
+	    test_gdb_complete_multiple "file ${qc}${root}/bb1/" \
+		"a" "a" {
+		    "aa\"bb"
+		    "aa'bb"
+		} "" "${qc}" false \
+		"expand filenames containing quotes"
+	} else {
+	    set sp "\\ "
+
+	    test_gdb_complete_tab_multiple "file ${qc}${root}/aaa/a" \
+		"a${sp}" {
+		    "aa bb"
+		    "aa cc"
+		} false \
+		"expand filenames containing spaces"
+
+	    test_gdb_complete_tab_multiple "file ${qc}${root}/bb1/a" \
+		"a" {
+		    "aa\"bb"
+		    "aa'bb"
+		} false \
+		"expand filenames containing quotes"
+
+	    test_gdb_complete_tab_unique "file ${qc}${root}/bb1/aa\\\"" \
+		"file ${qc}${root}/bb1/aa\\\\\"bb${qc}" " " \
+	    "expand unique filename containing double quotes"
+
+	    test_gdb_complete_tab_unique "file ${qc}${root}/bb1/aa\\'" \
+		"file ${qc}${root}/bb1/aa\\\\'bb${qc}" " " \
+		"expand unique filename containing single quote"
 	}
     }
 }