[PATCHv6,01/14] gdb: split apart two different types of filename completion

Message ID 64cdc6d455664efce7ef6f5bad84b5e036158df8.1725017349.git.aburgess@redhat.com
State New
Headers
Series Further filename completion improvements |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Test passed

Commit Message

Andrew Burgess Aug. 30, 2024, 11:33 a.m. UTC
  Unfortunately we have two different types of filename completion in
GDB.

The majority of commands have what I call unquoted filename
completion; this is for commands like 'set logging file ...', 'target
core ...', and 'add-auto-load-safe-path ...'.  For these commands
everything after the command name (that is not a command option) is
treated as a single filename.  If the filename contains white space
then this does not need to be escaped, nor does the filename need to
be quoted.  In fact, the filename argument is not de-quoted, and does
not have any escaping removed, so if a user does try to add such
things, they will be treated as part of the filename.  As an example:

  (gdb) target core "/path/that contains/some white space"

Will look for a directory called '"' (double quote) in the local
directory.

A small number of commands do de-quote and remove escapes from
filename arguments.  These commands accept what I call quoted and
escaped filenames.  Right now these are the commands that specify the
file for GDB to debug, so:

  file
  exec-file
  symbol-file
  add-symbol-file
  remove-symbol-file

As an example of this in action:

  (gdb) file "/path/that contains/some white space"

In this case GDB would load the file:

  /path/that contains/some white space

Current filename completion always assumes that filenames can be
quoted, though escaping doesn't work in completion right now.  But the
assumption that quoting is allowed is clearly wrong.

This commit splits filename completion into two.  The existing
filename_completer is retained, and is used for unquoted filenames.  A
second filename_maybe_quoted_completer is added which can be used for
completing quoted filenames.

The filename completion test has been extended to cover more cases.
As part of the extended testing I need to know the character that
should be used to separate filenames within a path.  For this TCL 8.6+
has $::tcl_platform(pathSeparator).  To support older versions of TCL
I've added some code to testsuite/lib/gdb.exp.

You might notice that after this commit the completion for unquoted
files is all done in the brkchars phase, that is the function
filename_completer_handle_brkchars calculates the completions and
marks the completion_tracker as using a custom word point.  The reason
for this is that we don't want to break on white space for this
completion, but if we rely on readline to find the completion word,
readline will consider the entire command line, and with no white
space in the word break character set, readline will end up using the
entire command line as the word to complete.

For now at least, the completer for quoted filenames does generate its
completions during the completion phase, though this is going to
change in a later commit.
---
 gdb/completer.c                               |  86 +++++--
 gdb/completer.h                               |  24 +-
 gdb/exec.c                                    |   4 +-
 gdb/guile/scm-cmd.c                           |   2 +-
 gdb/python/py-cmd.c                           |   2 +-
 gdb/symfile.c                                 |   4 +-
 .../gdb.base/filename-completion.exp          | 209 ++++++++++++++----
 gdb/testsuite/lib/gdb.exp                     |  10 +
 8 files changed, 269 insertions(+), 72 deletions(-)
  

Patch

diff --git a/gdb/completer.c b/gdb/completer.c
index 1008ec23ba5..93eb8db6db0 100644
--- a/gdb/completer.c
+++ b/gdb/completer.c
@@ -182,6 +182,17 @@  static const char gdb_completer_file_name_break_characters[] =
   " \t\n*|\"';:?><";
 #endif
 
+/* When completing on file names, for commands that don't accept quoted
+   file names, the only character that can be used as a word separator is
+   the path separator.  Every other character is treated as a literal
+   character within the filename.  */
+static const char gdb_completer_path_break_characters[] =
+#ifdef HAVE_DOS_BASED_FILE_SYSTEM
+  ";";
+#else
+  ":";
+#endif
+
 /* Characters that can be used to quote expressions.  Note that we can't
    include '"' (double quote) because the gdb C parser treats such quoted
    sequences as strings.  */
@@ -203,15 +214,15 @@  noop_completer (struct cmd_list_element *ignore,
 {
 }
 
-/* Complete on filenames.  */
+/* Generate filename completions of WORD, storing the completions into
+   TRACKER.  This is used for generating completions for commands that
+   only accept unquoted filenames as well as for commands that accept
+   quoted and escaped filenames.  */
 
-void
-filename_completer (struct cmd_list_element *ignore,
-		    completion_tracker &tracker,
-		    const char *text, const char *word)
+static void
+filename_completer_generate_completions (completion_tracker &tracker,
+					 const char *word)
 {
-  rl_completer_quote_characters = gdb_completer_file_name_quote_characters;
-
   int subsequent_name = 0;
   while (1)
     {
@@ -249,13 +260,13 @@  filename_completer (struct cmd_list_element *ignore,
     }
 }
 
-/* The corresponding completer_handle_brkchars
-   implementation.  */
+/* The brkchars callback used when completing filenames that can be
+   quoted.  */
 
 static void
-filename_completer_handle_brkchars (struct cmd_list_element *ignore,
-				    completion_tracker &tracker,
-				    const char *text, const char *word)
+filename_maybe_quoted_completer_handle_brkchars
+	(struct cmd_list_element *ignore, completion_tracker &tracker,
+	 const char *text, const char *word)
 {
   set_rl_completer_word_break_characters
     (gdb_completer_file_name_break_characters);
@@ -263,6 +274,50 @@  filename_completer_handle_brkchars (struct cmd_list_element *ignore,
   rl_completer_quote_characters = gdb_completer_file_name_quote_characters;
 }
 
+/* Complete on filenames.  This is for commands that accept possibly
+   quoted filenames.  */
+
+void
+filename_maybe_quoted_completer (struct cmd_list_element *ignore,
+				 completion_tracker &tracker,
+				 const char *text, const char *word)
+{
+  filename_maybe_quoted_completer_handle_brkchars (ignore, tracker,
+						   text, word);
+  filename_completer_generate_completions (tracker, word);
+}
+
+/* The brkchars callback used by commands that don't accept quoted
+   filenames.  */
+
+static void
+filename_completer_handle_brkchars
+	(struct cmd_list_element *ignore, completion_tracker &tracker,
+	 const char *text, const char *word)
+{
+  gdb_assert (word == nullptr);
+
+  set_rl_completer_word_break_characters (gdb_completer_path_break_characters);
+  rl_completer_quote_characters = nullptr;
+  rl_filename_quoting_desired = 0;
+
+  tracker.set_use_custom_word_point (true);
+  word = advance_to_filename_complete_word_point (tracker, text);
+  filename_completer (ignore, tracker, text, word);
+}
+
+/* See completer.h.  */
+
+void
+filename_completer
+	(struct cmd_list_element *ignore, completion_tracker &tracker,
+	 const char *text, const char *word)
+{
+  gdb_assert (tracker.use_custom_word_point ());
+  gdb_assert (word != nullptr);
+  filename_completer_generate_completions (tracker, word);
+}
+
 /* Find the bounds of the current word for completion purposes, and
    return a pointer to the end of the word.  This mimics (and is a
    modified version of) readline's _rl_find_completion_word internal
@@ -447,8 +502,8 @@  const char *
 advance_to_filename_complete_word_point (completion_tracker &tracker,
 					 const char *text)
 {
-  const char *brk_chars = gdb_completer_file_name_break_characters;
-  const char *quote_chars = gdb_completer_file_name_quote_characters;
+  const char *brk_chars = gdb_completer_path_break_characters;
+  const char *quote_chars = nullptr;
   return advance_to_completion_word (tracker, brk_chars, quote_chars, text);
 }
 
@@ -1880,6 +1935,9 @@  completer_handle_brkchars_func_for_completer (completer_ftype *fn)
   if (fn == filename_completer)
     return filename_completer_handle_brkchars;
 
+  if (fn == filename_maybe_quoted_completer)
+    return filename_maybe_quoted_completer_handle_brkchars;
+
   if (fn == location_completer)
     return location_completer_handle_brkchars;
 
diff --git a/gdb/completer.h b/gdb/completer.h
index 98a12f3907c..c6d064ec9d6 100644
--- a/gdb/completer.h
+++ b/gdb/completer.h
@@ -563,9 +563,10 @@  extern completion_result
 const char *advance_to_expression_complete_word_point
   (completion_tracker &tracker, const char *text);
 
-/* Assuming TEXT is an filename, find the completion word point for
-   TEXT, emulating the algorithm readline uses to find the word
-   point.  */
+/* Assuming TEXT is a filename, find the completion word point for TEXT,
+   emulating the algorithm readline uses to find the word point.  The
+   filenames that are located by this function assume no filename
+   quoting, this function should be paired with filename_completer.  */
 extern const char *advance_to_filename_complete_word_point
   (completion_tracker &tracker, const char *text);
 
@@ -573,10 +574,27 @@  extern void noop_completer (struct cmd_list_element *,
 			    completion_tracker &tracker,
 			    const char *, const char *);
 
+/* Filename completer for commands that don't accept quoted filenames.
+   This completer does support completing a list of filenames that are
+   separated with the path separator (':' for UNIX and ';' for MS-DOS).
+
+   When adding a new command it is better to write the command so it
+   accepts quoted filenames and use filename_maybe_quoted_completer, for
+   examples see the 'file' and 'exec-file' commands.  */
+
 extern void filename_completer (struct cmd_list_element *,
 				completion_tracker &tracker,
 				const char *, const char *);
 
+/* Filename completer for commands where the filename argument can be
+   quoted.  This completer also supports completing a list of filenames
+   that are separated with the path separator (':' for UNIX and ';' for
+   MS-DOS).  */
+
+extern void filename_maybe_quoted_completer (struct cmd_list_element *,
+					     completion_tracker &tracker,
+					     const char *, const char *);
+
 extern void expression_completer (struct cmd_list_element *,
 				  completion_tracker &tracker,
 				  const char *, const char *);
diff --git a/gdb/exec.c b/gdb/exec.c
index 683b0a17b14..38ce4619907 100644
--- a/gdb/exec.c
+++ b/gdb/exec.c
@@ -1074,14 +1074,14 @@  and it is the program executed when you use the `run' command.\n\
 If FILE cannot be found as specified, your execution directory path\n\
 ($PATH) is searched for a command of that name.\n\
 No arg means to have no executable file and no symbols."), &cmdlist);
-  set_cmd_completer (c, filename_completer);
+  set_cmd_completer (c, filename_maybe_quoted_completer);
 
   c = add_cmd ("exec-file", class_files, exec_file_command, _("\
 Use FILE as program for getting contents of pure memory.\n\
 If FILE cannot be found as specified, your execution directory path\n\
 is searched for a command of that name.\n\
 No arg means have no executable file."), &cmdlist);
-  set_cmd_completer (c, filename_completer);
+  set_cmd_completer (c, filename_maybe_quoted_completer);
 
   add_com ("section", class_files, set_section_command, _("\
 Change the base address of section SECTION of the exec file to ADDR.\n\
diff --git a/gdb/guile/scm-cmd.c b/gdb/guile/scm-cmd.c
index 2a5507686b0..8255529a2fe 100644
--- a/gdb/guile/scm-cmd.c
+++ b/gdb/guile/scm-cmd.c
@@ -110,7 +110,7 @@  struct cmdscm_completer
 static const struct cmdscm_completer cmdscm_completers[] =
 {
   { "COMPLETE_NONE", noop_completer },
-  { "COMPLETE_FILENAME", filename_completer },
+  { "COMPLETE_FILENAME", filename_maybe_quoted_completer },
   { "COMPLETE_LOCATION", location_completer },
   { "COMPLETE_COMMAND", command_completer },
   { "COMPLETE_SYMBOL", symbol_completer },
diff --git a/gdb/python/py-cmd.c b/gdb/python/py-cmd.c
index 2ce2c90d805..e042f20fa0d 100644
--- a/gdb/python/py-cmd.c
+++ b/gdb/python/py-cmd.c
@@ -39,7 +39,7 @@  struct cmdpy_completer
 static const struct cmdpy_completer completers[] =
 {
   { "COMPLETE_NONE", noop_completer },
-  { "COMPLETE_FILENAME", filename_completer },
+  { "COMPLETE_FILENAME", filename_maybe_quoted_completer },
   { "COMPLETE_LOCATION", location_completer },
   { "COMPLETE_COMMAND", command_completer },
   { "COMPLETE_SYMBOL", symbol_completer },
diff --git a/gdb/symfile.c b/gdb/symfile.c
index cf7ab0b94e6..c3cd961c0b9 100644
--- a/gdb/symfile.c
+++ b/gdb/symfile.c
@@ -3814,7 +3814,7 @@  Usage: symbol-file [-readnow | -readnever] [-o OFF] FILE\n\
 OFF is an optional offset which is added to each section address.\n\
 The `file' command can also load symbol tables, as well as setting the file\n\
 to execute.\n" READNOW_READNEVER_HELP), &cmdlist);
-  set_cmd_completer (c, filename_completer);
+  set_cmd_completer (c, filename_maybe_quoted_completer);
 
   c = add_cmd ("add-symbol-file", class_files, add_symbol_file_command, _("\
 Load symbols from FILE, assuming FILE has been dynamically loaded.\n\
@@ -3828,7 +3828,7 @@  OFF is an optional offset which is added to the default load addresses\n\
 of all sections for which no other address was specified.\n"
 READNOW_READNEVER_HELP),
 	       &cmdlist);
-  set_cmd_completer (c, filename_completer);
+  set_cmd_completer (c, filename_maybe_quoted_completer);
 
   c = add_cmd ("remove-symbol-file", class_files,
 	       remove_symbol_file_command, _("\
diff --git a/gdb/testsuite/gdb.base/filename-completion.exp b/gdb/testsuite/gdb.base/filename-completion.exp
index b700977cec5..37629bfbf77 100644
--- a/gdb/testsuite/gdb.base/filename-completion.exp
+++ b/gdb/testsuite/gdb.base/filename-completion.exp
@@ -23,8 +23,16 @@  load_lib completion-support.exp
 #
 # root/			[ DIRECTORY ]
 #   aaa/		[ DIRECTORY ]
+#     aa bb		[ FILE ]
+#     aa cc		[ FILE ]
+#   aaa/		[ DIRECTORY ]
 #   bb1/		[ DIRECTORY ]
 #   bb2/		[ DIRECTORY ]
+#     dir 1/		[ DIRECTORY ]
+#       unique file	[ FILE ]
+#     dir 2/		[ DIRECTORY ]
+#       file 1		[ FILE ]
+#       file 2		[ FILE ]
 #   cc1/		[ DIRECTORY ]
 #   cc2			[ FILE ]
 proc setup_directory_tree {} {
@@ -36,68 +44,170 @@  proc setup_directory_tree {} {
     remote_exec host "mkdir -p ${root}/bb2"
     remote_exec host "mkdir -p ${root}/cc1"
     remote_exec host "touch ${root}/cc2"
-
     remote_exec host "touch \"${root}/aaa/aa bb\""
     remote_exec host "touch \"${root}/aaa/aa cc\""
+    remote_exec host "mkdir -p \"${root}/bb2/dir 1\""
+    remote_exec host "mkdir -p \"${root}/bb2/dir 2\""
+    remote_exec host "touch \"${root}/bb2/dir 1/unique file\""
+    remote_exec host "touch \"${root}/bb2/dir 2/file 1\""
+    remote_exec host "touch \"${root}/bb2/dir 2/file 2\""
 
     return $root
 }
 
-# Run filename completetion tests.  ROOT is the base directory as
-# returned from setup_directory_tree, though, if ROOT is a
-# sub-directory of the user's home directory ROOT might have been
-# modified to replace the $HOME prefix with a single "~" character.
-proc run_tests { root } {
-
-    # Completing 'thread apply all ...' commands uses a custom word
-    # point.  At one point we had a bug where doing this would break
-    # completion of quoted filenames that contained white space.
-    test_gdb_complete_unique "thread apply all hel" \
-	"thread apply all help" " " false \
-	"complete a 'thread apply all' command"
-
-    foreach_with_prefix qc [list "" "'" "\""] {
-	test_gdb_complete_none "file ${qc}${root}/xx" \
-	    "expand a non-existent filename"
-
-	test_gdb_complete_unique "file ${qc}${root}/a" \
-	    "file ${qc}${root}/aaa/" "" false \
-	    "expand a unique filename"
-
-	test_gdb_complete_multiple "file ${qc}${root}/" \
-	    "b" "b" {
-		"bb1/"
-		"bb2/"
-	    } "" "${qc}" false \
-	    "expand multiple directory names"
-
-	test_gdb_complete_multiple "file ${qc}${root}/" \
-	    "c" "c" {
-		"cc1/"
-		"cc2"
-	    } "" "${qc}" false \
-	    "expand mixed directory and file names"
-
-	# GDB does not currently escape word break characters
-	# (e.g. white space) correctly in unquoted filenames.
-	if { $qc ne "" } {
-	    set sp " "
-
-	    test_gdb_complete_multiple "file ${qc}${root}/aaa/" \
-		"a" "a${sp}" {
-		    "aa bb"
-		    "aa cc"
+# Run filename completion tests for those commands that accept quoting and
+# escaping of the filename argument.
+#
+# ROOT is the base directory as returned from setup_directory_tree, though,
+# if ROOT is a sub-directory of the user's home directory ROOT might have
+# been modified to replace the $HOME prefix with a single "~" character.
+proc run_quoting_and_escaping_tests { root } {
+    # Test all the commands which allow quoting of filenames, and
+    # which require whitespace to be escaped in unquoted filenames.
+    foreach_with_prefix cmd { file exec-file symbol-file add-symbol-file } {
+	gdb_start
+
+	# Completing 'thread apply all ...' commands uses a custom word
+	# point.  At one point we had a bug where doing this would break
+	# completion of quoted filenames that contained white space.
+	test_gdb_complete_unique "thread apply all hel" \
+	    "thread apply all help" " " false \
+	    "complete a 'thread apply all' command"
+
+	foreach_with_prefix qc [list "" "'" "\""] {
+	    test_gdb_complete_none "$cmd ${qc}${root}/xx" \
+		"expand a non-existent filename"
+
+	    test_gdb_complete_unique "$cmd ${qc}${root}/a" \
+		"$cmd ${qc}${root}/aaa/" "" false \
+		"expand a unique filename"
+
+	    test_gdb_complete_multiple "$cmd ${qc}${root}/" \
+		"b" "b" {
+		    "bb1/"
+		    "bb2/"
+		} "" "${qc}" false \
+		"expand multiple directory names"
+
+	    test_gdb_complete_multiple "$cmd ${qc}${root}/" \
+		"c" "c" {
+		    "cc1/"
+		    "cc2"
 		} "" "${qc}" false \
-		"expand filenames containing spaces"
+		"expand mixed directory and file names"
+
+	    # GDB does not currently escape word break characters
+	    # (e.g. white space) correctly in unquoted filenames.
+	    if { $qc ne "" } {
+		set sp " "
+
+		test_gdb_complete_multiple "$cmd ${qc}${root}/aaa/" \
+		    "a" "a${sp}" {
+			"aa bb"
+			"aa cc"
+		    } "" "${qc}" false \
+		    "expand filenames containing spaces"
+	    }
 	}
+
+	gdb_exit
     }
 }
 
-gdb_start
+# Helper for run_unquoted_tests.  ROOT is the root directory as set up
+# by setup_directory_tree.  CMD is the GDB command to test.  PREFIX, if
+# not empty, is a filename that is placed, followed by the path
+# separator, before the filename being completed.
+proc run_unquoted_tests_core { root cmd { prefix "" } } {
+    gdb_start
+
+    if { $prefix != "" } {
+	# Platform specific path separator (':' on UNIX, ';' on MS-DOS).
+	set pathsep $::tcl_platform(pathSeparator)
+
+	set prefix ${prefix}${pathsep}
+    }
+
+    test_gdb_complete_none "$cmd ${prefix}${root}/xx" \
+	"expand a non-existent filename"
+
+    test_gdb_complete_unique "$cmd ${prefix}${root}/a" \
+	"$cmd ${prefix}${root}/aaa/" "" false \
+	"expand a unique filename"
+
+    test_gdb_complete_unique "$cmd ${prefix}${root}/bb2/dir 1/uni" \
+	"$cmd ${prefix}${root}/bb2/dir 1/unique file" " " false \
+	"expand a unique filename containing whitespace"
+
+    test_gdb_complete_multiple "$cmd ${prefix}${root}/" \
+	"b" "b" {
+	    "bb1/"
+	    "bb2/"
+	} "" "" false \
+	"expand multiple directory names"
+
+    test_gdb_complete_multiple "$cmd ${prefix}${root}/" \
+	"c" "c" {
+	    "cc1/"
+	    "cc2"
+	} "" "" false \
+	"expand mixed directory and file names"
+
+    test_gdb_complete_multiple "$cmd ${prefix}${root}/aaa/" \
+	"a" "a " {
+	    "aa bb"
+	    "aa cc"
+	} "" "" false \
+	"expand filenames containing spaces"
+
+    test_gdb_complete_multiple "$cmd ${prefix}${root}/bb2/dir 2/" \
+	"fi" "le " {
+	    "file 1"
+	    "file 2"
+	} "" "" false \
+	"expand filenames containing spaces in path"
+
+    gdb_exit
+}
+
+
+# Run filename completion tests for a sample of commands that take an
+# unquoted, unescaped filename as an argument.  Only a sample of commands
+# are (currently) tested as there's a lot of commands that accept this style
+# of filename argument.
+#
+# ROOT is the base directory as returned from setup_directory_tree, though,
+# if ROOT is a sub-directory of the user's home directory ROOT might have
+# been modified to replace the $HOME prefix with a single "~" character.
+proc run_unquoted_tests { root } {
+    # Test a sample of the commands which take an unquoted filename,
+    # and which treat white space as a literal part of the filename.
+    foreach_with_prefix cmd { "maint print c-tdesc" "set logging file" \
+				  "target core" "add-auto-load-safe-path" } {
+	run_unquoted_tests_core $root $cmd
+    }
+
+    foreach prefix [list \
+			"${root}/bb2/dir 1" \
+			"${root}/bb2/dir 1/unique file" \
+			"${root}/cc1" \
+			"${root}/cc2"] {
+
+	# Don't use the full path in the test name, just use the
+	# part after the ROOT directory.
+	set id [string range $prefix [string length ${root}] end]
+	with_test_prefix "prefix=$id" {
+	    foreach_with_prefix cmd { "add-auto-load-safe-path" "path" } {
+		run_unquoted_tests_core $root $cmd $prefix
+	    }
+	}
+    }
+}
 
 set root [setup_directory_tree]
 
-run_tests $root
+run_quoting_and_escaping_tests $root
+run_unquoted_tests $root
 
 # This test relies on using the $HOME directory.  We could make this
 # work for remote hosts, but right now, this isn't supported.
@@ -114,7 +224,8 @@  if {![is_remote host]} {
 
 	with_test_prefix "with tilde" {
 	    # And rerun the tests.
-	    run_tests $tilde_root
+	    run_quoting_and_escaping_tests $tilde_root
+	    run_unquoted_tests $tilde_root
 	}
     }
 }
diff --git a/gdb/testsuite/lib/gdb.exp b/gdb/testsuite/lib/gdb.exp
index 2d33470b0ea..136ac3d90ef 100644
--- a/gdb/testsuite/lib/gdb.exp
+++ b/gdb/testsuite/lib/gdb.exp
@@ -1702,6 +1702,16 @@  if { [tcl_version_at_least 8 6] == 0 } {
 
 	return $res
     }
+
+    # ::tcl_platform(pathSeparator) was added in 8.6.  A bare ';' ends a command.
+    switch $::tcl_platform(platform) {
+	windows {
+	    set ::tcl_platform(pathSeparator) ";"
+	}
+	default {
+	    set ::tcl_platform(pathSeparator) ":"
+	}
+    }
 }
 
 if { [tcl_version_at_least 8 6 2] == 0 } {