objdump: Implement short name search

Message ID CAOscM-s9V4+7xqy2JLOAh-ABrWnMLKpQ=txZgimP5LFc-xQaFA@mail.gmail.com
State New
Headers
Series objdump: Implement short name search |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

Mikołaj Piróg Feb. 28, 2024, 5:46 p.m. UTC
  This patch introduces the "--short-name-search" option to objdump. When
this option is used like so: objdump --disassemble=ns::foo
--short-name-search -C file The objdump will then output disassembled
output for functions that match the name "ns::foo". The current behaviour
of objdump in this context is to match the whole function signature, not
only the name (like so: "ns::foo(int, char const*)"). Option
"--short-name-search" works only in conjunction with demangling ("-C"). The
motivation behind this change is to ease a process of a quick disassembly
of a given function in a binary, since providing the whole function
signature isn't comfortable. I am not particularly happy with the name for
this option, but I couldn't come up with anything better. If the
functionality provided by this patch is desired in objdump, I am happy to
listen for suggestions for a better name.
  

Comments

Alan Modra Feb. 29, 2024, 2:03 a.m. UTC | #1
On Wed, Feb 28, 2024 at 06:46:18PM +0100, Mikołaj Piróg wrote:
> This patch introduces the "--short-name-search" option to objdump. When
> this option is used like so: objdump --disassemble=ns::foo
> --short-name-search -C file The objdump will then output disassembled
> output for functions that match the name "ns::foo". The current behaviour
> of objdump in this context is to match the whole function signature, not
> only the name (like so: "ns::foo(int, char const*)"). Option
> "--short-name-search" works only in conjunction with demangling ("-C"). The
> motivation behind this change is to ease a process of a quick disassembly
> of a given function in a binary, since providing the whole function
> signature isn't comfortable. I am not particularly happy with the name for
> this option, but I couldn't come up with anything better. If the
> functionality provided by this patch is desired in objdump, I am happy to
> listen for suggestions for a better name.

I like the idea.  It sounds useful.  Hmm, I wonder could you provide
this functionality without a new option.  ie. When demamgling
automatically detect that the user has specified a function name
without args.

If you do need a new option then the patch needs changes to
doc/binutils.texi, describing what the new objdump option does.

> diff --git a/binutils/objdump.c b/binutils/objdump.c
> index 7beb221cb2f..a013eee3c12 100644
> --- a/binutils/objdump.c
> +++ b/binutils/objdump.c
> @@ -138,6 +138,7 @@ static bool extended_color_output = false; /* --visualize-jumps=extended-color.
>  static int process_links = false;       /* --process-links.  */
>  static int show_all_symbols;            /* --show-all-symbols.  */
>  static bool decompressed_dumps = false; /* -Z, --decompress.  */
> +static bool short_name_search = false; /* --short-name-search */
>  
>  static enum color_selection
>    {
> @@ -272,6 +273,8 @@ usage (FILE *stream, int status)
>    -D, --disassemble-all    Display assembler contents of all sections\n"));
>    fprintf (stream, _("\
>        --disassemble=<sym>  Display assembler contents from <sym>\n"));
> +  fprintf (stream, _("\
> +      --short-name-search  When searching for <sym>, compare functions names, not signatures\n"));
>    fprintf (stream, _("\
>    -S, --source             Intermix source code with disassembly\n"));
>    fprintf (stream, _("\
> @@ -488,7 +491,8 @@ enum option_values
>  #endif
>      OPTION_SFRAME,
>      OPTION_VISUALIZE_JUMPS,
> -    OPTION_DISASSEMBLER_COLOR
> +    OPTION_DISASSEMBLER_COLOR,
> +    OPTION_SYMBOL_SHORT_NAME_SEARCH
>    };
>  
>  static struct option long_options[]=
> @@ -543,6 +547,7 @@ static struct option long_options[]=
>    {"section", required_argument, NULL, 'j'},
>    {"section-headers", no_argument, NULL, 'h'},
>    {"sframe", optional_argument, NULL, OPTION_SFRAME},
> +  {"short-name-search", no_argument, NULL, OPTION_SYMBOL_SHORT_NAME_SEARCH},
>    {"show-all-symbols", no_argument, &show_all_symbols, 1},
>    {"show-raw-insn", no_argument, &show_raw_insn, 1},
>    {"source", no_argument, NULL, 'S'},
> @@ -3905,7 +3910,11 @@ disassemble_section (bfd *abfd, asection *section, void *inf)
>  	      if (do_demangle && name[0] != '\0')
>  		{
>  		  /* Demangle the name.  */
> -		  alloc = bfd_demangle (abfd, name, demangle_flags);
> +		  if (short_name_search)
> +		    alloc = bfd_demangle (abfd, name, DMGL_NO_OPTS);
> +		  else
> +		    alloc = bfd_demangle (abfd, name, demangle_flags);
> +
>  		  if (alloc != NULL)
>  		    name = alloc;
>  		}
> @@ -3923,7 +3932,7 @@ disassemble_section (bfd *abfd, asection *section, void *inf)
>  		   (*rel_pp)->address - rel_offset < sym_offset)
>  			  ++rel_pp;
>  
> -		  if (sym->flags & BSF_FUNCTION)
> +		  if (sym->flags & BSF_FUNCTION && !(short_name_search && do_demangle))
>  		    {
>  		      if (bfd_get_flavour (abfd) == bfd_target_elf_flavour
>  			  && ((elf_symbol_type *) sym)->internal_elf_sym.st_size > 0)

I think this should be moved a little lower, so that loop_until is
function_sym, but I haven't checked fully that this won't exit the
loop prematurely.  Obviously you want to disassemble the entire
function, but also continue looking for another occurrence of the same
function name (with a different signature).

> @@ -6200,6 +6209,9 @@ main (int argc, char **argv)
>  	    dump_sframe_section_name = xstrdup (optarg);
>  	  seenflag = true;
>  	  break;
> +	case OPTION_SYMBOL_SHORT_NAME_SEARCH:
> +	  short_name_search = true;
> +	  break;
>  	case 'G':
>  	  dump_stab_section_info = true;
>  	  seenflag = true;
> @@ -6287,6 +6299,5 @@ main (int argc, char **argv)
>    free (dump_ctf_section_name);
>    free (dump_ctf_parent_name);
>    free ((void *) source_comment);
> -
>    return exit_status;
>  }
  
Mikołaj Piróg Feb. 29, 2024, 9:29 p.m. UTC | #2
I agree that providing this functionality without a new option is a much
better solution for the user.

I have discovered that my approach in this patch is too simplistic: it does
not work with templated functions. Demangling templated functions with
DMGL_NO_PARAMS will result in name still containing the template
qualification - and it will not match. "std::vector<char,
std::allocator>::emplace_back<char>(params)" will be transferred to
"std::vector<char, std::allocator>::emplace_back<char>". Besides,
demangling an argumentless function (like "main()") with "DMGL_NO_PARAMS",
results in NULL, which in the end works because in this case the function
fallbacks to pure sym.

If this functionality was to work with such cases, each symbol will have to
be demangled to contain only its name, so the vector function mentioned
earlier should become "std::vector::emplace_back", which is natural for the
users to type as <sym> when searching for it. I am unsure how to implement
that (demangling the function to contain only the qualified name): I looked
at bfd.c and cp-demangle.c, but I haven't found a function to do that. It
could be done by iterating over "demangle_component" struct, like
"d_print_comp_inner" does, picking only necessary parts of the name, but it
would have to be done carefully not to miss a part of the name. If someone
experienced with demangling could provide some directions as to how to
accomplish that, I would be grateful.

Assuming the problem above has been solved, making it all work without a
new switch should be easy, assuming demangled symbol names cannot ever
contain '(' sign (I think this is sane assumption).  If that is true, then
if sym contains '(', the user supplied a function signature. Otherwise,
only the function name was supplied, and objdump can act accordingly.

czw., 29 lut 2024 o 03:03 Alan Modra <amodra@gmail.com> napisał(a):

> On Wed, Feb 28, 2024 at 06:46:18PM +0100, Mikołaj Piróg wrote:
> > This patch introduces the "--short-name-search" option to objdump. When
> > this option is used like so: objdump --disassemble=ns::foo
> > --short-name-search -C file The objdump will then output disassembled
> > output for functions that match the name "ns::foo". The current behaviour
> > of objdump in this context is to match the whole function signature, not
> > only the name (like so: "ns::foo(int, char const*)"). Option
> > "--short-name-search" works only in conjunction with demangling ("-C").
> The
> > motivation behind this change is to ease a process of a quick disassembly
> > of a given function in a binary, since providing the whole function
> > signature isn't comfortable. I am not particularly happy with the name
> for
> > this option, but I couldn't come up with anything better. If the
> > functionality provided by this patch is desired in objdump, I am happy to
> > listen for suggestions for a better name.
>
> I like the idea.  It sounds useful.  Hmm, I wonder could you provide
> this functionality without a new option.  ie. When demamgling
> automatically detect that the user has specified a function name
> without args.
>
> If you do need a new option then the patch needs changes to
> doc/binutils.texi, describing what the new objdump option does.
>
> > diff --git a/binutils/objdump.c b/binutils/objdump.c
> > index 7beb221cb2f..a013eee3c12 100644
> > --- a/binutils/objdump.c
> > +++ b/binutils/objdump.c
> > @@ -138,6 +138,7 @@ static bool extended_color_output = false; /*
> --visualize-jumps=extended-color.
> >  static int process_links = false;       /* --process-links.  */
> >  static int show_all_symbols;            /* --show-all-symbols.  */
> >  static bool decompressed_dumps = false; /* -Z, --decompress.  */
> > +static bool short_name_search = false; /* --short-name-search */
> >
> >  static enum color_selection
> >    {
> > @@ -272,6 +273,8 @@ usage (FILE *stream, int status)
> >    -D, --disassemble-all    Display assembler contents of all
> sections\n"));
> >    fprintf (stream, _("\
> >        --disassemble=<sym>  Display assembler contents from <sym>\n"));
> > +  fprintf (stream, _("\
> > +      --short-name-search  When searching for <sym>, compare functions
> names, not signatures\n"));
> >    fprintf (stream, _("\
> >    -S, --source             Intermix source code with disassembly\n"));
> >    fprintf (stream, _("\
> > @@ -488,7 +491,8 @@ enum option_values
> >  #endif
> >      OPTION_SFRAME,
> >      OPTION_VISUALIZE_JUMPS,
> > -    OPTION_DISASSEMBLER_COLOR
> > +    OPTION_DISASSEMBLER_COLOR,
> > +    OPTION_SYMBOL_SHORT_NAME_SEARCH
> >    };
> >
> >  static struct option long_options[]=
> > @@ -543,6 +547,7 @@ static struct option long_options[]=
> >    {"section", required_argument, NULL, 'j'},
> >    {"section-headers", no_argument, NULL, 'h'},
> >    {"sframe", optional_argument, NULL, OPTION_SFRAME},
> > +  {"short-name-search", no_argument, NULL,
> OPTION_SYMBOL_SHORT_NAME_SEARCH},
> >    {"show-all-symbols", no_argument, &show_all_symbols, 1},
> >    {"show-raw-insn", no_argument, &show_raw_insn, 1},
> >    {"source", no_argument, NULL, 'S'},
> > @@ -3905,7 +3910,11 @@ disassemble_section (bfd *abfd, asection
> *section, void *inf)
> >             if (do_demangle && name[0] != '\0')
> >               {
> >                 /* Demangle the name.  */
> > -               alloc = bfd_demangle (abfd, name, demangle_flags);
> > +               if (short_name_search)
> > +                 alloc = bfd_demangle (abfd, name, DMGL_NO_OPTS);
> > +               else
> > +                 alloc = bfd_demangle (abfd, name, demangle_flags);
> > +
> >                 if (alloc != NULL)
> >                   name = alloc;
> >               }
> > @@ -3923,7 +3932,7 @@ disassemble_section (bfd *abfd, asection *section,
> void *inf)
> >                  (*rel_pp)->address - rel_offset < sym_offset)
> >                         ++rel_pp;
> >
> > -               if (sym->flags & BSF_FUNCTION)
> > +               if (sym->flags & BSF_FUNCTION && !(short_name_search &&
> do_demangle))
> >                   {
> >                     if (bfd_get_flavour (abfd) == bfd_target_elf_flavour
> >                         && ((elf_symbol_type *)
> sym)->internal_elf_sym.st_size > 0)
>
> I think this should be moved a little lower, so that loop_until is
> function_sym, but I haven't checked fully that this won't exit the
> loop prematurely.  Obviously you want to disassemble the entire
> function, but also continue looking for another occurrence of the same
> function name (with a different signature).
>
> > @@ -6200,6 +6209,9 @@ main (int argc, char **argv)
> >           dump_sframe_section_name = xstrdup (optarg);
> >         seenflag = true;
> >         break;
> > +     case OPTION_SYMBOL_SHORT_NAME_SEARCH:
> > +       short_name_search = true;
> > +       break;
> >       case 'G':
> >         dump_stab_section_info = true;
> >         seenflag = true;
> > @@ -6287,6 +6299,5 @@ main (int argc, char **argv)
> >    free (dump_ctf_section_name);
> >    free (dump_ctf_parent_name);
> >    free ((void *) source_comment);
> > -
> >    return exit_status;
> >  }
>
>
> --
> Alan Modra
> Australia Development Lab, IBM
>
  

Patch

diff --git a/binutils/objdump.c b/binutils/objdump.c
index 7beb221cb2f..a013eee3c12 100644
--- a/binutils/objdump.c
+++ b/binutils/objdump.c
@@ -138,6 +138,7 @@  static bool extended_color_output = false; /* --visualize-jumps=extended-color.
 static int process_links = false;       /* --process-links.  */
 static int show_all_symbols;            /* --show-all-symbols.  */
 static bool decompressed_dumps = false; /* -Z, --decompress.  */
+static bool short_name_search = false; /* --short-name-search */
 
 static enum color_selection
   {
@@ -272,6 +273,8 @@  usage (FILE *stream, int status)
   -D, --disassemble-all    Display assembler contents of all sections\n"));
   fprintf (stream, _("\
       --disassemble=<sym>  Display assembler contents from <sym>\n"));
+  fprintf (stream, _("\
+      --short-name-search  When searching for <sym>, compare functions names, not signatures\n"));
   fprintf (stream, _("\
   -S, --source             Intermix source code with disassembly\n"));
   fprintf (stream, _("\
@@ -488,7 +491,8 @@  enum option_values
 #endif
     OPTION_SFRAME,
     OPTION_VISUALIZE_JUMPS,
-    OPTION_DISASSEMBLER_COLOR
+    OPTION_DISASSEMBLER_COLOR,
+    OPTION_SYMBOL_SHORT_NAME_SEARCH
   };
 
 static struct option long_options[]=
@@ -543,6 +547,7 @@  static struct option long_options[]=
   {"section", required_argument, NULL, 'j'},
   {"section-headers", no_argument, NULL, 'h'},
   {"sframe", optional_argument, NULL, OPTION_SFRAME},
+  {"short-name-search", no_argument, NULL, OPTION_SYMBOL_SHORT_NAME_SEARCH},
   {"show-all-symbols", no_argument, &show_all_symbols, 1},
   {"show-raw-insn", no_argument, &show_raw_insn, 1},
   {"source", no_argument, NULL, 'S'},
@@ -3905,7 +3910,11 @@  disassemble_section (bfd *abfd, asection *section, void *inf)
 	      if (do_demangle && name[0] != '\0')
 		{
 		  /* Demangle the name.  */
-		  alloc = bfd_demangle (abfd, name, demangle_flags);
+		  if (short_name_search)
+		    alloc = bfd_demangle (abfd, name, DMGL_NO_OPTS);
+		  else
+		    alloc = bfd_demangle (abfd, name, demangle_flags);
+
 		  if (alloc != NULL)
 		    name = alloc;
 		}
@@ -3923,7 +3932,7 @@  disassemble_section (bfd *abfd, asection *section, void *inf)
 		   (*rel_pp)->address - rel_offset < sym_offset)
 			  ++rel_pp;
 
-		  if (sym->flags & BSF_FUNCTION)
+		  if (sym->flags & BSF_FUNCTION && !(short_name_search && do_demangle))
 		    {
 		      if (bfd_get_flavour (abfd) == bfd_target_elf_flavour
 			  && ((elf_symbol_type *) sym)->internal_elf_sym.st_size > 0)
@@ -6200,6 +6209,9 @@  main (int argc, char **argv)
 	    dump_sframe_section_name = xstrdup (optarg);
 	  seenflag = true;
 	  break;
+	case OPTION_SYMBOL_SHORT_NAME_SEARCH:
+	  short_name_search = true;
+	  break;
 	case 'G':
 	  dump_stab_section_info = true;
 	  seenflag = true;
@@ -6287,6 +6299,5 @@  main (int argc, char **argv)
   free (dump_ctf_section_name);
   free (dump_ctf_parent_name);
   free ((void *) source_comment);
-
   return exit_status;
 }