Revised "Don't return (null) from bfd_elf_sym_name"

Message ID ZwS27PNTSw2klhh6@squeak.grove.modra.org
State New
Headers
Series Revised "Don't return (null) from bfd_elf_sym_name" |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm warning Patch is already merged
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 warning Patch is already merged

Commit Message

Alan Modra Oct. 8, 2024, 4:37 a.m. UTC
  Commit 68bbe1183379 results in a lot of follow up work, much of which
likely is still to be done. (And yes, since this is all for corrupted
or fuzzed object files, a whole lot of work doesn't much benefit
anyone.  It was a bad idea to put NULL in asymbol->name.)  So I'm
changing the approach to instead put a unique empty string for symbols
with a corrupted st_name.  An empty string won't require much work to
ensure nm, objcopy, objdump etc. won't crash, since these tools
already must work with unnamed local symbols.

The unique empty string is called bfd_symbol_error_name.  This patch
uses that name string for corrupted symbols in the ELF and COFF
backends.  Such symbols are displayed by nm and objdump as the
translated string "<corrupt>", which is what the COFF backend used to
put directly into corrupted symbols.

The following should be applied over top of reverting
ef166f451fbc  objcopy fixes for commit 68bbe1183379
389fdfbe0d2a  elf.c and elflink.c fixes for commit 68bbe1183379
06116013f80e  dlltool fixes for commit 68bbe1183379
6e40f9bb31be  is_target_special_symbol fixes for commit 68bbe1183379
0c13ac533e59  get_synthetic_symtab fixes for commit 68bbe1183379
265757dc6e4d  bfd_elf_sym_name_raw
68bbe1183379  Don't return "(null)" from bfd_elf_sym_name

ie. it's the way I should have written the original patch, plus a few
tides and cleanups I retained from the reverted patches.
  

Patch

diff --git a/bfd/bfd-in2.h b/bfd/bfd-in2.h
index 40ec416dded..3b047d922f3 100644
--- a/bfd/bfd-in2.h
+++ b/bfd/bfd-in2.h
@@ -1260,6 +1260,11 @@  typedef struct _symbol_info
   const char *stab_name;       /* String for stab type.  */
 } symbol_info;
 
+/* An empty string that will not match the address of any other
+   symbol name, even unnamed local symbols which will also have empty
+   string names.  This can be used to flag a symbol as corrupt if its
+   name uses an out of range string table index.  */
+extern const char bfd_symbol_error_name[];
 #define bfd_get_symtab_upper_bound(abfd) \
        BFD_SEND (abfd, _bfd_get_symtab_upper_bound, (abfd))
 
diff --git a/bfd/coffgen.c b/bfd/coffgen.c
index cc1c655738b..5754dbbae15 100644
--- a/bfd/coffgen.c
+++ b/bfd/coffgen.c
@@ -1928,7 +1928,7 @@  coff_get_normalized_symtab (bfd *abfd)
 	      if ((bfd_size_type) aux->u.auxent.x_file.x_n.x_n.x_offset
 		  >= obj_coff_strings_len (abfd))
 		sym->u.syment._n._n_n._n_offset =
-		  (uintptr_t) _("<corrupt>");
+		  (uintptr_t) bfd_symbol_error_name;
 	      else
 		sym->u.syment._n._n_n._n_offset =
 		  (uintptr_t) (string_table
@@ -1978,7 +1978,7 @@  coff_get_normalized_symtab (bfd *abfd)
 		    if ((bfd_size_type) aux->u.auxent.x_file.x_n.x_n.x_offset
 			>= obj_coff_strings_len (abfd))
 		      aux->u.auxent.x_file.x_n.x_n.x_offset =
-			(uintptr_t) _("<corrupt>");
+			(uintptr_t) bfd_symbol_error_name;
 		    else
 		      aux->u.auxent.x_file.x_n.x_n.x_offset =
 			(uintptr_t) (string_table
@@ -2028,7 +2028,7 @@  coff_get_normalized_symtab (bfd *abfd)
 		}
 	      if (sym->u.syment._n._n_n._n_offset >= obj_coff_strings_len (abfd))
 		sym->u.syment._n._n_n._n_offset =
-		  (uintptr_t) _("<corrupt>");
+		  (uintptr_t) bfd_symbol_error_name;
 	      else
 		sym->u.syment._n._n_n._n_offset =
 		  (uintptr_t) (string_table
@@ -2047,7 +2047,7 @@  coff_get_normalized_symtab (bfd *abfd)
 		 the debug data.  */
 	      if (sym->u.syment._n._n_n._n_offset >= debug_sec->size)
 		sym->u.syment._n._n_n._n_offset =
-		  (uintptr_t) _("<corrupt>");
+		  (uintptr_t) bfd_symbol_error_name;
 	      else
 		sym->u.syment._n._n_n._n_offset =
 		  (uintptr_t) (debug_sec_data
@@ -2161,11 +2161,13 @@  coff_print_symbol (bfd *abfd,
 		   bfd_print_symbol_type how)
 {
   FILE * file = (FILE *) filep;
+  const char *symname = (symbol->name != bfd_symbol_error_name
+			 ? symbol->name : _("<corrupt>"));
 
   switch (how)
     {
     case bfd_print_symbol_name:
-      fprintf (file, "%s", symbol->name);
+      fprintf (file, "%s", symname);
       break;
 
     case bfd_print_symbol_more:
@@ -2189,7 +2191,7 @@  coff_print_symbol (bfd *abfd,
 	  if (combined < obj_raw_syments (abfd)
 	      || combined >= obj_raw_syments (abfd) + obj_raw_syment_count (abfd))
 	    {
-	      fprintf (file, _("<corrupt info> %s"), symbol->name);
+	      fprintf (file, _("<corrupt info> %s"), symname);
 	      break;
 	    }
 
@@ -2207,7 +2209,7 @@  coff_print_symbol (bfd *abfd,
 		   combined->u.syment.n_sclass,
 		   combined->u.syment.n_numaux);
 	  bfd_fprintf_vma (abfd, file, val);
-	  fprintf (file, " %s", symbol->name);
+	  fprintf (file, " %s", symname);
 
 	  for (aux = 0; aux < combined->u.syment.n_numaux; aux++)
 	    {
@@ -2297,7 +2299,9 @@  coff_print_symbol (bfd *abfd,
 
 	  if (l)
 	    {
-	      fprintf (file, "\n%s :", l->u.sym->name);
+	      fprintf (file, "\n%s :",
+		       l->u.sym->name != bfd_symbol_error_name
+		       ? l->u.sym->name : _("<corrupt>"));
 	      l++;
 	      while (l->line_number)
 		{
@@ -2317,7 +2321,7 @@  coff_print_symbol (bfd *abfd,
 		   symbol->section->name,
 		   coffsymbol (symbol)->native ? "n" : "g",
 		   coffsymbol (symbol)->lineno ? "l" : " ",
-		   symbol->name);
+		   symname);
 	}
     }
 }
diff --git a/bfd/ecoff.c b/bfd/ecoff.c
index 5ee7ffaf489..d0cb9e18aaa 100644
--- a/bfd/ecoff.c
+++ b/bfd/ecoff.c
@@ -1452,11 +1452,13 @@  _bfd_ecoff_print_symbol (bfd *abfd,
   const struct ecoff_debug_swap * const debug_swap
     = &ecoff_backend (abfd)->debug_swap;
   FILE *file = (FILE *)filep;
+  const char *symname = (symbol->name != bfd_symbol_error_name
+			 ? symbol->name : _("<corrupt>"));
 
   switch (how)
     {
     case bfd_print_symbol_name:
-      fprintf (file, "%s", symbol->name);
+      fprintf (file, "%s", symname);
       break;
     case bfd_print_symbol_more:
       if (ecoffsymbol (symbol)->local)
@@ -1526,7 +1528,7 @@  _bfd_ecoff_print_symbol (bfd *abfd,
 		 (unsigned) ecoff_ext.asym.sc,
 		 (unsigned) ecoff_ext.asym.index,
 		 jmptbl, cobol_main, weakext,
-		 symbol->name);
+		 symname);
 
 	if (ecoffsymbol (symbol)->fdr != NULL
 	    && ecoff_ext.asym.index != indexNil)
diff --git a/bfd/elf.c b/bfd/elf.c
index c882a66ab5c..997befd5686 100644
--- a/bfd/elf.c
+++ b/bfd/elf.c
@@ -530,13 +530,11 @@  bfd_elf_get_elf_syms (bfd *ibfd,
 }
 
 /* Look up a symbol name.  */
-const char *
-bfd_elf_sym_name (bfd *abfd,
-		  Elf_Internal_Shdr *symtab_hdr,
-		  Elf_Internal_Sym *isym,
-		  asection *sym_sec)
+static const char *
+bfd_elf_sym_name_raw (bfd *abfd,
+		      Elf_Internal_Shdr *symtab_hdr,
+		      Elf_Internal_Sym *isym)
 {
-  const char *name;
   unsigned int iname = isym->st_name;
   unsigned int shindex = symtab_hdr->sh_link;
 
@@ -548,9 +546,18 @@  bfd_elf_sym_name (bfd *abfd,
       shindex = elf_elfheader (abfd)->e_shstrndx;
     }
 
-  name = bfd_elf_string_from_elf_section (abfd, shindex, iname);
+  return bfd_elf_string_from_elf_section (abfd, shindex, iname);
+}
+
+const char *
+bfd_elf_sym_name (bfd *abfd,
+		  Elf_Internal_Shdr *symtab_hdr,
+		  Elf_Internal_Sym *isym,
+		  asection *sym_sec)
+{
+  const char *name = bfd_elf_sym_name_raw (abfd, symtab_hdr, isym);
   if (name == NULL)
-    name = "(null)";
+    name = bfd_symbol_error_name;
   else if (sym_sec && *name == '\0')
     name = bfd_section_name (sym_sec);
 
@@ -583,7 +590,7 @@  group_signature (bfd *abfd, Elf_Internal_Shdr *ghdr)
 			    &isym, esym, &eshndx) == NULL)
     return NULL;
 
-  return bfd_elf_sym_name (abfd, hdr, &isym, NULL);
+  return bfd_elf_sym_name_raw (abfd, hdr, &isym);
 }
 
 static bool
@@ -2314,10 +2321,13 @@  bfd_elf_print_symbol (bfd *abfd,
 		      bfd_print_symbol_type how)
 {
   FILE *file = (FILE *) filep;
+  const char *symname = (symbol->name != bfd_symbol_error_name
+			 ? symbol->name : _("<corrupt>"));
+
   switch (how)
     {
     case bfd_print_symbol_name:
-      fprintf (file, "%s", symbol->name);
+      fprintf (file, "%s", symname);
       break;
     case bfd_print_symbol_more:
       fprintf (file, "elf ");
@@ -2340,11 +2350,10 @@  bfd_elf_print_symbol (bfd *abfd,
 	if (bed->elf_backend_print_symbol_all)
 	  name = (*bed->elf_backend_print_symbol_all) (abfd, filep, symbol);
 
-	if (name == NULL)
-	  {
-	    name = symbol->name;
-	    bfd_print_symbol_vandf (abfd, file, symbol);
-	  }
+	if (name != NULL)
+	  symname = name;
+	else
+	  bfd_print_symbol_vandf (abfd, file, symbol);
 
 	fprintf (file, " %s\t", section_name);
 	/* Print the "other" value for a symbol.  For common symbols,
@@ -2391,7 +2400,7 @@  bfd_elf_print_symbol (bfd *abfd,
 	    fprintf (file, " 0x%02x", (unsigned int) st_other);
 	  }
 
-	fprintf (file, " %s", name);
+	fprintf (file, " %s", symname);
       }
       break;
     }
@@ -8730,17 +8739,16 @@  swap_out_syms (bfd *abfd,
 	  && (flags & (BSF_SECTION_SYM | BSF_GLOBAL)) == BSF_SECTION_SYM)
 	{
 	  /* Local section symbols have no name.  */
-	  sym.st_name = (unsigned long) -1;
+	  sym.st_name = 0;
 	}
       else
 	{
 	  /* Call _bfd_elf_strtab_offset after _bfd_elf_strtab_finalize
 	     to get the final offset for st_name.  */
-	  sym.st_name
-	    = (unsigned long) _bfd_elf_strtab_add (stt, syms[idx]->name,
-						   false);
-	  if (sym.st_name == (unsigned long) -1)
+	  size_t stridx = _bfd_elf_strtab_add (stt, syms[idx]->name, false);
+	  if (stridx == (size_t) -1)
 	    goto error_return;
+	  sym.st_name = stridx;
 	}
 
       bfd_vma value = syms[idx]->value;
@@ -8951,9 +8959,7 @@  Unable to handle section index %x in ELF symbol.  Using ABS instead."),
   for (idx = 0; idx < outbound_syms_index; idx++)
     {
       struct elf_sym_strtab *elfsym = &symstrtab[idx];
-      if (elfsym->sym.st_name == (unsigned long) -1)
-	elfsym->sym.st_name = 0;
-      else
+      if (elfsym->sym.st_name != 0)
 	elfsym->sym.st_name = _bfd_elf_strtab_offset (stt,
 						      elfsym->sym.st_name);
       if (info && info->callbacks->ctf_new_symbol)
diff --git a/bfd/elf32-i386.c b/bfd/elf32-i386.c
index f27c0621a93..2e8d59518ac 100644
--- a/bfd/elf32-i386.c
+++ b/bfd/elf32-i386.c
@@ -1723,7 +1723,7 @@  elf_i386_scan_relocs (bfd *abfd,
 		      name = h->root.root.string;
 		    else
 		      name = bfd_elf_sym_name (abfd, symtab_hdr, isym,
-					     NULL);
+					       NULL);
 		    _bfd_error_handler
 		      /* xgettext:c-format */
 		      (_("%pB: `%s' accessed both as normal and "
diff --git a/bfd/elf32-v850.c b/bfd/elf32-v850.c
index 85cbcbc3505..bb3ce8d88fe 100644
--- a/bfd/elf32-v850.c
+++ b/bfd/elf32-v850.c
@@ -1933,8 +1933,11 @@  v850_elf_info_to_howto_rela (bfd *abfd,
 static bool
 v850_elf_is_local_label_name (bfd *abfd ATTRIBUTE_UNUSED, const char *name)
 {
-  return (   (name[0] == '.' && (name[1] == 'L' || name[1] == '.'))
-	  || (name[0] == '_' &&  name[1] == '.' && name[2] == 'L' && name[3] == '_'));
+  if (name[0] == '.' && (name[1] == 'L' || name[1] == '.'))
+    return true;
+  if (name[0] == '_' && name[1] == '.' && name[2] == 'L' && name[3] == '_')
+    return true;
+  return false;
 }
 
 static bool
diff --git a/bfd/elflink.c b/bfd/elflink.c
index 9eb1122d513..a498dbb12a6 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -8819,6 +8819,8 @@  bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2,
 	    symp->name = bfd_elf_string_from_elf_section (bfd1,
 							  hdr1->sh_link,
 							  ssym->st_name);
+	    if (symp->name == NULL)
+	      goto done;
 	    symp++;
 	  }
 
@@ -8832,6 +8834,8 @@  bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2,
 	    symp->name = bfd_elf_string_from_elf_section (bfd2,
 							  hdr2->sh_link,
 							  ssym->st_name);
+	    if (symp->name == NULL)
+	      goto done;
 	    symp++;
 	  }
 
@@ -8878,14 +8882,22 @@  bfd_elf_match_symbols_in_sections (asection *sec1, asection *sec2,
     goto done;
 
   for (i = 0; i < count1; i++)
-    symtable1[i].name
-      = bfd_elf_string_from_elf_section (bfd1, hdr1->sh_link,
-					 symtable1[i].u.isym->st_name);
+    {
+      symtable1[i].name
+	= bfd_elf_string_from_elf_section (bfd1, hdr1->sh_link,
+					   symtable1[i].u.isym->st_name);
+      if (symtable1[i].name == NULL)
+	goto done;
+    }
 
   for (i = 0; i < count2; i++)
-    symtable2[i].name
-      = bfd_elf_string_from_elf_section (bfd2, hdr2->sh_link,
-					 symtable2[i].u.isym->st_name);
+    {
+      symtable2[i].name
+	= bfd_elf_string_from_elf_section (bfd2, hdr2->sh_link,
+					   symtable2[i].u.isym->st_name);
+      if (symtable2[i].name == NULL)
+	goto done;
+    }
 
   /* Sort symbol by name.  */
   qsort (symtable1, count1, sizeof (struct elf_symbol),
diff --git a/bfd/elfnn-riscv.c b/bfd/elfnn-riscv.c
index 90ecc276f31..c8bf45f4293 100644
--- a/bfd/elfnn-riscv.c
+++ b/bfd/elfnn-riscv.c
@@ -5612,7 +5612,7 @@  riscv_elf_is_target_special_symbol (bfd *abfd, asymbol *sym)
 {
   /* PR27584, local and empty symbols.  Since they are usually
      generated for pcrel relocations.  */
-  return (!strcmp (sym->name, "")
+  return (!sym->name[0]
 	  || _bfd_elf_is_local_label_name (abfd, sym->name)
 	  /* PR27916, mapping symbols.  */
 	  || riscv_elf_is_mapping_symbols (sym->name));
diff --git a/bfd/pef.c b/bfd/pef.c
index f330b92e821..2d2f5597a66 100644
--- a/bfd/pef.c
+++ b/bfd/pef.c
@@ -210,16 +210,18 @@  bfd_pef_print_symbol (bfd *abfd,
 		      bfd_print_symbol_type how)
 {
   FILE *file = (FILE *) afile;
+  const char *symname = (symbol->name != bfd_symbol_error_name
+			 ? symbol->name : _("<corrupt>"));
 
   switch (how)
     {
     case bfd_print_symbol_name:
-      fprintf (file, "%s", symbol->name);
+      fprintf (file, "%s", symname);
       break;
     default:
       bfd_print_symbol_vandf (abfd, (void *) file, symbol);
-      fprintf (file, " %-5s %s", symbol->section->name, symbol->name);
-      if (startswith (symbol->name, "__traceback_"))
+      fprintf (file, " %-5s %s", symbol->section->name, symname);
+      if (startswith (symname, "__traceback_"))
 	{
 	  unsigned char *buf;
 	  size_t offset = symbol->value + 4;
diff --git a/bfd/syms.c b/bfd/syms.c
index b370a3375d9..68f07302ec1 100644
--- a/bfd/syms.c
+++ b/bfd/syms.c
@@ -342,6 +342,11 @@  EXTERNAL
 .  const char *stab_name;	{* String for stab type.  *}
 .} symbol_info;
 .
+.{* An empty string that will not match the address of any other
+.   symbol name, even unnamed local symbols which will also have empty
+.   string names.  This can be used to flag a symbol as corrupt if its
+.   name uses an out of range string table index.  *}
+.extern const char bfd_symbol_error_name[];
 */
 
 #include "sysdep.h"
@@ -351,6 +356,8 @@  EXTERNAL
 #include "bfdlink.h"
 #include "aout/stab_gnu.h"
 
+const char bfd_symbol_error_name[] = { 0 };
+
 /*
 DOCDD
 INODE
@@ -394,7 +401,7 @@  bfd_is_local_label (bfd *abfd, asymbol *sym)
      if we didn't reject them here.  */
   if ((sym->flags & (BSF_GLOBAL | BSF_WEAK | BSF_FILE | BSF_SECTION_SYM)) != 0)
     return false;
-  if (sym->name == NULL)
+  if (sym->name == NULL || sym->name == bfd_symbol_error_name)
     return false;
   return bfd_is_local_label_name (abfd, sym->name);
 }
@@ -777,7 +784,8 @@  bfd_symbol_info (asymbol *symbol, symbol_info *ret)
   else
     ret->value = symbol->value + symbol->section->vma;
 
-  ret->name = symbol->name;
+  ret->name = (symbol->name != bfd_symbol_error_name
+	       ? symbol->name : _("<corrupt>"));
 }
 
 /*
diff --git a/binutils/objcopy.c b/binutils/objcopy.c
index 24e31cc5805..378285062b2 100644
--- a/binutils/objcopy.c
+++ b/binutils/objcopy.c
@@ -1590,14 +1590,14 @@  filter_symbols (bfd *abfd, bfd *obfd, asymbol **osyms,
 	{
 	  char *new_name;
 
-	  if (name != NULL
-	      && name[0] == '_'
+	  if (name[0] == '_'
 	      && name[1] == '_'
 	      && strcmp (name + (name[2] == '_'), "__gnu_lto_slim") == 0)
 	    {
-	      fatal (_("redefining symbols does not work on LTO-compiled object files"));
+	      fatal (_("redefining symbols does not work"
+		       " on LTO-compiled object files"));
 	    }
-	  
+
 	  new_name = (char *) lookup_sym_redefinition (name);
 	  if (new_name == name
 	      && (flags & BSF_SECTION_SYM) != 0)
@@ -2956,7 +2956,7 @@  copy_object (bfd *ibfd, bfd *obfd, const bfd_arch_info_type *input_arch)
 	  pset = find_section_list (padd->name, false,
 				    SECTION_CONTEXT_SET_FLAGS);
 	  if (pset != NULL)
-	    {	      
+	    {
 	      flags = pset->flags | SEC_HAS_CONTENTS;
 	      flags = check_new_section_flags (flags, obfd, padd->name);
 	    }
diff --git a/ld/ldlang.c b/ld/ldlang.c
index 7f9e3d2b119..343c4de53f4 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -4895,9 +4895,6 @@  ld_is_local_symbol (asymbol * sym)
   if (name == NULL || *name == 0)
     return false;
 
-  if (strcmp (name, "(null)") == 0)
-    return false;
-
   /* Skip .Lxxx and such like.  */
   if (bfd_is_local_label (link_info.output_bfd, sym))
     return false;