elf: Strip unreferenced weak undefined symbols

Message ID 20240418122856.190234-1-hjl.tools@gmail.com
State New
Headers
Series elf: Strip unreferenced weak undefined symbols |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Testing passed

Commit Message

H.J. Lu April 18, 2024, 12:28 p.m. UTC
The linker resolves an undefined symbol only if it is referenced by a
relocation.  Unreferenced weak undefined symbols serve no purpose.
Weak undefined symbols appear in the dynamic symbol table only when they
are referenced by a dynamic relocation.  Mark symbols that have a
relocation, and strip undefined weak symbols that have no relocation and
aren't in the dynamic symbol table.

bfd/

	PR ld/31652
	* elf-bfd.h (elf_link_hash_entry): Add has_reloc.
	* elf-vxworks.c (elf_vxworks_emit_relocs): Set has_reloc.
	* elflink.c (_bfd_elf_link_output_relocs): Likewise.
	(elf_link_output_extsym): Strip undefined weak symbols if they
	don't have relocation and aren't in the dynamic symbol table.

ld/

	PR ld/31652
	* testsuite/ld-elf/elf.exp: Run undefweak tests.
	* testsuite/ld-elf/undefweak-1.rd: New file.
	* testsuite/ld-elf/undefweak-1a.s: Likewise.
	* testsuite/ld-elf/undefweak-1b.s: Likewise.
	* testsuite/ld-x86-64/weakundef-1.nd: Likewise.
	* testsuite/ld-x86-64/weakundef-1a.s: Likewise.
	* testsuite/ld-x86-64/weakundef-1b.s: Likewise.
	* testsuite/ld-x86-64/x86-64.exp: Run weakundef tests.
---
 bfd/elf-bfd.h                         |  2 +
 bfd/elf-vxworks.c                     | 56 ++++++++++++++-------------
 bfd/elflink.c                         | 17 +++++++-
 ld/testsuite/ld-elf/elf.exp           | 15 +++++++
 ld/testsuite/ld-elf/undefweak-1.rd    | 10 +++++
 ld/testsuite/ld-elf/undefweak-1a.s    | 14 +++++++
 ld/testsuite/ld-elf/undefweak-1b.s    | 10 +++++
 ld/testsuite/ld-x86-64/weakundef-1.nd |  6 +++
 ld/testsuite/ld-x86-64/weakundef-1a.s |  6 +++
 ld/testsuite/ld-x86-64/weakundef-1b.s |  8 ++++
 ld/testsuite/ld-x86-64/x86-64.exp     | 18 +++++++++
 11 files changed, 134 insertions(+), 28 deletions(-)
 create mode 100644 ld/testsuite/ld-elf/undefweak-1.rd
 create mode 100644 ld/testsuite/ld-elf/undefweak-1a.s
 create mode 100644 ld/testsuite/ld-elf/undefweak-1b.s
 create mode 100644 ld/testsuite/ld-x86-64/weakundef-1.nd
 create mode 100644 ld/testsuite/ld-x86-64/weakundef-1a.s
 create mode 100644 ld/testsuite/ld-x86-64/weakundef-1b.s
  

Comments

Alan Modra April 19, 2024, 12:38 a.m. UTC | #1
On Thu, Apr 18, 2024 at 05:28:56AM -0700, H.J. Lu wrote:
> Linker will resolve an undefined symbol only if it is referenced by
> relocation.  Unreferenced weak undefined symbols serve no purpose.
> Weak undefined symbols appear in the dynamic symbol table only when they
> are referenced by dynamic relocation.  Mark symbols with relocation and
> strip undefined weak symbols if they don't have relocation and aren't
> in the dynamic symbol table.

OK, seems reasonable.
  
Alan Modra April 19, 2024, 7:15 a.m. UTC | #2
I guess I should have asked if you'd tested this patch.

alpha-linux-gnu  +FAIL: Generate undefweak-1.a
alpha-linux-gnu  +FAIL: Generate undefweak-1.o
alpha-netbsd  +FAIL: Generate undefweak-1.a
alpha-netbsd  +FAIL: Generate undefweak-1.o
alpha-unknown-freebsd4.7  +FAIL: Generate undefweak-1.a
alpha-unknown-freebsd4.7  +FAIL: Generate undefweak-1.o
d30v-elf  +XPASS: Generate undefweak-1.a
d30v-elf  +XPASS: Generate undefweak-1.o
dlx-elf  +XPASS: Generate undefweak-1.a
dlx-elf  +XPASS: Generate undefweak-1.o
fr30-elf  +XPASS: Generate undefweak-1.a
fr30-elf  +XPASS: Generate undefweak-1.o
frv-elf  +XPASS: Generate undefweak-1.a
frv-elf  +XPASS: Generate undefweak-1.o
ft32-elf  +XPASS: Generate undefweak-1.a
ft32-elf  +XPASS: Generate undefweak-1.o
iq2000-elf  +XPASS: Generate undefweak-1.a
iq2000-elf  +XPASS: Generate undefweak-1.o
mn10200-elf  +XPASS: Generate undefweak-1.a
mn10200-elf  +XPASS: Generate undefweak-1.o
msp430-elf  +XPASS: Generate undefweak-1.a
msp430-elf  +XPASS: Generate undefweak-1.o
mt-elf  +XPASS: Generate undefweak-1.a
mt-elf  +XPASS: Generate undefweak-1.o
pj-elf  +XPASS: Generate undefweak-1.a
pj-elf  +XPASS: Generate undefweak-1.o
powerpc64-freebsd  +FAIL: ld-powerpc/relbrlt
powerpc64le-linux-gnu  +FAIL: Build pr26391-3
powerpc64le-linux-gnu  +FAIL: Build pr26391-4
powerpc64le-linux-gnu  +FAIL: Run pr26391-3
powerpc64le-linux-gnu  +FAIL: Run pr26391-4
powerpc64le-linux-gnu  +FAIL: ld-powerpc/relbrlt
powerpc64-linux-gnu  +FAIL: Build pr26391-3
powerpc64-linux-gnu  +FAIL: Build pr26391-4
powerpc64-linux-gnu  +FAIL: Run pr26391-3
powerpc64-linux-gnu  +FAIL: Run pr26391-4
powerpc64-linux-gnu  +FAIL: ld-powerpc/relbrlt
powerpc-eabisim  +FAIL: ld-powerpc/relbrlt
powerpc-eabivle  +FAIL: ld-powerpc/relbrlt
powerpcle-elf  +FAIL: ld-powerpc/relbrlt
powerpc-linux-gnu  +FAIL: ld-powerpc/relbrlt
s12z-elf  +XPASS: Generate undefweak-1.a
s12z-elf  +XPASS: Generate undefweak-1.o
xgate-elf  +XPASS: Generate undefweak-1.a
xgate-elf  +XPASS: Generate undefweak-1.o

I'm applying the following to fix the powerpc failures, I'll leave the
rest to you.

	PR ld/31652
	* elflink.c (_bfd_elf_link_output_relocs): Don't segfault
	on NULL rel_hash.

diff --git a/bfd/elflink.c b/bfd/elflink.c
index 4f72d1b2a90..6db6a9c0b47 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -2985,12 +2985,13 @@ _bfd_elf_link_output_relocs (bfd *output_bfd,
 		      * bed->s->int_rels_per_ext_rel);
   while (irela < irelaend)
     {
-      if (*rel_hash)
+      if (rel_hash && *rel_hash)
 	(*rel_hash)->has_reloc = 1;
       (*swap_out) (output_bfd, irela, erel);
       irela += bed->s->int_rels_per_ext_rel;
       erel += input_rel_hdr->sh_entsize;
-      rel_hash++;
+      if (rel_hash)
+	rel_hash++;
     }
 
   /* Bump the counter, so that we know where to add the next set of
  
Alexandre Oliva April 22, 2024, 5:50 a.m. UTC | #3
Thanks again for the patch.

Olivier pointed out that a comment you moved had a duplicate word, and
suggested removing it.  Ok to install?

From: Olivier Hainque <hainque@adacore.com>
Date: Mon, 22 Apr 2024 05:30:38 +0000

Remove duplicate word.

for  bfd/ChangeLog

	PR ld/31652
	* elf-vxworks.c (elf_vxworks_emit_relocs): Drop duplicate word.
---
 bfd/elf-vxworks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bfd/elf-vxworks.c b/bfd/elf-vxworks.c
index 36e5540f9c04..5d91913f097f 100644
--- a/bfd/elf-vxworks.c
+++ b/bfd/elf-vxworks.c
@@ -186,7 +186,7 @@ elf_vxworks_emit_relocs (bfd *output_bfd,
 		     library.  We are creating a definition in the output
 		     file but it does not come from any of our normal (.o)
 		     files. ie. a PLT stub.  Normally this would be a
-		     relocation against against SHN_UNDEF with the VMA of
+		     relocation against SHN_UNDEF with the VMA of
 		     the PLT stub.  This upsets the VxWorks loader.
 		     Convert it to a section-relative relocation.  This
 		     gets some other symbols (for instance .dynbss), but
  
H.J. Lu April 23, 2024, 4:13 p.m. UTC | #4
On Sun, Apr 21, 2024 at 10:50 PM Alexandre Oliva <oliva@gnu.org> wrote:
>
> Thanks again for the patch.
>
> Olivier pointed out that a comment you moved had a duplicate word, and
> suggested removing it.  Ok to install?
>
> From: Olivier Hainque <hainque@adacore.com>
> Date: Mon, 22 Apr 2024 05:30:38 +0000
>
> Remove duplicate word.
>
> for  bfd/ChangeLog
>
>         PR ld/31652
>         * elf-vxworks.c (elf_vxworks_emit_relocs): Drop duplicate word.
> ---
>  bfd/elf-vxworks.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/bfd/elf-vxworks.c b/bfd/elf-vxworks.c
> index 36e5540f9c04..5d91913f097f 100644
> --- a/bfd/elf-vxworks.c
> +++ b/bfd/elf-vxworks.c
> @@ -186,7 +186,7 @@ elf_vxworks_emit_relocs (bfd *output_bfd,
>                      library.  We are creating a definition in the output
>                      file but it does not come from any of our normal (.o)
>                      files. ie. a PLT stub.  Normally this would be a
> -                    relocation against against SHN_UNDEF with the VMA of
> +                    relocation against SHN_UNDEF with the VMA of
>                      the PLT stub.  This upsets the VxWorks loader.
>                      Convert it to a section-relative relocation.  This
>                      gets some other symbols (for instance .dynbss), but
> --
> 2.25.1
>

I am checking in this as an obvious fix.
  

Patch

diff --git a/bfd/elf-bfd.h b/bfd/elf-bfd.h
index ef5dcb55e72..92a0287d40e 100644
--- a/bfd/elf-bfd.h
+++ b/bfd/elf-bfd.h
@@ -232,6 +232,8 @@  struct elf_link_hash_entry
      a strong defined symbol alias.  U.ALIAS points to a list of aliases,
      the definition having is_weakalias clear.  */
   unsigned int is_weakalias : 1;
+  /* Symbol has a relocation.  */
+  unsigned int has_reloc : 1;
 
   /* String table index in .dynstr if this is a dynamic symbol.  */
   unsigned long dynstr_index;
diff --git a/bfd/elf-vxworks.c b/bfd/elf-vxworks.c
index 4c172cd4115..36e5540f9c0 100644
--- a/bfd/elf-vxworks.c
+++ b/bfd/elf-vxworks.c
@@ -172,35 +172,39 @@  elf_vxworks_emit_relocs (bfd *output_bfd,
 	   irela += bed->s->int_rels_per_ext_rel,
 	     hash_ptr++)
 	{
-	  if (*hash_ptr
-	      && (*hash_ptr)->def_dynamic
-	      && !(*hash_ptr)->def_regular
-	      && ((*hash_ptr)->root.type == bfd_link_hash_defined
-		  || (*hash_ptr)->root.type == bfd_link_hash_defweak)
-	      && (*hash_ptr)->root.u.def.section->output_section != NULL)
+	  if (*hash_ptr)
 	    {
-	      /* This is a relocation from an executable or shared
-		 library against a symbol in a different shared
-		 library.  We are creating a definition in the output
-		 file but it does not come from any of our normal (.o)
-		 files. ie. a PLT stub.  Normally this would be a
-		 relocation against against SHN_UNDEF with the VMA of
-		 the PLT stub.  This upsets the VxWorks loader.
-		 Convert it to a section-relative relocation.  This
-		 gets some other symbols (for instance .dynbss), but
-		 is conservatively correct.  */
-	      for (j = 0; j < bed->s->int_rels_per_ext_rel; j++)
+	      (*hash_ptr)->has_reloc = 1;
+	      if ((*hash_ptr)->def_dynamic
+		  && !(*hash_ptr)->def_regular
+		  && ((*hash_ptr)->root.type == bfd_link_hash_defined
+		      || (*hash_ptr)->root.type == bfd_link_hash_defweak)
+		  && (*hash_ptr)->root.u.def.section->output_section != NULL)
 		{
-		  asection *sec = (*hash_ptr)->root.u.def.section;
-		  int this_idx = sec->output_section->target_index;
-
-		  irela[j].r_info
-		    = ELF32_R_INFO (this_idx, ELF32_R_TYPE (irela[j].r_info));
-		  irela[j].r_addend += (*hash_ptr)->root.u.def.value;
-		  irela[j].r_addend += sec->output_offset;
+		  /* This is a relocation from an executable or shared
+		     library against a symbol in a different shared
+		     library.  We are creating a definition in the output
+		     file but it does not come from any of our normal (.o)
+		     files. ie. a PLT stub.  Normally this would be a
+		     relocation against against SHN_UNDEF with the VMA of
+		     the PLT stub.  This upsets the VxWorks loader.
+		     Convert it to a section-relative relocation.  This
+		     gets some other symbols (for instance .dynbss), but
+		     is conservatively correct.  */
+		  for (j = 0; j < bed->s->int_rels_per_ext_rel; j++)
+		    {
+		      asection *sec = (*hash_ptr)->root.u.def.section;
+		      int this_idx = sec->output_section->target_index;
+
+		      irela[j].r_info
+			= ELF32_R_INFO (this_idx,
+					ELF32_R_TYPE (irela[j].r_info));
+		      irela[j].r_addend += (*hash_ptr)->root.u.def.value;
+		      irela[j].r_addend += sec->output_offset;
+		    }
+		  /* Stop the generic routine adjusting this entry.  */
+		  *hash_ptr = NULL;
 		}
-	      /* Stop the generic routine adjusting this entry.  */
-	      *hash_ptr = NULL;
 	    }
 	}
     }
diff --git a/bfd/elflink.c b/bfd/elflink.c
index 9c53bfcf7d4..4f72d1b2a90 100644
--- a/bfd/elflink.c
+++ b/bfd/elflink.c
@@ -2942,8 +2942,7 @@  _bfd_elf_link_output_relocs (bfd *output_bfd,
 			     asection *input_section,
 			     Elf_Internal_Shdr *input_rel_hdr,
 			     Elf_Internal_Rela *internal_relocs,
-			     struct elf_link_hash_entry **rel_hash
-			       ATTRIBUTE_UNUSED)
+			     struct elf_link_hash_entry **rel_hash)
 {
   Elf_Internal_Rela *irela;
   Elf_Internal_Rela *irelaend;
@@ -2986,9 +2985,12 @@  _bfd_elf_link_output_relocs (bfd *output_bfd,
 		      * bed->s->int_rels_per_ext_rel);
   while (irela < irelaend)
     {
+      if (*rel_hash)
+	(*rel_hash)->has_reloc = 1;
       (*swap_out) (output_bfd, irela, erel);
       irela += bed->s->int_rels_per_ext_rel;
       erel += input_rel_hdr->sh_entsize;
+      rel_hash++;
     }
 
   /* Bump the counter, so that we know where to add the next set of
@@ -10769,6 +10771,13 @@  elf_link_output_extsym (struct bfd_hash_entry *bh, void *data)
 	   && (h->root.u.undef.abfd->flags & BFD_PLUGIN) != 0)
     strip = true;
 
+  /* Remember if this symbol should be stripped.  */
+  bool should_strip = strip;
+
+  /* Strip undefined weak symbols if they don't have relocation.  */
+  if (!strip)
+    strip = !h->has_reloc && h->root.type == bfd_link_hash_undefweak;
+
   type = h->type;
 
   /* If we're stripping it, and it's not a dynamic symbol, there's
@@ -10917,6 +10926,10 @@  elf_link_output_extsym (struct bfd_hash_entry *bh, void *data)
 	  eoinfo->failed = true;
 	  return false;
 	}
+      /* If a symbol is in the dynamic symbol table and isn't a
+	 should-strip symbol, also keep it in the symbol table.  */
+      if (!should_strip)
+	strip = false;
     }
 
   /* If we are marking the symbol as undefined, and there are no
diff --git a/ld/testsuite/ld-elf/elf.exp b/ld/testsuite/ld-elf/elf.exp
index 685b87588e7..874ba662d2b 100644
--- a/ld/testsuite/ld-elf/elf.exp
+++ b/ld/testsuite/ld-elf/elf.exp
@@ -171,6 +171,21 @@  run_ld_link_tests [list \
 	"implib" ] \
 ] \[uses_genelf\]
 
+run_ld_link_tests [list \
+    [list "Generate undefweak-1.a" \
+	"" "" \
+	$hpux \
+	{undefweak-1a.s} \
+	{} \
+	"undefweak-1.a" ] \
+    [list "Generate undefweak-1.o" \
+	"-r" "tmpdir/undefweak-1.a" \
+	$hpux \
+	{undefweak-1b.s} \
+	{{readelf {-rsW} undefweak-1.rd}} \
+	"undefweak-1.o" ] \
+] \[uses_genelf\]
+
 #v850 gas complains about .tbss.var section attributes.
 if { [check_gc_sections_available] && ![istarget "v850-*-*"] } {
     run_ld_link_tests {
diff --git a/ld/testsuite/ld-elf/undefweak-1.rd b/ld/testsuite/ld-elf/undefweak-1.rd
new file mode 100644
index 00000000000..5b48c13c892
--- /dev/null
+++ b/ld/testsuite/ld-elf/undefweak-1.rd
@@ -0,0 +1,10 @@ 
+#source: undefweak-1a.s
+#source: undefweak-1b.s
+#as:
+#ld: -r
+#readelf: -sW
+
+#failif
+#...
+.*: 0+ +0 +FUNC +WEAK +DEFAULT +UND +bar
+#pass
diff --git a/ld/testsuite/ld-elf/undefweak-1a.s b/ld/testsuite/ld-elf/undefweak-1a.s
new file mode 100644
index 00000000000..1d885fb9997
--- /dev/null
+++ b/ld/testsuite/ld-elf/undefweak-1a.s
@@ -0,0 +1,14 @@ 
+	.section .text.foobar,"axG",%progbits,foo,comdat
+	.weak foo
+	.type foo,%function
+foo:
+	.nop
+	.size foo, . - foo
+	.weak bar
+	.set bar, foo
+	.text
+	.global baz
+	.type baz,%function
+baz:
+	.dc.a foo
+	.size baz, . - baz
diff --git a/ld/testsuite/ld-elf/undefweak-1b.s b/ld/testsuite/ld-elf/undefweak-1b.s
new file mode 100644
index 00000000000..8ba34ea855e
--- /dev/null
+++ b/ld/testsuite/ld-elf/undefweak-1b.s
@@ -0,0 +1,10 @@ 
+	.section .text.foobar,"axG",%progbits,foo,comdat
+	.weak foo
+	.type foo,%function
+foo:
+	.nop
+	.size foo, . - foo
+	.text
+	.global _start
+	.set _start,foo
+	.dc.a baz
diff --git a/ld/testsuite/ld-x86-64/weakundef-1.nd b/ld/testsuite/ld-x86-64/weakundef-1.nd
new file mode 100644
index 00000000000..c4c89bfabdb
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/weakundef-1.nd
@@ -0,0 +1,6 @@ 
+#nm: -n
+#target: x86_64-*-*
+
+#...
+ +w +bar
+#pass
diff --git a/ld/testsuite/ld-x86-64/weakundef-1a.s b/ld/testsuite/ld-x86-64/weakundef-1a.s
new file mode 100644
index 00000000000..b6eea3201a6
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/weakundef-1a.s
@@ -0,0 +1,6 @@ 
+	.text
+	.global foo
+	.type foo,%function
+foo:
+	.nop
+	.size foo, . - foo
diff --git a/ld/testsuite/ld-x86-64/weakundef-1b.s b/ld/testsuite/ld-x86-64/weakundef-1b.s
new file mode 100644
index 00000000000..3d2da7dc548
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/weakundef-1b.s
@@ -0,0 +1,8 @@ 
+	.weak bar
+	.text
+	.global _start
+	.type _start,%function
+_start:
+	mov	bar@GOTPCREL(%rip), %rax
+	mov	foo@GOTPCREL(%rip), %rax
+	.size _start, . - _start
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index e370f393a7a..6546fff4efc 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -264,6 +264,24 @@  run_ld_link_tests [list \
 	 {objdump -drj.plt tlsdesc.pd}} \
 	 "libtlsdesc.so" \
     ] \
+    [list \
+	"Generate x86-64-weakundef-1.so" \
+	"-shared -melf_x86_64" \
+	"" \
+	"--64" \
+	{weakundef-1a.s} \
+	{} \
+	"x86-64-weakundef-1.so" \
+    ] \
+    [list \
+	"Generate x86-64-weakundef-1" \
+	"-melf_x86_64" \
+	"tmpdir/x86-64-weakundef-1.so" \
+	"--64" \
+	{weakundef-1b.s} \
+	{{nm -n weakundef-1.nd}} \
+	"x86-64-weakundef-1" \
+    ] \
 ]
 
 set test_name "Mixed x86_64 and i386 input test 1"