[2/3] PE-COFF: Prefer weak external with defined fallback over null fallback

Message ID 20260530191522.57144-3-peter0x44@disroot.org
State New
Headers
Series PE-COFF: Fix weak external symbol resolution bugs |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_binutils_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_binutils_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_binutils_check--master-aarch64 success Test passed
linaro-tcwg-bot/tcwg_binutils_check--master-arm success Test passed

Commit Message

Peter Damianov May 30, 2026, 7:15 p.m. UTC
  When two PE COFF weak externals for the same symbol are linked (both
C_NT_WEAK with aux records), the generic linker takes no action on the
second one (NOACT: weak undef meets existing weak undef).  This means
the first file's fallback alias always wins regardless of whether it
points to a real definition or to NULL.

This causes a problem when a weak declaration (fallback = NULL,
i.e. "resolve to NULL if nothing provides it") is linked before a
weak definition (fallback = actual function body).  The symbol resolves
to address 0 at runtime, causing a crash when called.

Fix this by comparing the fallback targets when two weak externals
meet: if the incoming weak external's fallback resolves to a defined
symbol and the existing one does not (or points to NULL),
update the aux record to use the better fallback.

This matches the behavior of lld after:
  https://github.com/llvm/llvm-project/commit/7ca5698b4c3698d06065e0941df7f23d72913d23

bfd/
	* cofflink.c (coff_link_add_symbols): When two PE COFF weak
	externals meet, prefer the one whose fallback alias resolves
	to a defined symbol.
---
 bfd/cofflink.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
  

Comments

Alan Modra May 31, 2026, 10:45 p.m. UTC | #1
On Sat, May 30, 2026 at 03:15:21PM -0400, Peter Damianov wrote:

Heh, ignore my question on the previous patch.

> bfd/
> 	* cofflink.c (coff_link_add_symbols): When two PE COFF weak
> 	externals meet, prefer the one whose fallback alias resolves
> 	to a defined symbol.
> ---
>  bfd/cofflink.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 70 insertions(+)
> 
> diff --git a/bfd/cofflink.c b/bfd/cofflink.c
> index a38f692a008..93ecfebef03 100644
> --- a/bfd/cofflink.c
> +++ b/bfd/cofflink.c
> @@ -543,6 +543,76 @@ coff_link_add_symbols (bfd *abfd,
>  		      (*sym_hash)->aux = alloc;
>  		    }
>  		}
> +
> +	      /* When two PE COFF weak externals meet (both with aux
> +		 records specifying fallback aliases), prefer the one
> +		 whose fallback resolves to a defined symbol over one
> +		 whose fallback is undefined or NULL.  This
> +		 handles the case where a weak declaration (with a
> +		 fallback of NULL) is seen before a weak
> +		 definition (with a fallback of the actual function
> +		 body).  */
> +	      else if (IS_WEAK_EXTERNAL (abfd, sym)
> +		       && sym.n_numaux > 0
> +		       && (*sym_hash)->root.type == bfd_link_hash_undefweak
> +		       && (*sym_hash)->symbol_class == C_NT_WEAK
> +		       && (*sym_hash)->numaux == 1)
> +		{
> +		  /* Parse the incoming aux to get the fallback tagndx.  */
> +		  union internal_auxent new_aux;
> +		  bfd_coff_swap_aux_in (abfd, esym + symesz, sym.n_type,
> +					sym.n_sclass, 0, sym.n_numaux,
> +					&new_aux);
> +		  unsigned long new_tagndx = new_aux.x_sym.x_tagndx.u32;
> +		  struct coff_link_hash_entry *h2_new = NULL;
> +		  struct coff_link_hash_entry *h2_old = NULL;
> +
> +		  if (new_tagndx < obj_raw_syment_count (abfd))
> +		    h2_new = obj_coff_sym_hashes (abfd)[new_tagndx];
> +
> +		  unsigned long old_tagndx
> +		    = (*sym_hash)->aux->x_sym.x_tagndx.u32;
> +		  if (old_tagndx
> +		      < obj_raw_syment_count ((*sym_hash)->auxbfd))
> +		    h2_old = obj_coff_sym_hashes
> +		      ((*sym_hash)->auxbfd)[old_tagndx];
> +
> +		  /* Update if the new fallback is defined but the old
> +		     one is not (or points to NULL).  */
> +		  if (h2_new != NULL
> +		      && (h2_new->root.type == bfd_link_hash_defined
> +			  || h2_new->root.type == bfd_link_hash_defweak)
> +		      && (h2_old == NULL
> +			  || h2_old->root.type == bfd_link_hash_undefined
> +			  || h2_old->root.type == bfd_link_hash_undefweak
> +			  || (h2_old->root.type == bfd_link_hash_defined
> +			      && bfd_is_abs_section
> +				   (h2_old->root.u.def.section)
> +			      && h2_old->root.u.def.value == 0)))

Why the special treatment of absolute symbols with value zero?

> +		    {
> +		      union internal_auxent *alloc;
> +		      unsigned int i;
> +		      bfd_byte *eaux;
> +		      union internal_auxent *iaux;
> +
> +		      (*sym_hash)->symbol_class = sym.n_sclass;
> +		      (*sym_hash)->auxbfd = abfd;
> +		      (*sym_hash)->numaux = sym.n_numaux;
> +		      alloc = ((union internal_auxent *)

No need to cast the void* return from bfd_hash_allocate.  Lots of
older code does this, but that's due to it originally being written
for K&R C.

> +			       bfd_hash_allocate (&info->hash->table,
> +						  (sym.n_numaux
> +						   * sizeof (*alloc))));
> +		      if (alloc == NULL)
> +			goto error_return;
> +		      for (i = 0, eaux = esym + symesz, iaux = alloc;
> +			   i < sym.n_numaux;
> +			   i++, eaux += symesz, iaux++)
> +			bfd_coff_swap_aux_in (abfd, eaux, sym.n_type,
> +					      sym.n_sclass, (int) i,
> +					      sym.n_numaux, iaux);
> +		      (*sym_hash)->aux = alloc;
> +		    }
> +		}
>  	    }
>  
>  	  if (classification == COFF_SYMBOL_PE_SECTION
> -- 
> 2.54.0
  

Patch

diff --git a/bfd/cofflink.c b/bfd/cofflink.c
index a38f692a008..93ecfebef03 100644
--- a/bfd/cofflink.c
+++ b/bfd/cofflink.c
@@ -543,6 +543,76 @@  coff_link_add_symbols (bfd *abfd,
 		      (*sym_hash)->aux = alloc;
 		    }
 		}
+
+	      /* When two PE COFF weak externals meet (both with aux
+		 records specifying fallback aliases), prefer the one
+		 whose fallback resolves to a defined symbol over one
+		 whose fallback is undefined or NULL.  This
+		 handles the case where a weak declaration (with a
+		 fallback of NULL) is seen before a weak
+		 definition (with a fallback of the actual function
+		 body).  */
+	      else if (IS_WEAK_EXTERNAL (abfd, sym)
+		       && sym.n_numaux > 0
+		       && (*sym_hash)->root.type == bfd_link_hash_undefweak
+		       && (*sym_hash)->symbol_class == C_NT_WEAK
+		       && (*sym_hash)->numaux == 1)
+		{
+		  /* Parse the incoming aux to get the fallback tagndx.  */
+		  union internal_auxent new_aux;
+		  bfd_coff_swap_aux_in (abfd, esym + symesz, sym.n_type,
+					sym.n_sclass, 0, sym.n_numaux,
+					&new_aux);
+		  unsigned long new_tagndx = new_aux.x_sym.x_tagndx.u32;
+		  struct coff_link_hash_entry *h2_new = NULL;
+		  struct coff_link_hash_entry *h2_old = NULL;
+
+		  if (new_tagndx < obj_raw_syment_count (abfd))
+		    h2_new = obj_coff_sym_hashes (abfd)[new_tagndx];
+
+		  unsigned long old_tagndx
+		    = (*sym_hash)->aux->x_sym.x_tagndx.u32;
+		  if (old_tagndx
+		      < obj_raw_syment_count ((*sym_hash)->auxbfd))
+		    h2_old = obj_coff_sym_hashes
+		      ((*sym_hash)->auxbfd)[old_tagndx];
+
+		  /* Update if the new fallback is defined but the old
+		     one is not (or points to NULL).  */
+		  if (h2_new != NULL
+		      && (h2_new->root.type == bfd_link_hash_defined
+			  || h2_new->root.type == bfd_link_hash_defweak)
+		      && (h2_old == NULL
+			  || h2_old->root.type == bfd_link_hash_undefined
+			  || h2_old->root.type == bfd_link_hash_undefweak
+			  || (h2_old->root.type == bfd_link_hash_defined
+			      && bfd_is_abs_section
+				   (h2_old->root.u.def.section)
+			      && h2_old->root.u.def.value == 0)))
+		    {
+		      union internal_auxent *alloc;
+		      unsigned int i;
+		      bfd_byte *eaux;
+		      union internal_auxent *iaux;
+
+		      (*sym_hash)->symbol_class = sym.n_sclass;
+		      (*sym_hash)->auxbfd = abfd;
+		      (*sym_hash)->numaux = sym.n_numaux;
+		      alloc = ((union internal_auxent *)
+			       bfd_hash_allocate (&info->hash->table,
+						  (sym.n_numaux
+						   * sizeof (*alloc))));
+		      if (alloc == NULL)
+			goto error_return;
+		      for (i = 0, eaux = esym + symesz, iaux = alloc;
+			   i < sym.n_numaux;
+			   i++, eaux += symesz, iaux++)
+			bfd_coff_swap_aux_in (abfd, eaux, sym.n_type,
+					      sym.n_sclass, (int) i,
+					      sym.n_numaux, iaux);
+		      (*sym_hash)->aux = alloc;
+		    }
+		}
 	    }
 
 	  if (classification == COFF_SYMBOL_PE_SECTION