[10/14] libdw: Try .dwp file in __libdw_find_split_unit()

Message ID 45e13e025847b8fb84cd8acfe66cae87961993d4.1695837512.git.osandov@fb.com
State Superseded
Delegated to: Mark Wielaard
Headers
Series elfutils: DWARF package (.dwp) file support |

Commit Message

Omar Sandoval Sept. 27, 2023, 6:20 p.m. UTC
  From: Omar Sandoval <osandov@fb.com>

Try opening the file in the location suggested by the standard (the
skeleton file name + ".dwp") and looking up the unit in the package
index.  The rest is similar to .dwo files, with slightly different
cleanup since a single Dwarf handle is shared.

Signed-off-by: Omar Sandoval <osandov@fb.com>
---
 libdw/ChangeLog                   | 11 ++++-
 libdw/dwarf_begin_elf.c           |  1 +
 libdw/dwarf_cu_dwp_section_info.c | 19 ++++++++
 libdw/dwarf_end.c                 | 10 ++++-
 libdw/libdwP.h                    | 16 ++++++-
 libdw/libdw_find_split_unit.c     | 75 ++++++++++++++++++++++++++++---
 6 files changed, 122 insertions(+), 10 deletions(-)
  

Comments

Mark Wielaard Nov. 2, 2023, 7:56 p.m. UTC | #1
Hi Omar,

On Wed, Sep 27, 2023 at 11:20:59AM -0700, Omar Sandoval wrote:
> Try opening the file in the location suggested by the standard (the
> skeleton file name + ".dwp") and looking up the unit in the package
> index.  The rest is similar to .dwo files, with slightly different
> cleanup since a single Dwarf handle is shared.

This seems a good default given it is what the standard says.
But do you know of any distro doing this?

The case I am wondering about is for separate .debug files (which
surprisingly isn't standardized). In that case this would be
foobar.debug.dwz (and not foobar.dwz).

It might also be an idea to just create one file with both the
skeletons and the .dwo and dwz index sections.
 
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
>  libdw/ChangeLog                   | 11 ++++-
>  libdw/dwarf_begin_elf.c           |  1 +
>  libdw/dwarf_cu_dwp_section_info.c | 19 ++++++++
>  libdw/dwarf_end.c                 | 10 ++++-
>  libdw/libdwP.h                    | 16 ++++++-
>  libdw/libdw_find_split_unit.c     | 75 ++++++++++++++++++++++++++++---
>  6 files changed, 122 insertions(+), 10 deletions(-)
> 
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index f491587f..f37d9411 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -1,6 +1,8 @@
>  2023-09-27  Omar Sandoval  <osandov@fb.com>
>  
>  	* libdw_find_split_unit.c (try_split_file): Make static.
> +	(try_dwp_file): New function.
> +	(__libdw_find_split_unit): Call try_dwp_file.
>  	* dwarf_entrypc.c (dwarf_entrypc): Call dwarf_lowpc.
>  	* dwarf_ranges.c (dwarf_ranges): Use skeleton ranges section for
>  	skeleton units.
> @@ -20,7 +22,8 @@
>  	instead of dbg parameter, which is now unused.
>  	* libdwP.h (Dwarf_Macro_Op_Table): Replace is_64bit with address_size
>  	and offset_size.  Add dbg.
> -	(Dwarf): Add cu_index and tu_index.  Add elfpath.
> +	(Dwarf): Add cu_index and tu_index.  Add elfpath.  Add dwp_dwarf and
> +	dwp_fd.
>  	(Dwarf_CU): Add dwp_row.
>  	(Dwarf_Package_Index): New type.
>  	(DW_SECT_TYPES): New macro.
> @@ -31,6 +34,8 @@
>  	(__libdw_debugdir): Replace declaration with...
>  	(__libdw_elfpath): New declaration.
>  	(__libdw_set_debugdir): New declaration.
> +	(__libdw_dwp_findcu_id): New declaration.
> +	(__libdw_link_skel_split): Handle .debug_addr for dwp.
>  	* dwarf_begin_elf.c (dwarf_scnnames): Add IDX_debug_cu_index and
>  	IDX_debug_tu_index.
>  	(scn_to_string_section_idx): Ditto.
> @@ -43,9 +48,11 @@
>  	(__libdw_set_debugdir): New function.
>  	(valid_p): Call __libdw_elfpath and __libdw_set_debugdir instead of
>  	__libdw_debugdir.
> +	(dwarf_begin_elf): Initialize result->dwp_fd.
>  	* Makefile.am (libdw_a_SOURCES): Add dwarf_cu_dwp_section_info.c.
>  	* dwarf_end.c (dwarf_end): Free dwarf->cu_index and dwarf->tu_index.
> -	Free dwarf->elfpath.
> +	Free dwarf->elfpath.  Free dwarf->dwp_dwarf and close dwarf->dwp_fd.
> +	(cu_free): Don't free split dbg if it is dwp_dwarf.
>  	* dwarf_error.c (errmsgs): Add DWARF_E_UNKNOWN_SECTION.
>  	* libdw.h (dwarf_cu_dwp_section_info): New declaration.
>  	* libdw.map (ELFUTILS_0.190): Add dwarf_cu_dwp_section_info.
> diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
> index 323a91d0..ca2b7e2a 100644
> --- a/libdw/dwarf_begin_elf.c
> +++ b/libdw/dwarf_begin_elf.c
> @@ -567,6 +567,7 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp)
>  
>    result->elf = elf;
>    result->alt_fd = -1;
> +  result->dwp_fd = -1;
>  
>    /* Initialize the memory handling.  Initial blocks are allocated on first
>       actual allocation.  */
> diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c
> index 6766fb9a..7bf08d9d 100644
> --- a/libdw/dwarf_cu_dwp_section_info.c
> +++ b/libdw/dwarf_cu_dwp_section_info.c
> @@ -340,6 +340,25 @@ __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
>  				   abbrev_offsetp, NULL);
>  }
>  
> +Dwarf_CU *
> +internal_function
> +__libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
> +{
> +  Dwarf_Package_Index *index = __libdw_package_index (dbg, false);
> +  uint32_t unit_row;
> +  Dwarf_Off offset;
> +  Dwarf_CU *cu;
> +  if (__libdw_dwp_unit_row (index, unit_id8, &unit_row) == 0
> +      && __libdw_dwp_section_info (index, unit_row, DW_SECT_INFO, &offset,
> +				   NULL) == 0
> +      && (cu = __libdw_findcu (dbg, offset, false)) != NULL
> +      && cu->unit_type == DW_UT_split_compile
> +      && cu->unit_id8 == unit_id8)
> +    return cu;
> +  else
> +    return NULL;
> +}

No lookup for split_type?

>  int
>  dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
>  			   Dwarf_Off *offsetp, Dwarf_Off *sizep)
> diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
> index b7f817d9..78224ddb 100644
> --- a/libdw/dwarf_end.c
> +++ b/libdw/dwarf_end.c
> @@ -66,7 +66,9 @@ cu_free (void *arg)
>  	  /* The fake_addr_cu might be shared, only release one.  */
>  	  if (p->dbg->fake_addr_cu == p->split->dbg->fake_addr_cu)
>  	    p->split->dbg->fake_addr_cu = NULL;
> -	  INTUSE(dwarf_end) (p->split->dbg);
> +	  /* There is only one DWP file. We free it later.  */
> +	  if (p->split->dbg != p->dbg->dwp_dwarf)
> +	    INTUSE(dwarf_end) (p->split->dbg);
>  	}
>      }
>  }
> @@ -147,6 +149,12 @@ dwarf_end (Dwarf *dwarf)
>  	  close (dwarf->alt_fd);
>  	}
>  
> +      if (dwarf->dwp_fd != -1)
> +	{
> +	  INTUSE(dwarf_end) (dwarf->dwp_dwarf);
> +	  close (dwarf->dwp_fd);
> +	}
> +
>        /* The cached path and dir we found the Dwarf ELF file in.  */
>        free (dwarf->elfpath);
>        free (dwarf->debugdir);

OK

> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index 214d1711..16355274 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -180,6 +180,9 @@ struct Dwarf
>    /* dwz alternate DWARF file.  */
>    Dwarf *alt_dwarf;
>  
> +  /* DWARF package file.  */
> +  Dwarf *dwp_dwarf;
> +
>    /* The section data.  */
>    Elf_Data *sectiondata[IDX_last];
>  
> @@ -197,6 +200,9 @@ struct Dwarf
>       close this file descriptor.  */
>    int alt_fd;
>  
> +  /* File descriptor of DWARF package file.  */
> +  int dwp_fd;
> +
>    /* Information for traversing the .debug_pubnames section.  This is
>       an array and separately allocated with malloc.  */
>    struct pubnames_s
> @@ -719,6 +725,10 @@ extern int __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
>  				  Dwarf_Off *abbrev_offsetp)
>       __nonnull_attribute__ (1, 7, 8) internal_function;
>  
> +/* Find the compilation unit in a DWARF package file with the given id.  */
> +extern Dwarf_CU *__libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
> +     __nonnull_attribute__ (1) internal_function;
> +
>  /* Get abbreviation with given code.  */
>  extern Dwarf_Abbrev *__libdw_findabbrev (struct Dwarf_CU *cu,
>  					 unsigned int code)

OK.

> @@ -1373,8 +1383,10 @@ __libdw_link_skel_split (Dwarf_CU *skel, Dwarf_CU *split)
>       There is only one per split debug.  */
>    Dwarf *dbg = skel->dbg;
>    Dwarf *sdbg = split->dbg;
> -  if (sdbg->sectiondata[IDX_debug_addr] == NULL
> -      && dbg->sectiondata[IDX_debug_addr] != NULL)
> +  if (dbg->sectiondata[IDX_debug_addr] != NULL
> +      && (sdbg->sectiondata[IDX_debug_addr] == NULL
> +	  || (sdbg->sectiondata[IDX_debug_addr]
> +	      == dbg->sectiondata[IDX_debug_addr])))
>      {
>        sdbg->sectiondata[IDX_debug_addr]
>  	= dbg->sectiondata[IDX_debug_addr];

ehe? What exactly are we checking here?

> diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> index 533f8072..e1e78951 100644
> --- a/libdw/libdw_find_split_unit.c
> +++ b/libdw/libdw_find_split_unit.c
> @@ -85,6 +85,67 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
>      }
>  }
>  
> +static void
> +try_dwp_file (Dwarf_CU *cu)
> +{
> +  if (cu->dbg->dwp_dwarf == NULL)
> +    {
> +      if (cu->dbg->elfpath != NULL)
> +	{
> +	  /* The DWARF 5 standard says "the package file is typically placed in
> +	     the same directory as the application, and is given the same name
> +	     with a '.dwp' extension".  */
> +	  size_t elfpath_len = strlen (cu->dbg->elfpath);
> +	  char *dwp_path = malloc (elfpath_len + 5);
> +	  if (dwp_path == NULL)
> +	    {
> +	      __libdw_seterrno (DWARF_E_NOMEM);
> +	      return;
> +	    }

Yes, but the caller doesn't check for errors. So the seterrno is not
very useful.

> +	  memcpy (dwp_path, cu->dbg->elfpath, elfpath_len);
> +	  strcpy (dwp_path + elfpath_len, ".dwp");
> +	  int dwp_fd = open (dwp_path, O_RDONLY);
> +	  free (dwp_path);
> +	  if (dwp_fd != -1)
> +	    {
> +	      Dwarf *dwp_dwarf = dwarf_begin (dwp_fd, DWARF_C_READ);
> +	      /* There's no way to know whether we got the correct file until
> +		 we look up the unit, but it should at least be a dwp file.  */
> +	      if (dwp_dwarf != NULL
> +		  && (dwp_dwarf->sectiondata[IDX_debug_cu_index] != NULL
> +		      || dwp_dwarf->sectiondata[IDX_debug_tu_index] != NULL))
> +		{
> +		  cu->dbg->dwp_dwarf = dwp_dwarf;
> +		  cu->dbg->dwp_fd = dwp_fd;
> +		}
> +	      else
> +		close (dwp_fd);
> +	    }
> +	}
> +      if (cu->dbg->dwp_dwarf == NULL)
> +	cu->dbg->dwp_dwarf = (Dwarf *) -1;
> +    }
> +
> +  if (cu->dbg->dwp_dwarf != (Dwarf *) -1)
> +    {
> +      Dwarf_CU *split = __libdw_dwp_findcu_id (cu->dbg->dwp_dwarf,
> +					       cu->unit_id8);
> +      if (split != NULL)
> +	{
> +	  if (tsearch (split->dbg, &cu->dbg->split_tree,
> +		       __libdw_finddbg_cb) == NULL)
> +	    {
> +	      /* Something went wrong.  Don't link.  */
> +	      __libdw_seterrno (DWARF_E_NOMEM);
> +	      return;
> +	    }

Same as above.

> +	  /* Link skeleton and split compile units.  */
> +	  __libdw_link_skel_split (cu, split);
> +	}
> +    }
> +}

OK.

>  Dwarf_CU *
>  internal_function
>  __libdw_find_split_unit (Dwarf_CU *cu)
> @@ -98,14 +159,18 @@ __libdw_find_split_unit (Dwarf_CU *cu)
>       same id as the skeleton.  */
>    if (cu->unit_type == DW_UT_skeleton)
>      {
> +      /* First, try the dwp file.  */
> +      try_dwp_file (cu);
> +
>        Dwarf_Die cudie = CUDIE (cu);
>        Dwarf_Attribute dwo_name;
> -      /* It is fine if dwo_dir doesn't exists, but then dwo_name needs
> -	 to be an absolute path.  */
> -      if (dwarf_attr (&cudie, DW_AT_dwo_name, &dwo_name) != NULL
> -	  || dwarf_attr (&cudie, DW_AT_GNU_dwo_name, &dwo_name) != NULL)
> +      /* Try a dwo file.  It is fine if dwo_dir doesn't exist, but then
> +	 dwo_name needs to be an absolute path.  */
> +      if (cu->split == (Dwarf_CU *) -1
> +	  && (dwarf_attr (&cudie, DW_AT_dwo_name, &dwo_name) != NULL
> +	      || dwarf_attr (&cudie, DW_AT_GNU_dwo_name, &dwo_name) != NULL))
>  	{
> -	  /* First try the dwo file name in the same directory
> +	  /* Try the dwo file name in the same directory
>  	     as we found the skeleton file.  */
>  	  const char *dwo_file = dwarf_formstring (&dwo_name);
>  	  const char *debugdir = cu->dbg->debugdir;

OK. Because try_dwp_file will call __libdw_link_skel_split to set
cu->split.

This again will have to be analyzed carefully when we add the
thread-safety work.

Thanks,

Mark
  
Omar Sandoval Nov. 7, 2023, 7:07 a.m. UTC | #2
On Thu, Nov 02, 2023 at 08:56:14PM +0100, Mark Wielaard wrote:
> Hi Omar,
> 
> On Wed, Sep 27, 2023 at 11:20:59AM -0700, Omar Sandoval wrote:
> > Try opening the file in the location suggested by the standard (the
> > skeleton file name + ".dwp") and looking up the unit in the package
> > index.  The rest is similar to .dwo files, with slightly different
> > cleanup since a single Dwarf handle is shared.
> 
> This seems a good default given it is what the standard says.
> But do you know of any distro doing this?

I don't. As far as I know, Meta and Google are the only ones using dwp
widely, and we're putting it in the location recommended by the
standard. (I think some Rust tooling is starting to pick dwp up, too,
but I haven't looked into it much.)

> The case I am wondering about is for separate .debug files (which
> surprisingly isn't standardized). In that case this would be
> foobar.debug.dwz (and not foobar.dwz).
> 
> It might also be an idea to just create one file with both the
> skeletons and the .dwo and dwz index sections.
>  
> > Signed-off-by: Omar Sandoval <osandov@fb.com>
> > ---
> >  libdw/ChangeLog                   | 11 ++++-
> >  libdw/dwarf_begin_elf.c           |  1 +
> >  libdw/dwarf_cu_dwp_section_info.c | 19 ++++++++
> >  libdw/dwarf_end.c                 | 10 ++++-
> >  libdw/libdwP.h                    | 16 ++++++-
> >  libdw/libdw_find_split_unit.c     | 75 ++++++++++++++++++++++++++++---
> >  6 files changed, 122 insertions(+), 10 deletions(-)
> > 
> > diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> > index f491587f..f37d9411 100644
> > --- a/libdw/ChangeLog
> > +++ b/libdw/ChangeLog
> > @@ -1,6 +1,8 @@
> >  2023-09-27  Omar Sandoval  <osandov@fb.com>
> >  
> >  	* libdw_find_split_unit.c (try_split_file): Make static.
> > +	(try_dwp_file): New function.
> > +	(__libdw_find_split_unit): Call try_dwp_file.
> >  	* dwarf_entrypc.c (dwarf_entrypc): Call dwarf_lowpc.
> >  	* dwarf_ranges.c (dwarf_ranges): Use skeleton ranges section for
> >  	skeleton units.
> > @@ -20,7 +22,8 @@
> >  	instead of dbg parameter, which is now unused.
> >  	* libdwP.h (Dwarf_Macro_Op_Table): Replace is_64bit with address_size
> >  	and offset_size.  Add dbg.
> > -	(Dwarf): Add cu_index and tu_index.  Add elfpath.
> > +	(Dwarf): Add cu_index and tu_index.  Add elfpath.  Add dwp_dwarf and
> > +	dwp_fd.
> >  	(Dwarf_CU): Add dwp_row.
> >  	(Dwarf_Package_Index): New type.
> >  	(DW_SECT_TYPES): New macro.
> > @@ -31,6 +34,8 @@
> >  	(__libdw_debugdir): Replace declaration with...
> >  	(__libdw_elfpath): New declaration.
> >  	(__libdw_set_debugdir): New declaration.
> > +	(__libdw_dwp_findcu_id): New declaration.
> > +	(__libdw_link_skel_split): Handle .debug_addr for dwp.
> >  	* dwarf_begin_elf.c (dwarf_scnnames): Add IDX_debug_cu_index and
> >  	IDX_debug_tu_index.
> >  	(scn_to_string_section_idx): Ditto.
> > @@ -43,9 +48,11 @@
> >  	(__libdw_set_debugdir): New function.
> >  	(valid_p): Call __libdw_elfpath and __libdw_set_debugdir instead of
> >  	__libdw_debugdir.
> > +	(dwarf_begin_elf): Initialize result->dwp_fd.
> >  	* Makefile.am (libdw_a_SOURCES): Add dwarf_cu_dwp_section_info.c.
> >  	* dwarf_end.c (dwarf_end): Free dwarf->cu_index and dwarf->tu_index.
> > -	Free dwarf->elfpath.
> > +	Free dwarf->elfpath.  Free dwarf->dwp_dwarf and close dwarf->dwp_fd.
> > +	(cu_free): Don't free split dbg if it is dwp_dwarf.
> >  	* dwarf_error.c (errmsgs): Add DWARF_E_UNKNOWN_SECTION.
> >  	* libdw.h (dwarf_cu_dwp_section_info): New declaration.
> >  	* libdw.map (ELFUTILS_0.190): Add dwarf_cu_dwp_section_info.
> > diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
> > index 323a91d0..ca2b7e2a 100644
> > --- a/libdw/dwarf_begin_elf.c
> > +++ b/libdw/dwarf_begin_elf.c
> > @@ -567,6 +567,7 @@ dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp)
> >  
> >    result->elf = elf;
> >    result->alt_fd = -1;
> > +  result->dwp_fd = -1;
> >  
> >    /* Initialize the memory handling.  Initial blocks are allocated on first
> >       actual allocation.  */
> > diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c
> > index 6766fb9a..7bf08d9d 100644
> > --- a/libdw/dwarf_cu_dwp_section_info.c
> > +++ b/libdw/dwarf_cu_dwp_section_info.c
> > @@ -340,6 +340,25 @@ __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
> >  				   abbrev_offsetp, NULL);
> >  }
> >  
> > +Dwarf_CU *
> > +internal_function
> > +__libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
> > +{
> > +  Dwarf_Package_Index *index = __libdw_package_index (dbg, false);
> > +  uint32_t unit_row;
> > +  Dwarf_Off offset;
> > +  Dwarf_CU *cu;
> > +  if (__libdw_dwp_unit_row (index, unit_id8, &unit_row) == 0
> > +      && __libdw_dwp_section_info (index, unit_row, DW_SECT_INFO, &offset,
> > +				   NULL) == 0
> > +      && (cu = __libdw_findcu (dbg, offset, false)) != NULL
> > +      && cu->unit_type == DW_UT_split_compile
> > +      && cu->unit_id8 == unit_id8)
> > +    return cu;
> > +  else
> > +    return NULL;
> > +}
> 
> No lookup for split_type?

Type units don't have a skeleton (except apparently in some early
prototypes of the DWARF4 GNU format), so there's no need to look them up
this way.

[snip]

> > @@ -1373,8 +1383,10 @@ __libdw_link_skel_split (Dwarf_CU *skel, Dwarf_CU *split)
> >       There is only one per split debug.  */
> >    Dwarf *dbg = skel->dbg;
> >    Dwarf *sdbg = split->dbg;
> > -  if (sdbg->sectiondata[IDX_debug_addr] == NULL
> > -      && dbg->sectiondata[IDX_debug_addr] != NULL)
> > +  if (dbg->sectiondata[IDX_debug_addr] != NULL
> > +      && (sdbg->sectiondata[IDX_debug_addr] == NULL
> > +	  || (sdbg->sectiondata[IDX_debug_addr]
> > +	      == dbg->sectiondata[IDX_debug_addr])))
> >      {
> >        sdbg->sectiondata[IDX_debug_addr]
> >  	= dbg->sectiondata[IDX_debug_addr];
> 
> ehe? What exactly are we checking here?

Oops, I could've sworn I added a comment here. A couple more lines of
context in the diff are helpful here:

@@ -1371,12 +1381,14 @@ __libdw_link_skel_split (Dwarf_CU *skel, Dwarf_CU *split)
   /* Get .debug_addr and addr_base greedy.
      We also need it for the fake addr cu.
      There is only one per split debug.  */
   Dwarf *dbg = skel->dbg;
   Dwarf *sdbg = split->dbg;
-  if (sdbg->sectiondata[IDX_debug_addr] == NULL
-      && dbg->sectiondata[IDX_debug_addr] != NULL)
+  if (dbg->sectiondata[IDX_debug_addr] != NULL
+      && (sdbg->sectiondata[IDX_debug_addr] == NULL
+	  || (sdbg->sectiondata[IDX_debug_addr]
+	      == dbg->sectiondata[IDX_debug_addr])))
     {
       sdbg->sectiondata[IDX_debug_addr]
 	= dbg->sectiondata[IDX_debug_addr];
       split->addr_base = __libdw_cu_addr_base (skel);
       sdbg->fake_addr_cu = dbg->fake_addr_cu;

This is supposed to copy the .debug_addr section of the skeleton file to
the split file, and also copy the .debug_addr base of the skeleton unit
to the split unit. The former only needs to be done once per file, but
for DWP the latter needs to be done for every unit.

So the check is "link the .debug_addr info if this is the first time
we're linking this split file OR are we linking this split file to the
same skeleton file."

I'll add a comment.

> > diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
> > index 533f8072..e1e78951 100644
> > --- a/libdw/libdw_find_split_unit.c
> > +++ b/libdw/libdw_find_split_unit.c
> > @@ -85,6 +85,67 @@ try_split_file (Dwarf_CU *cu, const char *dwo_path)
> >      }
> >  }
> >  
> > +static void
> > +try_dwp_file (Dwarf_CU *cu)
> > +{
> > +  if (cu->dbg->dwp_dwarf == NULL)
> > +    {
> > +      if (cu->dbg->elfpath != NULL)
> > +	{
> > +	  /* The DWARF 5 standard says "the package file is typically placed in
> > +	     the same directory as the application, and is given the same name
> > +	     with a '.dwp' extension".  */
> > +	  size_t elfpath_len = strlen (cu->dbg->elfpath);
> > +	  char *dwp_path = malloc (elfpath_len + 5);
> > +	  if (dwp_path == NULL)
> > +	    {
> > +	      __libdw_seterrno (DWARF_E_NOMEM);
> > +	      return;
> > +	    }
> 
> Yes, but the caller doesn't check for errors. So the seterrno is not
> very useful.

I think I copied that from try_split_file. Should I update the caller?

[snip]

Thanks!
  

Patch

diff --git a/libdw/ChangeLog b/libdw/ChangeLog
index f491587f..f37d9411 100644
--- a/libdw/ChangeLog
+++ b/libdw/ChangeLog
@@ -1,6 +1,8 @@ 
 2023-09-27  Omar Sandoval  <osandov@fb.com>
 
 	* libdw_find_split_unit.c (try_split_file): Make static.
+	(try_dwp_file): New function.
+	(__libdw_find_split_unit): Call try_dwp_file.
 	* dwarf_entrypc.c (dwarf_entrypc): Call dwarf_lowpc.
 	* dwarf_ranges.c (dwarf_ranges): Use skeleton ranges section for
 	skeleton units.
@@ -20,7 +22,8 @@ 
 	instead of dbg parameter, which is now unused.
 	* libdwP.h (Dwarf_Macro_Op_Table): Replace is_64bit with address_size
 	and offset_size.  Add dbg.
-	(Dwarf): Add cu_index and tu_index.  Add elfpath.
+	(Dwarf): Add cu_index and tu_index.  Add elfpath.  Add dwp_dwarf and
+	dwp_fd.
 	(Dwarf_CU): Add dwp_row.
 	(Dwarf_Package_Index): New type.
 	(DW_SECT_TYPES): New macro.
@@ -31,6 +34,8 @@ 
 	(__libdw_debugdir): Replace declaration with...
 	(__libdw_elfpath): New declaration.
 	(__libdw_set_debugdir): New declaration.
+	(__libdw_dwp_findcu_id): New declaration.
+	(__libdw_link_skel_split): Handle .debug_addr for dwp.
 	* dwarf_begin_elf.c (dwarf_scnnames): Add IDX_debug_cu_index and
 	IDX_debug_tu_index.
 	(scn_to_string_section_idx): Ditto.
@@ -43,9 +48,11 @@ 
 	(__libdw_set_debugdir): New function.
 	(valid_p): Call __libdw_elfpath and __libdw_set_debugdir instead of
 	__libdw_debugdir.
+	(dwarf_begin_elf): Initialize result->dwp_fd.
 	* Makefile.am (libdw_a_SOURCES): Add dwarf_cu_dwp_section_info.c.
 	* dwarf_end.c (dwarf_end): Free dwarf->cu_index and dwarf->tu_index.
-	Free dwarf->elfpath.
+	Free dwarf->elfpath.  Free dwarf->dwp_dwarf and close dwarf->dwp_fd.
+	(cu_free): Don't free split dbg if it is dwp_dwarf.
 	* dwarf_error.c (errmsgs): Add DWARF_E_UNKNOWN_SECTION.
 	* libdw.h (dwarf_cu_dwp_section_info): New declaration.
 	* libdw.map (ELFUTILS_0.190): Add dwarf_cu_dwp_section_info.
diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
index 323a91d0..ca2b7e2a 100644
--- a/libdw/dwarf_begin_elf.c
+++ b/libdw/dwarf_begin_elf.c
@@ -567,6 +567,7 @@  dwarf_begin_elf (Elf *elf, Dwarf_Cmd cmd, Elf_Scn *scngrp)
 
   result->elf = elf;
   result->alt_fd = -1;
+  result->dwp_fd = -1;
 
   /* Initialize the memory handling.  Initial blocks are allocated on first
      actual allocation.  */
diff --git a/libdw/dwarf_cu_dwp_section_info.c b/libdw/dwarf_cu_dwp_section_info.c
index 6766fb9a..7bf08d9d 100644
--- a/libdw/dwarf_cu_dwp_section_info.c
+++ b/libdw/dwarf_cu_dwp_section_info.c
@@ -340,6 +340,25 @@  __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
 				   abbrev_offsetp, NULL);
 }
 
+Dwarf_CU *
+internal_function
+__libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
+{
+  Dwarf_Package_Index *index = __libdw_package_index (dbg, false);
+  uint32_t unit_row;
+  Dwarf_Off offset;
+  Dwarf_CU *cu;
+  if (__libdw_dwp_unit_row (index, unit_id8, &unit_row) == 0
+      && __libdw_dwp_section_info (index, unit_row, DW_SECT_INFO, &offset,
+				   NULL) == 0
+      && (cu = __libdw_findcu (dbg, offset, false)) != NULL
+      && cu->unit_type == DW_UT_split_compile
+      && cu->unit_id8 == unit_id8)
+    return cu;
+  else
+    return NULL;
+}
+
 int
 dwarf_cu_dwp_section_info (Dwarf_CU *cu, unsigned int section,
 			   Dwarf_Off *offsetp, Dwarf_Off *sizep)
diff --git a/libdw/dwarf_end.c b/libdw/dwarf_end.c
index b7f817d9..78224ddb 100644
--- a/libdw/dwarf_end.c
+++ b/libdw/dwarf_end.c
@@ -66,7 +66,9 @@  cu_free (void *arg)
 	  /* The fake_addr_cu might be shared, only release one.  */
 	  if (p->dbg->fake_addr_cu == p->split->dbg->fake_addr_cu)
 	    p->split->dbg->fake_addr_cu = NULL;
-	  INTUSE(dwarf_end) (p->split->dbg);
+	  /* There is only one DWP file. We free it later.  */
+	  if (p->split->dbg != p->dbg->dwp_dwarf)
+	    INTUSE(dwarf_end) (p->split->dbg);
 	}
     }
 }
@@ -147,6 +149,12 @@  dwarf_end (Dwarf *dwarf)
 	  close (dwarf->alt_fd);
 	}
 
+      if (dwarf->dwp_fd != -1)
+	{
+	  INTUSE(dwarf_end) (dwarf->dwp_dwarf);
+	  close (dwarf->dwp_fd);
+	}
+
       /* The cached path and dir we found the Dwarf ELF file in.  */
       free (dwarf->elfpath);
       free (dwarf->debugdir);
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index 214d1711..16355274 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -180,6 +180,9 @@  struct Dwarf
   /* dwz alternate DWARF file.  */
   Dwarf *alt_dwarf;
 
+  /* DWARF package file.  */
+  Dwarf *dwp_dwarf;
+
   /* The section data.  */
   Elf_Data *sectiondata[IDX_last];
 
@@ -197,6 +200,9 @@  struct Dwarf
      close this file descriptor.  */
   int alt_fd;
 
+  /* File descriptor of DWARF package file.  */
+  int dwp_fd;
+
   /* Information for traversing the .debug_pubnames section.  This is
      an array and separately allocated with malloc.  */
   struct pubnames_s
@@ -719,6 +725,10 @@  extern int __libdw_dwp_find_unit (Dwarf *dbg, bool debug_types, Dwarf_Off off,
 				  Dwarf_Off *abbrev_offsetp)
      __nonnull_attribute__ (1, 7, 8) internal_function;
 
+/* Find the compilation unit in a DWARF package file with the given id.  */
+extern Dwarf_CU *__libdw_dwp_findcu_id (Dwarf *dbg, uint64_t unit_id8)
+     __nonnull_attribute__ (1) internal_function;
+
 /* Get abbreviation with given code.  */
 extern Dwarf_Abbrev *__libdw_findabbrev (struct Dwarf_CU *cu,
 					 unsigned int code)
@@ -1373,8 +1383,10 @@  __libdw_link_skel_split (Dwarf_CU *skel, Dwarf_CU *split)
      There is only one per split debug.  */
   Dwarf *dbg = skel->dbg;
   Dwarf *sdbg = split->dbg;
-  if (sdbg->sectiondata[IDX_debug_addr] == NULL
-      && dbg->sectiondata[IDX_debug_addr] != NULL)
+  if (dbg->sectiondata[IDX_debug_addr] != NULL
+      && (sdbg->sectiondata[IDX_debug_addr] == NULL
+	  || (sdbg->sectiondata[IDX_debug_addr]
+	      == dbg->sectiondata[IDX_debug_addr])))
     {
       sdbg->sectiondata[IDX_debug_addr]
 	= dbg->sectiondata[IDX_debug_addr];
diff --git a/libdw/libdw_find_split_unit.c b/libdw/libdw_find_split_unit.c
index 533f8072..e1e78951 100644
--- a/libdw/libdw_find_split_unit.c
+++ b/libdw/libdw_find_split_unit.c
@@ -85,6 +85,67 @@  try_split_file (Dwarf_CU *cu, const char *dwo_path)
     }
 }
 
+static void
+try_dwp_file (Dwarf_CU *cu)
+{
+  if (cu->dbg->dwp_dwarf == NULL)
+    {
+      if (cu->dbg->elfpath != NULL)
+	{
+	  /* The DWARF 5 standard says "the package file is typically placed in
+	     the same directory as the application, and is given the same name
+	     with a '.dwp' extension".  */
+	  size_t elfpath_len = strlen (cu->dbg->elfpath);
+	  char *dwp_path = malloc (elfpath_len + 5);
+	  if (dwp_path == NULL)
+	    {
+	      __libdw_seterrno (DWARF_E_NOMEM);
+	      return;
+	    }
+	  memcpy (dwp_path, cu->dbg->elfpath, elfpath_len);
+	  strcpy (dwp_path + elfpath_len, ".dwp");
+	  int dwp_fd = open (dwp_path, O_RDONLY);
+	  free (dwp_path);
+	  if (dwp_fd != -1)
+	    {
+	      Dwarf *dwp_dwarf = dwarf_begin (dwp_fd, DWARF_C_READ);
+	      /* There's no way to know whether we got the correct file until
+		 we look up the unit, but it should at least be a dwp file.  */
+	      if (dwp_dwarf != NULL
+		  && (dwp_dwarf->sectiondata[IDX_debug_cu_index] != NULL
+		      || dwp_dwarf->sectiondata[IDX_debug_tu_index] != NULL))
+		{
+		  cu->dbg->dwp_dwarf = dwp_dwarf;
+		  cu->dbg->dwp_fd = dwp_fd;
+		}
+	      else
+		close (dwp_fd);
+	    }
+	}
+      if (cu->dbg->dwp_dwarf == NULL)
+	cu->dbg->dwp_dwarf = (Dwarf *) -1;
+    }
+
+  if (cu->dbg->dwp_dwarf != (Dwarf *) -1)
+    {
+      Dwarf_CU *split = __libdw_dwp_findcu_id (cu->dbg->dwp_dwarf,
+					       cu->unit_id8);
+      if (split != NULL)
+	{
+	  if (tsearch (split->dbg, &cu->dbg->split_tree,
+		       __libdw_finddbg_cb) == NULL)
+	    {
+	      /* Something went wrong.  Don't link.  */
+	      __libdw_seterrno (DWARF_E_NOMEM);
+	      return;
+	    }
+
+	  /* Link skeleton and split compile units.  */
+	  __libdw_link_skel_split (cu, split);
+	}
+    }
+}
+
 Dwarf_CU *
 internal_function
 __libdw_find_split_unit (Dwarf_CU *cu)
@@ -98,14 +159,18 @@  __libdw_find_split_unit (Dwarf_CU *cu)
      same id as the skeleton.  */
   if (cu->unit_type == DW_UT_skeleton)
     {
+      /* First, try the dwp file.  */
+      try_dwp_file (cu);
+
       Dwarf_Die cudie = CUDIE (cu);
       Dwarf_Attribute dwo_name;
-      /* It is fine if dwo_dir doesn't exists, but then dwo_name needs
-	 to be an absolute path.  */
-      if (dwarf_attr (&cudie, DW_AT_dwo_name, &dwo_name) != NULL
-	  || dwarf_attr (&cudie, DW_AT_GNU_dwo_name, &dwo_name) != NULL)
+      /* Try a dwo file.  It is fine if dwo_dir doesn't exist, but then
+	 dwo_name needs to be an absolute path.  */
+      if (cu->split == (Dwarf_CU *) -1
+	  && (dwarf_attr (&cudie, DW_AT_dwo_name, &dwo_name) != NULL
+	      || dwarf_attr (&cudie, DW_AT_GNU_dwo_name, &dwo_name) != NULL))
 	{
-	  /* First try the dwo file name in the same directory
+	  /* Try the dwo file name in the same directory
 	     as we found the skeleton file.  */
 	  const char *dwo_file = dwarf_formstring (&dwo_name);
 	  const char *debugdir = cu->dbg->debugdir;