[v2] Add __libdw_getdieranges

Message ID 20240228011139.299975-1-amerey@redhat.com
State Committed
Headers
Series [v2] Add __libdw_getdieranges |

Commit Message

Aaron Merey Feb. 28, 2024, 1:11 a.m. UTC
  __libdw_getdieranges builds an aranges list by iterating over each
CU and recording each address range.

This function is an alternative to dwarf_getaranges.  dwarf_getaranges
attempts to read address ranges from .debug_aranges, which might be
absent or incomplete.

This patch replaces dwarf_getaranges with __libdw_getdieranges in
dwarf_addrdie and dwfl_module_addrdie.  The existing tests in
run-getsrc-die.sh are also rerun with .debug_aranges removed from
the testfiles.

https://sourceware.org/bugzilla/show_bug.cgi?id=22288
https://sourceware.org/bugzilla/show_bug.cgi?id=30948

Signed-off-by: Aaron Merey <amerey@redhat.com>
---

v2 addresses feedback from Mark's review:
https://sourceware.org/pipermail/elfutils-devel/2024q1/006853.html

Avoid calling free on arangelist when it's possibly corrupt.
Run tests in run-getsrc-die.sh twice, once with .debug_aranges
present in the testfile and once with the section removed.

 libdw/dwarf_addrdie.c    |   2 +-
 libdw/dwarf_getaranges.c | 186 ++++++++++++++++++++++++++++++---------
 libdw/libdwP.h           |  13 ++-
 libdwfl/cu.c             |   8 +-
 tests/run-getsrc-die.sh  |  22 ++++-
 5 files changed, 179 insertions(+), 52 deletions(-)
  

Comments

Mark Wielaard Feb. 29, 2024, 9:51 p.m. UTC | #1
Hi Aaron,

On Tue, Feb 27, 2024 at 08:11:39PM -0500, Aaron Merey wrote:
> __libdw_getdieranges builds an aranges list by iterating over each
> CU and recording each address range.
> 
> This function is an alternative to dwarf_getaranges.  dwarf_getaranges
> attempts to read address ranges from .debug_aranges, which might be
> absent or incomplete.
> 
> This patch replaces dwarf_getaranges with __libdw_getdieranges in
> dwarf_addrdie and dwfl_module_addrdie.  The existing tests in
> run-getsrc-die.sh are also rerun with .debug_aranges removed from
> the testfiles.
> 
> https://sourceware.org/bugzilla/show_bug.cgi?id=22288
> https://sourceware.org/bugzilla/show_bug.cgi?id=30948
> 
> Signed-off-by: Aaron Merey <amerey@redhat.com>
> ---
> 
> v2 addresses feedback from Mark's review:
> https://sourceware.org/pipermail/elfutils-devel/2024q1/006853.html
> 
> Avoid calling free on arangelist when it's possibly corrupt.
> Run tests in run-getsrc-die.sh twice, once with .debug_aranges
> present in the testfile and once with the section removed.

This looks good to me.
Please also add a NEWS entry about this.

Thanks,

Mark
  

Patch

diff --git a/libdw/dwarf_addrdie.c b/libdw/dwarf_addrdie.c
index 3a08ab75..48a1aaea 100644
--- a/libdw/dwarf_addrdie.c
+++ b/libdw/dwarf_addrdie.c
@@ -41,7 +41,7 @@  dwarf_addrdie (Dwarf *dbg, Dwarf_Addr addr, Dwarf_Die *result)
   size_t naranges;
   Dwarf_Off off;
 
-  if (INTUSE(dwarf_getaranges) (dbg, &aranges, &naranges) != 0
+  if (__libdw_getdieranges (dbg, &aranges, &naranges) != 0
       || INTUSE(dwarf_getarangeinfo) (INTUSE(dwarf_getarange_addr) (aranges,
 								    addr),
 				      NULL, NULL, &off) != 0)
diff --git a/libdw/dwarf_getaranges.c b/libdw/dwarf_getaranges.c
index 27439d37..7a3cd36b 100644
--- a/libdw/dwarf_getaranges.c
+++ b/libdw/dwarf_getaranges.c
@@ -33,7 +33,6 @@ 
 #endif
 
 #include <stdlib.h>
-#include <assert.h>
 #include "libdwP.h"
 #include <dwarf.h>
 
@@ -54,6 +53,147 @@  compare_aranges (const void *a, const void *b)
   return 0;
 }
 
+/* Convert ARANGELIST into Dwarf_Aranges and store at ARANGES.  */
+static bool
+finalize_aranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges,
+		  struct arangelist *arangelist, unsigned int narangelist)
+{
+  /* Allocate the array for the result.  */
+  void *buf = libdw_alloc (dbg, Dwarf_Aranges,
+			   sizeof (Dwarf_Aranges)
+			   + narangelist * sizeof (Dwarf_Arange), 1);
+
+  /* First use the buffer for the pointers, and sort the entries.
+     We'll write the pointers in the end of the buffer, and then
+     copy into the buffer from the beginning so the overlap works.  */
+  eu_static_assert (sizeof (Dwarf_Arange) >= sizeof (Dwarf_Arange *));
+  struct arangelist **sortaranges
+    = (buf + sizeof (Dwarf_Aranges)
+       + ((sizeof (Dwarf_Arange) - sizeof sortaranges[0]) * narangelist));
+
+  /* The list is in LIFO order and usually they come in clumps with
+     ascending addresses.  So fill from the back to probably start with
+     runs already in order before we sort.  */
+  unsigned int i = narangelist;
+  while (i-- > 0)
+    {
+      sortaranges[i] = arangelist;
+      arangelist = arangelist->next;
+    }
+
+  /* Something went wrong if narangelist is less then the actual length
+     of arangelist. */
+  if (arangelist != NULL)
+    {
+      __libdw_seterrno (DWARF_E_UNKNOWN_ERROR);
+      return false;
+    }
+
+  /* Sort by ascending address.  */
+  qsort (sortaranges, narangelist, sizeof sortaranges[0], &compare_aranges);
+
+  /* Now that they are sorted, put them in the final array.
+     The buffers overlap, so we've clobbered the early elements
+     of SORTARANGES by the time we're reading the later ones.  */
+  *aranges = buf;
+  (*aranges)->dbg = dbg;
+  (*aranges)->naranges = narangelist;
+  if (naranges != NULL)
+    *naranges = narangelist;
+  for (i = 0; i < narangelist; ++i)
+    {
+      struct arangelist *elt = sortaranges[i];
+      (*aranges)->info[i] = elt->arange;
+      free (elt);
+    }
+
+  return true;
+}
+
+int
+__libdw_getdieranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
+{
+  if (dbg == NULL)
+    return -1;
+
+  if (dbg->dieranges != NULL)
+    {
+      *aranges = dbg->dieranges;
+      if (naranges != NULL)
+	*naranges = dbg->dieranges->naranges;
+      return 0;
+    }
+
+  struct arangelist *arangelist = NULL;
+  unsigned int narangelist = 0;
+
+  Dwarf_CU *cu = NULL;
+  while (INTUSE(dwarf_get_units) (dbg, cu, &cu, NULL, NULL, NULL, NULL) == 0)
+    {
+      Dwarf_Addr base;
+      Dwarf_Addr low;
+      Dwarf_Addr high;
+
+      Dwarf_Die cudie = CUDIE (cu);
+
+      /* Skip CUs that only contain type information.  */
+      if (!INTUSE(dwarf_hasattr) (&cudie, DW_AT_low_pc)
+	  && !INTUSE(dwarf_hasattr) (&cudie, DW_AT_ranges))
+	continue;
+
+      ptrdiff_t offset = 0;
+
+      /* Add arange for each range list entry or high_pc and low_pc.  */
+      while ((offset = INTUSE(dwarf_ranges) (&cudie, offset,
+					     &base, &low, &high)) > 0)
+	{
+	  if (offset == -1)
+	    {
+	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
+	      goto fail;
+	    }
+
+	  struct arangelist *new_arange = malloc (sizeof *new_arange);
+	  if (unlikely (new_arange == NULL))
+	    {
+	      __libdw_seterrno (DWARF_E_NOMEM);
+	      goto fail;
+	    }
+
+	  new_arange->arange.addr = low;
+	  new_arange->arange.length = (Dwarf_Word) (high - low);
+	  new_arange->arange.offset = __libdw_first_die_off_from_cu (cu);
+
+	  new_arange->next = arangelist;
+	  arangelist = new_arange;
+	  ++narangelist;
+	}
+    }
+
+  if (narangelist == 0)
+    {
+      if (naranges != NULL)
+	*naranges = 0;
+      *aranges = NULL;
+      return 0;
+    }
+
+  if (!finalize_aranges (dbg, aranges, naranges, arangelist, narangelist))
+    goto fail;
+
+  dbg->dieranges = *aranges;
+  return 0;
+
+fail:
+  while (arangelist != NULL)
+    {
+      struct arangelist *next = arangelist->next;
+      free (arangelist);
+      arangelist = next;
+    }
+  return -1;
+}
+
 int
 dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
 {
@@ -235,56 +375,16 @@  dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
 
   if (narangelist == 0)
     {
-      assert (arangelist == NULL);
       if (naranges != NULL)
 	*naranges = 0;
       *aranges = NULL;
       return 0;
     }
 
-  /* Allocate the array for the result.  */
-  void *buf = libdw_alloc (dbg, Dwarf_Aranges,
-			   sizeof (Dwarf_Aranges)
-			   + narangelist * sizeof (Dwarf_Arange), 1);
+  if (!finalize_aranges (dbg, aranges, naranges, arangelist, narangelist))
+    goto fail;
 
-  /* First use the buffer for the pointers, and sort the entries.
-     We'll write the pointers in the end of the buffer, and then
-     copy into the buffer from the beginning so the overlap works.  */
-  assert (sizeof (Dwarf_Arange) >= sizeof (Dwarf_Arange *));
-  struct arangelist **sortaranges
-    = (buf + sizeof (Dwarf_Aranges)
-       + ((sizeof (Dwarf_Arange) - sizeof sortaranges[0]) * narangelist));
-
-  /* The list is in LIFO order and usually they come in clumps with
-     ascending addresses.  So fill from the back to probably start with
-     runs already in order before we sort.  */
-  unsigned int i = narangelist;
-  while (i-- > 0)
-    {
-      sortaranges[i] = arangelist;
-      arangelist = arangelist->next;
-    }
-  assert (arangelist == NULL);
-
-  /* Sort by ascending address.  */
-  qsort (sortaranges, narangelist, sizeof sortaranges[0], &compare_aranges);
-
-  /* Now that they are sorted, put them in the final array.
-     The buffers overlap, so we've clobbered the early elements
-     of SORTARANGES by the time we're reading the later ones.  */
-  *aranges = buf;
-  (*aranges)->dbg = dbg;
-  (*aranges)->naranges = narangelist;
   dbg->aranges = *aranges;
-  if (naranges != NULL)
-    *naranges = narangelist;
-  for (i = 0; i < narangelist; ++i)
-    {
-      struct arangelist *elt = sortaranges[i];
-      (*aranges)->info[i] = elt->arange;
-      free (elt);
-    }
-
   return 0;
 }
 INTDEF(dwarf_getaranges)
diff --git a/libdw/libdwP.h b/libdw/libdwP.h
index 75e0283b..714e95e3 100644
--- a/libdw/libdwP.h
+++ b/libdw/libdwP.h
@@ -232,9 +232,12 @@  struct Dwarf
   /* Search tree for decoded .debug_line units.  */
   void *files_lines;
 
-  /* Address ranges.  */
+  /* Address ranges read from .debug_aranges.  */
   Dwarf_Aranges *aranges;
 
+  /* Address ranges inferred from CUs.  */
+  Dwarf_Aranges *dieranges;
+
   /* Cached info from the CFI section.  */
   struct Dwarf_CFI_s *cfi;
 
@@ -1472,4 +1475,12 @@  void __libdw_set_debugdir (Dwarf *dbg);
 char * __libdw_filepath (const char *debugdir, const char *dir,
 			 const char *file)
   internal_function;
+
+/* Like dwarf_getaranges but aranges are generated from CU address
+   ranges instead of being read from .debug_aranges.
+
+   Returns 0 if successful and updates ARANGES and NARANGES.  Otherwise
+   returns -1 and sets libdw_errno.
+*/
+int __libdw_getdieranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges);
 #endif	/* libdwP.h */
diff --git a/libdwfl/cu.c b/libdwfl/cu.c
index b1afb19a..06684357 100644
--- a/libdwfl/cu.c
+++ b/libdwfl/cu.c
@@ -39,7 +39,7 @@ 
 static inline Dwarf_Arange *
 dwar (Dwfl_Module *mod, unsigned int idx)
 {
-  return &mod->dw->aranges->info[mod->aranges[idx].arange];
+  return &mod->dw->dieranges->info[mod->aranges[idx].arange];
 }
 
 
@@ -51,7 +51,7 @@  addrarange (Dwfl_Module *mod, Dwarf_Addr addr, struct dwfl_arange **arange)
       struct dwfl_arange *aranges = NULL;
       Dwarf_Aranges *dwaranges = NULL;
       size_t naranges;
-      if (INTUSE(dwarf_getaranges) (mod->dw, &dwaranges, &naranges) != 0)
+      if (__libdw_getdieranges (mod->dw, &dwaranges, &naranges) != 0)
 	return DWFL_E_LIBDW;
 
       /* If the module has no aranges (when no code is included) we
@@ -119,7 +119,7 @@  addrarange (Dwfl_Module *mod, Dwarf_Addr addr, struct dwfl_arange **arange)
 	    {
 	      /* It might be in the last range.  */
 	      const Dwarf_Arange *last
-		= &mod->dw->aranges->info[mod->dw->aranges->naranges - 1];
+		= &mod->dw->dieranges->info[mod->dw->dieranges->naranges - 1];
 	      if (addr > last->addr + last->length)
 		break;
 	    }
@@ -296,7 +296,7 @@  arangecu (Dwfl_Module *mod, struct dwfl_arange *arange, struct dwfl_cu **cu)
 {
   if (arange->cu == NULL)
     {
-      const Dwarf_Arange *dwarange = &mod->dw->aranges->info[arange->arange];
+      const Dwarf_Arange *dwarange = &mod->dw->dieranges->info[arange->arange];
       Dwfl_Error result = intern_cu (mod, dwarange->offset, &arange->cu);
       if (result != DWFL_E_NOERROR)
 	return result;
diff --git a/tests/run-getsrc-die.sh b/tests/run-getsrc-die.sh
index 4da16e7a..54c7ad8c 100755
--- a/tests/run-getsrc-die.sh
+++ b/tests/run-getsrc-die.sh
@@ -22,13 +22,23 @@ 
 # which uses dwfl_module_getsrc. This test uses dwarf_addrdie and
 # dwarf_getsrc_die
 testfiles testfile testfile-inlines testfile-lex-inlines
+tempfiles testfile-no-aranges testfile-inlines-no-aranges
+tempfiles testfile-lex-inlines-no-aranges good.out getsrc_die.out
 
-testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile 0x08048468 0x0804845c <<\EOF
+# Each test should also pass with no .debug_aranges present.
+objcopy --remove-section .debug_aranges testfile testfile-no-aranges
+objcopy --remove-section .debug_aranges testfile-inlines testfile-inlines-no-aranges
+objcopy --remove-section .debug_aranges testfile-lex-inlines testfile-lex-inlines-no-aranges
+
+cat > good.out <<\EOF
 /home/drepper/gnu/new-bu/build/ttt/f.c:3
 /home/drepper/gnu/new-bu/build/ttt/b.c:4
 EOF
 
-testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-inlines 0x00000000000005a0 0x00000000000005a1 0x00000000000005b0 0x00000000000005b1 0x00000000000005c0 0x00000000000005d0 0x00000000000005e0 0x00000000000005e1 0x00000000000005f1 0x00000000000005f2 <<\EOF
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile 0x08048468 0x0804845c
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-no-aranges 0x08048468 0x0804845c
+
+cat > good.out <<\EOF
 /tmp/x.cpp:5
 /tmp/x.cpp:6
 /tmp/x.cpp:10
@@ -41,11 +51,17 @@  testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-inlines 0x00000000
 /tmp/x.cpp:5
 EOF
 
-testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-lex-inlines 0x0000000000000680 0x0000000000000681 0x0000000000000690 0x0000000000000691 <<\EOF
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-inlines 0x00000000000005a0 0x00000000000005a1 0x00000000000005b0 0x00000000000005b1 0x00000000000005c0 0x00000000000005d0 0x00000000000005e0 0x00000000000005e1 0x00000000000005f1 0x00000000000005f2
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-inlines-no-aranges 0x00000000000005a0 0x00000000000005a1 0x00000000000005b0 0x00000000000005b1 0x00000000000005c0 0x00000000000005d0 0x00000000000005e0 0x00000000000005e1 0x00000000000005f1 0x00000000000005f2
+
+cat > good.out <<\EOF
 /tmp/x.cpp:5
 /tmp/x.cpp:5
 /tmp/x.cpp:5
 /tmp/x.cpp:5
 EOF
 
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-lex-inlines 0x0000000000000680 0x0000000000000681 0x0000000000000690 0x0000000000000691
+cat good.out | testrun_compare ${abs_top_builddir}/tests/getsrc_die testfile-lex-inlines-no-aranges 0x0000000000000680 0x0000000000000681 0x0000000000000690 0x0000000000000691
+
 exit 0