[2/2] dwarf_getaranges: Build aranges list from CUs instead of .debug_aranges

Message ID 20231207013504.40300-3-amerey@redhat.com
State Superseded
Headers
Series dwarf_getaranges: Build aranges list from CUs |

Commit Message

Aaron Merey Dec. 7, 2023, 1:35 a.m. UTC
  No longer use .debug_aranges to build the aranges list since it could be
absent or incomplete.

Instead build the aranges list by iterating over each CU and recording
each address range.

https://sourceware.org/bugzilla/show_bug.cgi?id=22288
https://sourceware.org/bugzilla/show_bug.cgi?id=30948

Signed-off-by: Aaron Merey <amerey@redhat.com>

---

This patch's method of building the aranges list is slower than simply
reading .debug_aranges.  On my machine, running eu-stack on a 2.9G
firefox core file takes about 8.7 seconds with this patch applied,
compared to about 3.3 seconds without this patch.

Ideally we could assume that .debug_aranges is complete if it is present
and build the aranges list via CU iteration only when .debug_aranges
is absent.  This would let us save time on gcc-compiled binaries, which
include complete .debug_aranges by default.

However the DWARF spec appears to permit partially complete
.debug_aranges [1].  We could improve performance by starting with a
potentially incomplete list built from .debug_aranges.  If a lookup
fails then search the CUs for missing aranges and add to the list
when found.

This approach would complicate the dwarf_get_aranges interface.  The
list it initially provides could no longer be assumed to be complete.
The number of elements in the list could change during calls to
dwarf_getarange{info, _addr}.  This would invalidate the naranges value
set by dwarf_getaranges.  The current API doesn't include a way to
communicate to the caller when narages changes and by how much.

Due to these complications I think it's better to simply ignore
.debug_aranges altogether and build the aranges table via CU iteration,
as is done in this patch.

[1] https://sourceware.org/bugzilla/show_bug.cgi?id=22288#c5

 libdw/dwarf_getaranges.c | 215 ++++++++++-----------------------------
 1 file changed, 52 insertions(+), 163 deletions(-)
  

Patch

diff --git a/libdw/dwarf_getaranges.c b/libdw/dwarf_getaranges.c
index 27439d37..8676f93b 100644
--- a/libdw/dwarf_getaranges.c
+++ b/libdw/dwarf_getaranges.c
@@ -33,7 +33,6 @@ 
 #endif
 
 #include <stdlib.h>
-#include <assert.h>
 #include "libdwP.h"
 #include <dwarf.h>
 
@@ -68,174 +67,51 @@  dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
       return 0;
     }
 
-  if (dbg->sectiondata[IDX_debug_aranges] == NULL)
-    {
-      /* No such section.  */
-      *aranges = NULL;
-      if (naranges != NULL)
-	*naranges = 0;
-      return 0;
-    }
-
-  if (dbg->sectiondata[IDX_debug_aranges]->d_buf == NULL)
-    return -1;
-
   struct arangelist *arangelist = NULL;
   unsigned int narangelist = 0;
 
-  const unsigned char *readp = dbg->sectiondata[IDX_debug_aranges]->d_buf;
-  const unsigned char *readendp
-    = readp + dbg->sectiondata[IDX_debug_aranges]->d_size;
-
-  while (readp < readendp)
+  Dwarf_CU *cu = NULL;
+  while (INTUSE(dwarf_get_units) (dbg, cu, &cu, NULL, NULL, NULL, NULL) == 0)
     {
-      const unsigned char *hdrstart = readp;
-
-      /* Each entry starts with a header:
-
-	 1. A 4-byte or 12-byte length containing the length of the
-	 set of entries for this compilation unit, not including the
-	 length field itself. [...]
-
-	 2. A 2-byte version identifier containing the value 2 for
-	 DWARF Version 2.1.
-
-	 3. A 4-byte or 8-byte offset into the .debug_info section. [...]
-
-	 4. A 1-byte unsigned integer containing the size in bytes of
-	 an address (or the offset portion of an address for segmented
-	 addressing) on the target system.
-
-	 5. A 1-byte unsigned integer containing the size in bytes of
-	 a segment descriptor on the target system.  */
-      if (unlikely (readp + 4 > readendp))
-	goto invalid;
-
-      Dwarf_Word length = read_4ubyte_unaligned_inc (dbg, readp);
-      unsigned int length_bytes = 4;
-      if (length == DWARF3_LENGTH_64_BIT)
-	{
-	  if (unlikely (readp + 8 > readendp))
-	    goto invalid;
-
-	  length = read_8ubyte_unaligned_inc (dbg, readp);
-	  length_bytes = 8;
-	}
-      else if (unlikely (length >= DWARF3_LENGTH_MIN_ESCAPE_CODE
-			 && length <= DWARF3_LENGTH_MAX_ESCAPE_CODE))
-	goto invalid;
-
-      const unsigned char *endp = readp + length;
-      if (unlikely (endp > readendp))
-	goto invalid;
-
-      if (unlikely (readp + 2 > readendp))
-	goto invalid;
-
-      unsigned int version = read_2ubyte_unaligned_inc (dbg, readp);
-      if (version != 2)
-	{
-	invalid:
-	  __libdw_seterrno (DWARF_E_INVALID_DWARF);
-	fail:
-	  while (arangelist != NULL)
-	    {
-	      struct arangelist *next = arangelist->next;
-	      free (arangelist);
-	      arangelist = next;
-	    }
-	  return -1;
-	}
-
-      Dwarf_Word offset = 0;
-      if (__libdw_read_offset_inc (dbg,
-				   IDX_debug_aranges, &readp,
-				   length_bytes, &offset, IDX_debug_info, 4))
-	goto fail;
-
-      /* Next up two bytes for address and segment size.  */
-      if (readp + 2 > readendp)
-	goto invalid;
-
-      unsigned int address_size = *readp++;
-      if (unlikely (address_size != 4 && address_size != 8))
-	goto invalid;
-
-      /* We don't actually support segment selectors.  */
-      unsigned int segment_size = *readp++;
-      if (segment_size != 0)
-	goto invalid;
-
-      /* Round the address to the next multiple of 2*address_size.  */
-      readp += ((2 * address_size - ((readp - hdrstart) % (2 * address_size)))
-		% (2 * address_size));
-
-      while (1)
-	{
-	  Dwarf_Word range_address;
-	  Dwarf_Word range_length;
-
-	  if (__libdw_read_address_inc (dbg, IDX_debug_aranges, &readp,
-					address_size, &range_address))
-	    goto fail;
-
-	  if (readp + address_size > readendp)
-	    goto invalid;
-
-	  if (address_size == 4)
-	    range_length = read_4ubyte_unaligned_inc (dbg, readp);
-	  else
-	    range_length = read_8ubyte_unaligned_inc (dbg, readp);
-
-	  /* Two zero values mark the end.  But in some cases (bugs)
-	     there might be such entries in the middle of the table.
-	     Ignore and continue, we'll check the actual length of
-	     the table to see if we are really at the end.  */
-	  if (range_address == 0 && range_length == 0)
-	    {
-	      if (readp >= endp)
-		break;
-	      else
-		continue;
-	    }
-
-	  /* We don't use alloca for these temporary structures because
-	     the total number of them can be quite large.  */
-	  struct arangelist *new_arange = malloc (sizeof *new_arange);
-	  if (unlikely (new_arange == NULL))
-	    {
-	      __libdw_seterrno (DWARF_E_NOMEM);
-	      goto fail;
-	    }
-
-	  new_arange->arange.addr = range_address;
-	  new_arange->arange.length = range_length;
-
-	  /* We store the actual CU DIE offset, not the CU header offset.  */
-	  Dwarf_CU *cu = __libdw_findcu (dbg, offset, false);
-	  if (unlikely (cu == NULL))
-	    {
-	      /* We haven't gotten a chance to link in the new_arange
-		 into the arangelist, don't leak it.  */
-	      free (new_arange);
-	      goto fail;
-	    }
-	  new_arange->arange.offset = __libdw_first_die_off_from_cu (cu);
-
-	  new_arange->next = arangelist;
-	  arangelist = new_arange;
-	  ++narangelist;
-
-	  /* Sanity-check the data.  */
-	  if (unlikely (new_arange->arange.offset
-			>= dbg->sectiondata[IDX_debug_info]->d_size))
-	    goto invalid;
-	}
+      Dwarf_Addr base;
+      Dwarf_Addr low;
+      Dwarf_Addr high;
+
+      Dwarf_Die cudie = CUDIE (cu);
+
+      /* Skip CUs that only contain type information.  */
+      if (!INTUSE(dwarf_hasattr) (&cudie, DW_AT_low_pc)
+	  && !INTUSE(dwarf_hasattr) (&cudie, DW_AT_ranges))
+	continue;
+
+      ptrdiff_t offset = 0;
+      while ((offset = INTUSE(dwarf_ranges) (&cudie, offset,
+					     &base, &low, &high)) > 0)
+        {
+          if (offset == -1)
+            goto fail;
+
+          struct arangelist *new_arange = malloc (sizeof *new_arange);
+          if (unlikely (new_arange == NULL))
+            {
+              __libdw_seterrno (DWARF_E_NOMEM);
+              goto fail;
+            }
+
+          new_arange->arange.addr = low;
+          new_arange->arange.length = (Dwarf_Word) (high - low);
+          new_arange->arange.offset = __libdw_first_die_off_from_cu (cu);
+
+          new_arange->next = arangelist;
+          arangelist = new_arange;
+          ++narangelist;
+        }
     }
 
   if (narangelist == 0)
     {
-      assert (arangelist == NULL);
+      if (arangelist != NULL)
+	goto fail;
       if (naranges != NULL)
 	*naranges = 0;
       *aranges = NULL;
@@ -250,7 +126,7 @@  dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
   /* First use the buffer for the pointers, and sort the entries.
      We'll write the pointers in the end of the buffer, and then
      copy into the buffer from the beginning so the overlap works.  */
-  assert (sizeof (Dwarf_Arange) >= sizeof (Dwarf_Arange *));
+  eu_static_assert (sizeof (Dwarf_Arange) >= sizeof (Dwarf_Arange *));
   struct arangelist **sortaranges
     = (buf + sizeof (Dwarf_Aranges)
        + ((sizeof (Dwarf_Arange) - sizeof sortaranges[0]) * narangelist));
@@ -264,7 +140,11 @@  dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
       sortaranges[i] = arangelist;
       arangelist = arangelist->next;
     }
-  assert (arangelist == NULL);
+
+  /* Something went wrong if narangelist is less then the actual length
+     of arangelist. */
+  if (arangelist != NULL)
+    goto fail;
 
   /* Sort by ascending address.  */
   qsort (sortaranges, narangelist, sizeof sortaranges[0], &compare_aranges);
@@ -286,5 +166,14 @@  dwarf_getaranges (Dwarf *dbg, Dwarf_Aranges **aranges, size_t *naranges)
     }
 
   return 0;
+
+fail:
+  while (arangelist != NULL)
+    {
+      struct arangelist *next = arangelist->next;
+      free (arangelist);
+      arangelist = next;
+    }
+  return -1;
 }
 INTDEF(dwarf_getaranges)