[v4,20/22] Convert dwarf_cu::die_hash to new hash table

Message ID 20240823184910.883268-21-simon.marchi@efficios.com
State New
Headers
Series Add a C++ hash table to gdbsupport |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gdb_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 success Test passed

Commit Message

Simon Marchi Aug. 23, 2024, 6:48 p.m. UTC
  Convert one use of htab_t, mapping offsets to die_info object, to
`gdb::unordered_map<sect_offset, die_info *>`.

Change-Id: Ic80df22bda551e2d4c2511d167e057f4d6cd2b3e
---
 gdb/dwarf2/cu.h   |  2 +-
 gdb/dwarf2/die.c  | 21 ---------------------
 gdb/dwarf2/die.h  |  8 --------
 gdb/dwarf2/read.c | 39 ++++++++++++---------------------------
 4 files changed, 13 insertions(+), 57 deletions(-)
  

Comments

Tom Tromey Oct. 4, 2024, 8:49 p.m. UTC | #1
>>>>> "Simon" == Simon Marchi <simon.marchi@efficios.com> writes:

Simon> Convert one use of htab_t, mapping offsets to die_info object, to
Simon> `gdb::unordered_map<sect_offset, die_info *>`.

Simon> -  htab_up die_hash;
Simon> +  gdb::unordered_map<sect_offset, die_info *> die_hash;
 
This is the one that seems to convert from a set (with identity based on
a member) to a map.

Unfortunately though this one seems like it could be pretty expensive?
In a large program there are a lot of DIEs.

Is it really not possible to do this using the DIE's own sect_off?

Tom
  
Simon Marchi Oct. 7, 2024, 5:35 p.m. UTC | #2
On 2024-10-04 16:49, Tom Tromey wrote:
>>>>>> "Simon" == Simon Marchi <simon.marchi@efficios.com> writes:
> 
> Simon> Convert one use of htab_t, mapping offsets to die_info object, to
> Simon> `gdb::unordered_map<sect_offset, die_info *>`.
> 
> Simon> -  htab_up die_hash;
> Simon> +  gdb::unordered_map<sect_offset, die_info *> die_hash;
>  
> This is the one that seems to convert from a set (with identity based on
> a member) to a map.
> 
> Unfortunately though this one seems like it could be pretty expensive?
> In a large program there are a lot of DIEs.
> 
> Is it really not possible to do this using the DIE's own sect_off?

It is possible, see below for what a patch doing this would look like.

It's a bit more explicit code, since you need to specify an explicit key
hash and equal type, and the code is a bit less obvious, but for
something like this, which is expected to have a lot of elements, I
agree it's probably worth the tradeoff.  Using a map means storing
`std::pair<sect_off, die_info *>` instead of just `die_info *`.

If there are cases where we expect to have only a few elements in the
map, I'd say the tradeoff balance could go towards simpler and clearer
code.


From c3fd4486cfaaed3baeffe1df08bef0577d38829d Mon Sep 17 00:00:00 2001
From: Simon Marchi <simon.marchi@efficios.com>
Date: Thu, 22 Aug 2024 13:37:28 -0400
Subject: [PATCH] Convert dwarf_cu::die_hash to new hash table

Convert one use of htab_t, mapping offsets to die_info object, to
`gdb::unordered_set`.

Change-Id: Ic80df22bda551e2d4c2511d167e057f4d6cd2b3e
---
 gdb/dwarf2/cu.h    |  5 ++++-
 gdb/dwarf2/die.c   | 21 ---------------------
 gdb/dwarf2/die.h   | 37 ++++++++++++++++++++++++++++---------
 gdb/dwarf2/read.c  | 40 ++++++++++++----------------------------
 gdb/dwarf2/types.h | 10 ++++++++++
 5 files changed, 54 insertions(+), 59 deletions(-)

diff --git a/gdb/dwarf2/cu.h b/gdb/dwarf2/cu.h
index ea8e14770bd2..23b06dc1fb68 100644
--- a/gdb/dwarf2/cu.h
+++ b/gdb/dwarf2/cu.h
@@ -25,6 +25,7 @@
 #include <optional>
 #include "language.h"
 #include "gdbsupport/unordered_set.h"
+#include "dwarf2/die.h"
 
 /* Type used for delaying computation of method physnames.
    See comments for compute_delayed_physnames.  */
@@ -153,7 +154,9 @@ struct dwarf2_cu
 
   /* A hash table of DIE cu_offset for following references with
      die_info->offset.sect_off as hash.  */
-  htab_up die_hash;
+  using die_hash_t = gdb::unordered_set<die_info *, die_info_hash_sect_off,
+					die_info_eq_sect_off>;
+  die_hash_t die_hash;
 
   /* Full DIEs if read in.  */
   struct die_info *dies = nullptr;
diff --git a/gdb/dwarf2/die.c b/gdb/dwarf2/die.c
index bfa54e517abb..500d7bfe0818 100644
--- a/gdb/dwarf2/die.c
+++ b/gdb/dwarf2/die.c
@@ -35,27 +35,6 @@ die_info::allocate (struct obstack *obstack, int num_attrs)
   return die;
 }
 
-/* See die.h.  */
-
-hashval_t
-die_info::hash (const void *item)
-{
-  const struct die_info *die = (const struct die_info *) item;
-
-  return to_underlying (die->sect_off);
-}
-
-/* See die.h.  */
-
-int
-die_info::eq (const void *item_lhs, const void *item_rhs)
-{
-  const struct die_info *die_lhs = (const struct die_info *) item_lhs;
-  const struct die_info *die_rhs = (const struct die_info *) item_rhs;
-
-  return die_lhs->sect_off == die_rhs->sect_off;
-}
-
 static void
 dump_die_shallow (struct ui_file *f, int indent, struct die_info *die)
 {
diff --git a/gdb/dwarf2/die.h b/gdb/dwarf2/die.h
index d4eab0838bfe..76aa8808062c 100644
--- a/gdb/dwarf2/die.h
+++ b/gdb/dwarf2/die.h
@@ -22,7 +22,6 @@
 
 #include "complaints.h"
 #include "dwarf2/attribute.h"
-#include "hashtab.h"
 
 /* This data structure holds a complete die structure.  */
 struct die_info
@@ -31,14 +30,6 @@ struct die_info
      attributes that are needed.  */
   static die_info *allocate (struct obstack *obstack, int num_attrs);
 
-  /* Trivial hash function for die_info: the hash value of a DIE is
-     its offset in .debug_info for this objfile.  */
-  static hashval_t hash (const void *item);
-
-  /* Trivial comparison function for die_info structures: two DIEs
-     are equal if they have the same offset.  */
-  static int eq (const void *item_lhs, const void *item_rhs);
-
   /* Dump this DIE and any children to MAX_LEVEL.  They are written to
      gdb_stdlog.  Note this is called from the pdie user command in
      gdb-gdb.gdb.  */
@@ -148,4 +139,32 @@ struct die_info
   struct attribute attrs[1];
 };
 
+/* Key hash type to store die_info objects in gdb::unordered_set, identified by
+   their section offsets.  */
+
+struct die_info_hash_sect_off final
+{
+  using is_transparent = void;
+
+  std::uint64_t operator() (const die_info *die) const noexcept
+  { return (*this) (die->sect_off); }
+
+  std::uint64_t operator() (sect_offset offset) const noexcept
+  { return sect_offset_hash (offset); }
+};
+
+/* Key equal type to store die_info objects in gdb::unordered_set, identified by
+   their section offsets.  */
+
+struct die_info_eq_sect_off final
+{
+  using is_transparent = void;
+
+  bool operator() (const die_info *a, const die_info *b) const noexcept
+  { return (*this) (a->sect_off, b); }
+
+  bool operator() (sect_offset offset, const die_info *die) const noexcept
+  { return offset == die->sect_off; }
+};
+
 #endif /* GDB_DWARF2_DIE_H */
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 3edfed504763..0a8de3de0087 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -96,7 +96,6 @@
 #include "run-on-main-thread.h"
 #include "dwarf2/parent-map.h"
 #include "dwarf2/error.h"
-#include "gdbsupport/unordered_dense.h"
 
 /* When == 1, print basic high level tracing messages.
    When > 1, be more verbose.
@@ -5559,11 +5558,8 @@ load_full_comp_unit (dwarf2_per_cu_data *this_cu,
   struct dwarf2_cu *cu = reader.cu;
   const gdb_byte *info_ptr = reader.info_ptr;
 
-  gdb_assert (cu->die_hash == NULL);
-  cu->die_hash.reset (htab_create_alloc
-		      (cu->header.get_length_without_initial () / 12,
-		       die_info::hash, die_info::eq,
-		       nullptr, xcalloc, xfree));
+  gdb_assert (cu->die_hash.empty ());
+  cu->die_hash.reserve (cu->header.get_length_without_initial () / 12);
 
   if (reader.comp_unit_die->has_children)
     reader.comp_unit_die->child
@@ -15849,10 +15845,8 @@ read_die_and_children (const struct die_reader_specs *reader,
       return NULL;
     }
 
-  void **slot = htab_find_slot_with_hash (reader->cu->die_hash.get (), die,
-					  to_underlying (die->sect_off),
-					  INSERT);
-  *slot = die;
+  bool inserted = reader->cu->die_hash.emplace (die).second;
+  gdb_assert (inserted);
 
   if (die->has_children)
     die->child = read_die_and_siblings_1 (reader, cur_ptr, new_info_ptr, die);
@@ -20418,7 +20412,6 @@ static struct die_info *
 follow_die_offset (sect_offset sect_off, int offset_in_dwz,
 		   struct dwarf2_cu **ref_cu)
 {
-  struct die_info temp_die;
   struct dwarf2_cu *target_cu, *cu = *ref_cu;
   dwarf2_per_objfile *per_objfile = cu->per_objfile;
 
@@ -20479,11 +20472,9 @@ follow_die_offset (sect_offset sect_off, int offset_in_dwz,
     }
 
   *ref_cu = target_cu;
-  temp_die.sect_off = sect_off;
 
-  return (struct die_info *) htab_find_with_hash (target_cu->die_hash.get (),
-						  &temp_die,
-						  to_underlying (sect_off));
+  auto it = target_cu->die_hash.find (sect_off);
+  return it != target_cu->die_hash.end () ? *it : nullptr;
 }
 
 /* Follow reference attribute ATTR of SRC_DIE.
@@ -20842,9 +20833,7 @@ static struct die_info *
 follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
 		  struct dwarf2_cu **ref_cu)
 {
-  struct die_info temp_die;
   struct dwarf2_cu *sig_cu;
-  struct die_info *die;
   dwarf2_per_objfile *per_objfile = (*ref_cu)->per_objfile;
 
 
@@ -20865,11 +20854,9 @@ follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
   sig_cu = per_objfile->get_cu (sig_type);
   gdb_assert (sig_cu != NULL);
   gdb_assert (to_underlying (sig_type->type_offset_in_section) != 0);
-  temp_die.sect_off = sig_type->type_offset_in_section;
-  die = (struct die_info *) htab_find_with_hash (sig_cu->die_hash.get (),
-						 &temp_die,
-						 to_underlying (temp_die.sect_off));
-  if (die)
+
+  if (auto die_it = sig_cu->die_hash.find (sig_type->type_offset_in_section);
+      die_it != sig_cu->die_hash.end ())
     {
       /* For .gdb_index version 7 keep track of included TUs.
 	 http://sourceware.org/bugzilla/show_bug.cgi?id=15021.  */
@@ -20878,7 +20865,7 @@ follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
 	(*ref_cu)->per_cu->imported_symtabs.push_back (sig_cu->per_cu);
 
       *ref_cu = sig_cu;
-      return die;
+      return *die_it;
     }
 
   return NULL;
@@ -21060,11 +21047,8 @@ read_signatured_type (signatured_type *sig_type,
       struct dwarf2_cu *cu = reader.cu;
       const gdb_byte *info_ptr = reader.info_ptr;
 
-      gdb_assert (cu->die_hash == NULL);
-      cu->die_hash.reset (htab_create_alloc
-			  (cu->header.get_length_without_initial () / 12,
-			   die_info::hash, die_info::eq,
-			   nullptr, xcalloc, xfree));
+      gdb_assert (cu->die_hash.empty ());
+      cu->die_hash.reserve (cu->header.get_length_without_initial () / 12);
 
       if (reader.comp_unit_die->has_children)
 	reader.comp_unit_die->child
diff --git a/gdb/dwarf2/types.h b/gdb/dwarf2/types.h
index f0b9febba151..736873ef507f 100644
--- a/gdb/dwarf2/types.h
+++ b/gdb/dwarf2/types.h
@@ -37,4 +37,14 @@ sect_offset_str (sect_offset offset)
   return hex_string (to_underlying (offset));
 }
 
+/* Hash one sect_offset.  */
+
+static inline std::size_t
+sect_offset_hash (sect_offset offset)
+{
+  using underlying_type = std::underlying_type_t<sect_offset>;
+
+  return std::hash<underlying_type> () (to_underlying (offset));
+}
+
 #endif /* DWARF2_TYPES_H */

base-commit: e795770016b60b24845e49a58b00ff5ca27e9ca2
  
Simon Marchi Oct. 7, 2024, 6:05 p.m. UTC | #3
On 2024-10-07 13:35, Simon Marchi wrote:
> +static inline std::size_t
> +sect_offset_hash (sect_offset offset)
> +{
> +  using underlying_type = std::underlying_type_t<sect_offset>;
> +
> +  return std::hash<underlying_type> () (to_underlying (offset));
> +}

I think this new sect_offset_hash function is unnecessary, using
std::hash on `sect_offset` directly seems to work.

Simon
  

Patch

diff --git a/gdb/dwarf2/cu.h b/gdb/dwarf2/cu.h
index ea8e14770bd2..dc11be0e7d79 100644
--- a/gdb/dwarf2/cu.h
+++ b/gdb/dwarf2/cu.h
@@ -153,7 +153,7 @@  struct dwarf2_cu
 
   /* A hash table of DIE cu_offset for following references with
      die_info->offset.sect_off as hash.  */
-  htab_up die_hash;
+  gdb::unordered_map<sect_offset, die_info *> die_hash;
 
   /* Full DIEs if read in.  */
   struct die_info *dies = nullptr;
diff --git a/gdb/dwarf2/die.c b/gdb/dwarf2/die.c
index bfa54e517abb..500d7bfe0818 100644
--- a/gdb/dwarf2/die.c
+++ b/gdb/dwarf2/die.c
@@ -35,27 +35,6 @@  die_info::allocate (struct obstack *obstack, int num_attrs)
   return die;
 }
 
-/* See die.h.  */
-
-hashval_t
-die_info::hash (const void *item)
-{
-  const struct die_info *die = (const struct die_info *) item;
-
-  return to_underlying (die->sect_off);
-}
-
-/* See die.h.  */
-
-int
-die_info::eq (const void *item_lhs, const void *item_rhs)
-{
-  const struct die_info *die_lhs = (const struct die_info *) item_lhs;
-  const struct die_info *die_rhs = (const struct die_info *) item_rhs;
-
-  return die_lhs->sect_off == die_rhs->sect_off;
-}
-
 static void
 dump_die_shallow (struct ui_file *f, int indent, struct die_info *die)
 {
diff --git a/gdb/dwarf2/die.h b/gdb/dwarf2/die.h
index d4eab0838bfe..5a32aa94ea3c 100644
--- a/gdb/dwarf2/die.h
+++ b/gdb/dwarf2/die.h
@@ -31,14 +31,6 @@  struct die_info
      attributes that are needed.  */
   static die_info *allocate (struct obstack *obstack, int num_attrs);
 
-  /* Trivial hash function for die_info: the hash value of a DIE is
-     its offset in .debug_info for this objfile.  */
-  static hashval_t hash (const void *item);
-
-  /* Trivial comparison function for die_info structures: two DIEs
-     are equal if they have the same offset.  */
-  static int eq (const void *item_lhs, const void *item_rhs);
-
   /* Dump this DIE and any children to MAX_LEVEL.  They are written to
      gdb_stdlog.  Note this is called from the pdie user command in
      gdb-gdb.gdb.  */
diff --git a/gdb/dwarf2/read.c b/gdb/dwarf2/read.c
index 9ba995d654cc..4813dc23bb91 100644
--- a/gdb/dwarf2/read.c
+++ b/gdb/dwarf2/read.c
@@ -5491,11 +5491,8 @@  load_full_comp_unit (dwarf2_per_cu_data *this_cu,
   struct dwarf2_cu *cu = reader.cu;
   const gdb_byte *info_ptr = reader.info_ptr;
 
-  gdb_assert (cu->die_hash == NULL);
-  cu->die_hash.reset (htab_create_alloc
-		      (cu->header.get_length_without_initial () / 12,
-		       die_info::hash, die_info::eq,
-		       nullptr, xcalloc, xfree));
+  gdb_assert (cu->die_hash.empty ());
+  cu->die_hash.reserve (cu->header.get_length_without_initial () / 12);
 
   if (reader.comp_unit_die->has_children)
     reader.comp_unit_die->child
@@ -15767,10 +15764,8 @@  read_die_and_children (const struct die_reader_specs *reader,
       return NULL;
     }
 
-  void **slot = htab_find_slot_with_hash (reader->cu->die_hash.get (), die,
-					  to_underlying (die->sect_off),
-					  INSERT);
-  *slot = die;
+  bool inserted = reader->cu->die_hash.emplace (die->sect_off, die).second;
+  gdb_assert (inserted);
 
   if (die->has_children)
     die->child = read_die_and_siblings_1 (reader, cur_ptr, new_info_ptr, die);
@@ -20265,7 +20260,6 @@  static struct die_info *
 follow_die_offset (sect_offset sect_off, int offset_in_dwz,
 		   struct dwarf2_cu **ref_cu)
 {
-  struct die_info temp_die;
   struct dwarf2_cu *target_cu, *cu = *ref_cu;
   dwarf2_per_objfile *per_objfile = cu->per_objfile;
 
@@ -20321,11 +20315,9 @@  follow_die_offset (sect_offset sect_off, int offset_in_dwz,
     }
 
   *ref_cu = target_cu;
-  temp_die.sect_off = sect_off;
 
-  return (struct die_info *) htab_find_with_hash (target_cu->die_hash.get (),
-						  &temp_die,
-						  to_underlying (sect_off));
+  auto it = target_cu->die_hash.find (sect_off);
+  return it != target_cu->die_hash.end () ? it->second : nullptr;
 }
 
 /* Follow reference attribute ATTR of SRC_DIE.
@@ -20679,9 +20671,7 @@  static struct die_info *
 follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
 		  struct dwarf2_cu **ref_cu)
 {
-  struct die_info temp_die;
   struct dwarf2_cu *sig_cu;
-  struct die_info *die;
   dwarf2_per_objfile *per_objfile = (*ref_cu)->per_objfile;
 
 
@@ -20702,11 +20692,9 @@  follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
   sig_cu = per_objfile->get_cu (sig_type);
   gdb_assert (sig_cu != NULL);
   gdb_assert (to_underlying (sig_type->type_offset_in_section) != 0);
-  temp_die.sect_off = sig_type->type_offset_in_section;
-  die = (struct die_info *) htab_find_with_hash (sig_cu->die_hash.get (),
-						 &temp_die,
-						 to_underlying (temp_die.sect_off));
-  if (die)
+
+  auto die_it = sig_cu->die_hash.find (sig_type->type_offset_in_section);
+  if (die_it != sig_cu->die_hash.end ())
     {
       /* For .gdb_index version 7 keep track of included TUs.
 	 http://sourceware.org/bugzilla/show_bug.cgi?id=15021.  */
@@ -20715,7 +20703,7 @@  follow_die_sig_1 (struct die_info *src_die, struct signatured_type *sig_type,
 	(*ref_cu)->per_cu->imported_symtabs.push_back (sig_cu->per_cu);
 
       *ref_cu = sig_cu;
-      return die;
+      return die_it->second;
     }
 
   return NULL;
@@ -20891,11 +20879,8 @@  read_signatured_type (signatured_type *sig_type,
       struct dwarf2_cu *cu = reader.cu;
       const gdb_byte *info_ptr = reader.info_ptr;
 
-      gdb_assert (cu->die_hash == NULL);
-      cu->die_hash.reset (htab_create_alloc
-			  (cu->header.get_length_without_initial () / 12,
-			   die_info::hash, die_info::eq,
-			   nullptr, xcalloc, xfree));
+      gdb_assert (cu->die_hash.empty ());
+      cu->die_hash.reserve (cu->header.get_length_without_initial () / 12);
 
       if (reader.comp_unit_die->has_children)
 	reader.comp_unit_die->child