diff mbox series

[v1,14/16] symtab_reader: add support for ppc64 ELFv1 binaries

Message ID 20200619214305.562-15-maennich@google.com
State Superseded
Headers show
Series Refactor (k)symtab reader | expand

Commit Message

Matthias Maennich June 19, 2020, 9:43 p.m. UTC
When loading the symtab from a ppc64 binary, also keep track of the
function entry addresses as a key for the symbol lookup. That
accommodates the differences in DWARF pointing to the function entry
address while the symbol table points to the function pointer.

The implementation is mostly copied and adapted from abg-dwarf-reader's
read_context to add this functionality also to the new symtab reader.

	* src/abg-symtab-reader.cc (symtab::lookup_symbol): fall back to
	  looking up the address in entry_addr_symbol_map_.
	  (symtab::load_): update the function entry address map for
	  ppc64 targets.
	  (symtab::update_function_entry_address_symbol_map): New
	  function implementation.
	* include/abg-symtab-reader.h
	  (symtab::entry_addr_symbol_map_): New data member.
	  (symtab::update_function_entry_address_symbol_map): New
	  function declaration.

Reviewed-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Matthias Maennich <maennich@google.com>
---
 include/abg-symtab-reader.h |  8 ++++
 src/abg-symtab-reader.cc    | 93 +++++++++++++++++++++++++++++++++++--
 2 files changed, 98 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/include/abg-symtab-reader.h b/include/abg-symtab-reader.h
index 86335617d46a..06001b26ebeb 100644
--- a/include/abg-symtab-reader.h
+++ b/include/abg-symtab-reader.h
@@ -342,6 +342,9 @@  private:
 		       addr_symbol_map_type;
   addr_symbol_map_type addr_symbol_map_;
 
+  /// Lookup map function entry address -> symbol
+  addr_symbol_map_type entry_addr_symbol_map_;
+
   /// Load the symtab representation from an Elf binary presented to us by an
   /// Elf* handle.
   ///
@@ -366,6 +369,11 @@  private:
   bool
   load_(string_elf_symbols_map_sptr function_symbol_map,
        string_elf_symbols_map_sptr variables_symbol_map);
+
+  void
+  update_function_entry_address_symbol_map(Elf*	     elf_handle,
+					   GElf_Sym* native_symbol,
+					   const elf_symbol_sptr& symbol_sptr);
 };
 
 /// Helper class to allow range-for loops on symtabs for C++11 and later code.
diff --git a/src/abg-symtab-reader.cc b/src/abg-symtab-reader.cc
index c98b9174490c..aefc8d6dcd86 100644
--- a/src/abg-symtab-reader.cc
+++ b/src/abg-symtab-reader.cc
@@ -91,11 +91,16 @@  const elf_symbol_sptr&
 symtab::lookup_symbol(GElf_Addr symbol_addr) const
 {
   static const elf_symbol_sptr empty_result;
-  const addr_symbol_map_type::const_iterator it =
+  const addr_symbol_map_type::const_iterator addr_it =
       addr_symbol_map_.find(symbol_addr);
-  if (it != addr_symbol_map_.end())
+  if (addr_it != addr_symbol_map_.end())
+    return addr_it->second;
+  else
     {
-      return it->second;
+      const addr_symbol_map_type::const_iterator entry_it =
+	entry_addr_symbol_map_.find(symbol_addr);
+      if (entry_it != entry_addr_symbol_map_.end())
+	return entry_it->second;
     }
   return empty_result;
 }
@@ -172,6 +177,8 @@  symtab::load_(Elf*	       elf_handle,
   const bool is_kernel = elf_helpers::is_linux_kernel(elf_handle);
   abg_compat::unordered_set<std::string> exported_kernel_symbols;
 
+  const bool is_ppc64 = elf_helpers::architecture_is_ppc64(elf_handle);
+
   for (size_t i = 0; i < number_syms; ++i)
     {
       GElf_Sym *sym, sym_mem;
@@ -277,6 +284,10 @@  symtab::load_(Elf*	       elf_handle,
 	      elf_helpers::maybe_adjust_et_rel_sym_addr_to_abs_addr(elf_handle,
 								    sym);
 
+	  if (is_ppc64 && symbol_sptr->is_function())
+	    update_function_entry_address_symbol_map(elf_handle, sym,
+						     symbol_sptr);
+
 	  const std::pair<addr_symbol_map_type::const_iterator, bool> result =
 	      addr_symbol_map_.insert(
 		  std::make_pair(symbol_value, symbol_sptr));
@@ -345,5 +356,81 @@  symtab::load_(string_elf_symbols_map_sptr function_symbol_map,
   return true;
 }
 
+void
+symtab::update_function_entry_address_symbol_map(
+    Elf*		   elf_handle,
+    GElf_Sym*		   native_symbol,
+    const elf_symbol_sptr& symbol_sptr)
+{
+
+  // For ppc64 ELFv1 binaries, we need to build a function entry point address
+  // -> function symbol map. This is in addition to the function pointer ->
+  // symbol map.  This is because on ppc64 ELFv1, a function pointer is
+  // different from a function entry point address.
+  //
+  // On ppc64 ELFv1, the DWARF DIE of a function references the address of the
+  // entry point of the function symbol, whereas the value of the function
+  // symbol is the function pointer. As these addresses are different, if we
+  // want to get to the symbol of a function from its entry point address (as
+  // referenced by DWARF function DIEs) we must have the two maps mentioned
+  // right above.
+  //
+  // In other words, we need a map that associates a function entry point
+  // address with the symbol of that function, to be able to get the function
+  // symbol that corresponds to a given function DIE, on ppc64.
+  //
+  // The value of the function pointer (the value of the symbol) usually refers
+  // to the offset of a table in the .opd section.  But sometimes, for a symbol
+  // named "foo", there is a corresponding symbol named ".foo" (note the dot
+  // before foo) whose value is the entry point address of the function; that
+  // entry point address refers to a region in the .text section.
+  //
+  // So we are only interested in values of the symbol that are in the .opd
+  // section.
+  const GElf_Addr fn_desc_addr = native_symbol->st_value;
+  const GElf_Addr fn_entry_point_addr =
+    elf_helpers::lookup_ppc64_elf_fn_entry_point_address(elf_handle,
+							 fn_desc_addr);
+
+  const std::pair<addr_symbol_map_type::const_iterator, bool>& result =
+    entry_addr_symbol_map_.insert(
+      std::make_pair(fn_entry_point_addr, symbol_sptr));
+
+  const addr_symbol_map_type::const_iterator it = result.first;
+  const bool was_inserted = result.second;
+  if (!was_inserted
+      && elf_helpers::address_is_in_opd_section(elf_handle, fn_desc_addr))
+    {
+      // Either
+      //
+      // 'symbol_sptr' must have been registered as an alias for
+      // it->second->get_main_symbol()
+      //
+      // Or
+      //
+      // if the name of 'symbol_sptr' is foo, then the name of it->second is
+      // ".foo". That is, foo is the name of the symbol when it refers to the
+      // function descriptor in the .opd section and ".foo" is an internal name
+      // for the address of the entry point of foo.
+      //
+      // In the latter case, we just want to keep a reference to "foo" as .foo
+      // is an internal name.
+
+      const bool two_symbols_alias =
+	it->second->get_main_symbol()->does_alias(*symbol_sptr);
+      const bool symbol_is_foo_and_prev_symbol_is_dot_foo =
+	(it->second->get_name() == std::string(".") + symbol_sptr->get_name());
+
+      ABG_ASSERT(two_symbols_alias
+		 || symbol_is_foo_and_prev_symbol_is_dot_foo);
+
+      if (symbol_is_foo_and_prev_symbol_is_dot_foo)
+	// Let's just keep a reference to the symbol that the user sees in the
+	// source code (the one named foo). The symbol whose name is prefixed
+	// with a "dot" is an artificial one.
+	entry_addr_symbol_map_[fn_entry_point_addr] = symbol_sptr;
+    }
+}
+
 } // end namespace symtab_reader
 } // end namespace abigail