PR28873 - Implement eu-readelf -D

Message ID CAN-Pu7QTXbWVZWiokCskdWoKChwUnP4ccQ4q4GU9YCL+KYj5qA@mail.gmail.com
State Committed
Headers
Series PR28873 - Implement eu-readelf -D |

Commit Message

Di Chen Feb. 10, 2023, 4:17 p.m. UTC
  From bdc19de94bff8f8812611b9ba8c0116a650d0fb5 Mon Sep 17 00:00:00 2001
From: Di Chen <dichen@redhat.com>
Date: Fri, 13 Jan 2023 20:12:43 +0800
Subject: [PATCH] readelf: display dynamic symtab without section headers

This commit adds a new option "-D/--use-dynamic" to support printing the
dynamic symbol table from the PT_DYNAMIC segment. By using the
PT_DYNAMIC segment, eu-readelf can go through the contents of dynamic
section entries and the values of each tag. From that, we can get the
address and size of the dynamic symbol table, the address of the string
table, etc.

By using the new option "-D/--use-dynamic", eu-readelf can list the
symbols without section headers.

Example:
  $ ./src/readelf -Ds a.out
      0: 0000000000000000      0 NOTYPE  LOCAL  DEFAULT    UNDEF
      1: 0000000000000000      0 FUNC    GLOBAL DEFAULT    UNDEF
__libc_start_main@GLIBC_2.34 (2)
      2: 0000000000000000      0 NOTYPE  WEAK   DEFAULT    UNDEF
__gmon_start__

https://sourceware.org/bugzilla/show_bug.cgi?id=28873

Signed-off-by: Di Chen <dichen@redhat.com>
---
 src/readelf.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 283 insertions(+), 6 deletions(-)

 static void handle_verdef (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
@@ -327,7 +328,9 @@ enum dyn_idx
 {
   i_strsz,
   i_verneed,
+  i_verneednum,
   i_verdef,
+  i_verdefnum,
   i_versym,
   i_symtab,
   i_strtab,
@@ -1042,7 +1045,7 @@ process_elf_file (Dwfl_Module *dwflmod, int fd)
     symtab_printed |= print_symtab (ebl, SHT_DYNSYM);
   if (print_version_info)
     print_verinfo (ebl);
-  if (print_symbol_table)
+  if (print_symbol_table && !use_dynamic_segment)
     symtab_printed |= print_symtab (ebl, SHT_SYMTAB);

   if ((print_symbol_table || print_dynsym_table)
@@ -2442,6 +2445,12 @@ handle_relocs_rela (Ebl *ebl, GElf_Ehdr *ehdr,
Elf_Scn *scn, GElf_Shdr *shdr)
 static bool
 print_symtab (Ebl *ebl, int type)
 {
+  /* Use the dynamic section info to display symbol tables.  */
+  if (use_dynamic_segment && type == SHT_DYNSYM)
+  {
+ return handle_dynamic_symtab(ebl);
+  }
+
   /* Find the symbol table(s).  For this we have to search through the
      section table.  */
   Elf_Scn *scn = NULL;
@@ -2480,16 +2489,275 @@ print_symtab (Ebl *ebl, int type)
     _("cannot get section [%zd] header: %s"),
     elf_ndxscn (scn), elf_errmsg (-1));
     }
-  handle_symtab (ebl, scn, shdr);
-  symtab_printed = true;
+  symtab_printed = handle_symtab (ebl, scn, shdr);
  }
     }

   return symtab_printed;
 }

+static bool
+handle_dynamic_symtab (Ebl *ebl)
+{
+  GElf_Phdr *phdr = NULL;
+  /* phnum is a static variable which already fetched in function
process_elf_file.  */
+  for (size_t i = 0; i < phnum; ++i) {
+    GElf_Phdr phdr_mem;
+    phdr = gelf_getphdr(ebl->elf, i, &phdr_mem);
+    if (phdr->p_type == PT_DYNAMIC) {
+      break;
+    }
+  }
+  if (phdr == NULL)
+ return false;

-static void
+  GElf_Addr addrs[i_max] = {0,};
+  GElf_Off offs[i_max] = {0,};
+  get_dynscn_addrs(ebl->elf, phdr, addrs);
+  find_offsets(ebl->elf, 0, i_max, addrs, offs);
+
+  size_t syments;
+
+  GElf_Ehdr ehdr_mem;
+  GElf_Ehdr *ehdr = gelf_getehdr(ebl->elf, &ehdr_mem);
+
+  if (offs[i_hash] != 0) {
+    /* In the original format, .hash says the size of .dynsym.  */
+
+    size_t entsz = SH_ENTSIZE_HASH(ehdr);
+    Elf_Data *data =
+        elf_getdata_rawchunk(ebl->elf, offs[i_hash] + entsz, entsz,
+                             (entsz == 4 ? ELF_T_WORD : ELF_T_XWORD));
+    if (data != NULL)
+      syments = (entsz == 4 ? *(const GElf_Word *)data->d_buf
+                            : *(const GElf_Xword *)data->d_buf);
+  }
+  if (offs[i_gnu_hash] != 0 && syments == 0) {
+    /* In the new format, we can derive it with some work.  */
+
+    const struct {
+      Elf32_Word nbuckets;
+      Elf32_Word symndx;
+      Elf32_Word maskwords;
+      Elf32_Word shift2;
+    } * header;
+
+    Elf_Data *data = elf_getdata_rawchunk(ebl->elf, offs[i_gnu_hash],
+                                          sizeof *header, ELF_T_WORD);
+    if (data != NULL) {
+      header = data->d_buf;
+      Elf32_Word nbuckets = header->nbuckets;
+      Elf32_Word symndx = header->symndx;
+      GElf_Off buckets_at = (offs[i_gnu_hash] + sizeof *header +
+                             (gelf_getclass(ebl->elf) * sizeof(Elf32_Word)
*
+                              header->maskwords));
+
+      // elf_getdata_rawchunk takes a size_t, make sure it
+      // doesn't overflow.
+  #if SIZE_MAX <= UINT32_MAX
+      if (nbuckets > SIZE_MAX / sizeof(Elf32_Word))
+        data = NULL;
+      else
+  #endif
+        data = elf_getdata_rawchunk(ebl->elf, buckets_at,
+                                    nbuckets * sizeof(Elf32_Word),
ELF_T_WORD);
+      if (data != NULL && symndx < nbuckets) {
+        const Elf32_Word *const buckets = data->d_buf;
+        Elf32_Word maxndx = symndx;
+        for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
+          if (buckets[bucket] > maxndx)
+            maxndx = buckets[bucket];
+
+        GElf_Off hasharr_at = (buckets_at + nbuckets * sizeof(Elf32_Word));
+        hasharr_at += (maxndx - symndx) * sizeof(Elf32_Word);
+        do {
+          data = elf_getdata_rawchunk(ebl->elf, hasharr_at,
+                                      sizeof(Elf32_Word), ELF_T_WORD);
+          if (data != NULL && (*(const Elf32_Word *)data->d_buf & 1u)) {
+            syments = maxndx + 1;
+            break;
+          }
+          ++maxndx;
+          hasharr_at += sizeof(Elf32_Word);
+        } while (data != NULL);
+      }
+    }
+  }
+  if (offs[i_strtab] > offs[i_symtab] && syments == 0)
+    syments = ((offs[i_strtab] - offs[i_symtab]) /
+      gelf_fsize(ebl->elf, ELF_T_SYM, 1, EV_CURRENT));
+
+  if (syments <= 0 || offs[i_strtab] == 0 || offs[i_symtab] == 0) {
+    error_exit(0, _("Dynamic symbol information is not available for
displaying symbols."));
+  }
+
+  /* All the data chunk initializaion.  */
+  Elf_Data *symdata = NULL;
+  Elf_Data *symstrdata = NULL;
+  Elf_Data *versym_data = NULL;
+  Elf_Data *verdef_data = NULL;
+  Elf_Data *verneed_data = NULL;
+
+  symdata = elf_getdata_rawchunk(
+      ebl->elf, offs[i_symtab],
+      gelf_fsize(ebl->elf, ELF_T_SYM, syments, EV_CURRENT), ELF_T_SYM);
+  symstrdata = elf_getdata_rawchunk(ebl->elf, offs[i_strtab],
addrs[i_strsz],
+                                    ELF_T_BYTE);
+  versym_data = elf_getdata_rawchunk(ebl->elf, offs[i_versym],
+                                     syments * sizeof(Elf64_Half),
ELF_T_HALF);
+
+  /* Get the verneed_data without vernaux.  */
+  verneed_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verneed], addrs[i_verneednum] *
sizeof(Elf64_Verneed),
+      ELF_T_VNEED);
+  size_t vernauxnum = 0;
+  size_t vn_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verneednum]; i++) {
+    GElf_Verneed *verneed =
+        (GElf_Verneed *)(verneed_data->d_buf + vn_next_offset);
+    vernauxnum += verneed->vn_cnt;
+    vn_next_offset += verneed->vn_next;
+  }
+
+  /* Update the verneed_data to include the vernaux.  */
+  verneed_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verneed],
+      (addrs[i_verneednum] + vernauxnum) * sizeof(GElf_Verneed),
ELF_T_VNEED);
+
+  /* Get the verdef_data without verdaux.  */
+  verdef_data = elf_getdata_rawchunk(ebl->elf, offs[i_verdef],
+                                     addrs[i_verdefnum] *
sizeof(Elf64_Verdef),
+                                     ELF_T_VDEF);
+  size_t verdauxnum = 0;
+  size_t vd_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verdefnum]; i++) {
+    GElf_Verdef *verdef = (GElf_Verdef *)(verdef_data->d_buf +
vd_next_offset);
+    verdauxnum += verdef->vd_cnt;
+    vd_next_offset += verdef->vd_next;
+  }
+
+  /* Update the verdef_data to include the verdaux.  */
+  verdef_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verdef],
+      (addrs[i_verdefnum] + verdauxnum) * sizeof(GElf_Verdef), ELF_T_VDEF);
+
+  for (size_t i = 0; i < syments; i++) {
+    /* Get the symbol table entry.  */
+    GElf_Sym sym_mem;
+    GElf_Sym *sym;
+    sym = gelf_getsym(symdata, i, &sym_mem);
+
+    char bindbuf[64];
+    char typebuf[64];
+    char scnbuf[64];
+    int class = gelf_getclass(ebl->elf);
+    Elf32_Word xndx;
+    xndx = sym->st_shndx;
+
+    printf(_("\
+%5ld: %0*" PRIx64 " %6" PRId64 " %-7s %-6s %-9s %6s %s"),
+           i, class == ELFCLASS32 ? 8 : 16, sym->st_value, sym->st_size,
+           ebl_symbol_type_name(ebl, GELF_ST_TYPE(sym->st_info), typebuf,
+                                sizeof(typebuf)),
+           ebl_symbol_binding_name(ebl, GELF_ST_BIND(sym->st_info),
bindbuf,
+                                   sizeof(bindbuf)),
+           get_visibility_type(GELF_ST_VISIBILITY(sym->st_other)),
+           ebl_section_name(ebl, sym->st_shndx, xndx, scnbuf,
sizeof(scnbuf),
+                            NULL, shnum),
+           ((char *)symstrdata->d_buf) + sym->st_name);
+
+    if (versym_data != NULL) {
+      /* Get the version information.  */
+      GElf_Versym versym_mem;
+      GElf_Versym *versym = gelf_getversym(versym_data, i, &versym_mem);
+
+      if (versym != NULL && ((*versym & 0x8000) != 0 || *versym > 1)) {
+        GElf_Vernaux vernaux_mem;
+        GElf_Vernaux *vernaux = NULL;
+        size_t vn_offset = 0;
+        GElf_Verneed verneed_mem;
+        GElf_Verneed *verneed = (GElf_Verneed *)verneed_data->d_buf;
+
+        while (verneed != NULL) {
+          size_t vna_offset = vn_offset;
+
+          vernaux = gelf_getvernaux(verneed_data, vna_offset +=
verneed->vn_aux,
+                                    &vernaux_mem);
+          while (vernaux != NULL && vernaux->vna_other != *versym &&
+                 vernaux->vna_next != 0 &&
+                 (verneed_data->d_size - vna_offset >= vernaux->vna_next))
{
+            /* Update the offset.  */
+            vna_offset += vernaux->vna_next;
+
+            vernaux =
+                (vernaux->vna_next == 0
+                     ? NULL
+                     : gelf_getvernaux(verneed_data, vna_offset,
&vernaux_mem));
+          }
+
+          /* Check whether we found the version.  */
+          if (vernaux != NULL && vernaux->vna_other == *versym)
+            /* Found it.  */
+            break;
+
+          if (verneed_data->d_size - vn_offset < verneed->vn_next)
+            break;
+
+          vn_offset += verneed->vn_next;
+          verneed =
+              (verneed->vn_next == 0
+                   ? NULL
+                   : gelf_getverneed(verneed_data, vn_offset,
&verneed_mem));
+        }
+
+        if (vernaux != NULL && vernaux->vna_other == *versym) {
+          printf("@%s (%u)", (char *)symstrdata->d_buf + vernaux->vna_name,
+                 (unsigned int)vernaux->vna_other);
+        }
+
+        if (addrs[i_verdefnum] && *versym != 0x8001) {
+          /* We must test both.  */
+          size_t vd_offset = 0;
+
+          GElf_Verdef verdef_mem;
+          GElf_Verdef *verdef = gelf_getverdef(verdef_data, 0,
&verdef_mem);
+          while (verdef != NULL) {
+            if (verdef->vd_ndx == (*versym & 0x7fff))
+              /* Found the definition.  */
+              break;
+
+            if (verdef_data->d_size - vd_offset < verdef->vd_next)
+              break;
+
+            vd_offset += verdef->vd_next;
+            verdef =
+                (verdef->vd_next == 0
+                     ? NULL
+                     : gelf_getverdef(verdef_data, vd_offset,
&verdef_mem));
+          }
+
+          if (verdef != NULL) {
+            GElf_Verdaux verdaux_mem;
+            GElf_Verdaux *verdaux = gelf_getverdaux(
+                verdef_data, vd_offset + verdef->vd_aux, &verdaux_mem);
+
+            if (verdaux != NULL)
+              printf((*versym & 0x8000) ? "@%s" : "@@%s",
+                     (char *)symstrdata->d_buf + verdaux->vda_name);
+          }
+        }
+      }
+    }
+    putchar_unlocked('\n');
+  }
+  return true;
+}
+
+
+
+static bool
 handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
 {
   Elf_Data *versym_data = NULL;
@@ -2503,7 +2771,7 @@ handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr
*shdr)
   /* Get the data of the section.  */
   Elf_Data *data = elf_getdata (scn, NULL);
   if (data == NULL)
-    return;
+    return false;

   /* Find out whether we have other sections we might need.  */
   Elf_Scn *runscn = NULL;
@@ -2730,6 +2998,7 @@ handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr
*shdr)

       putchar_unlocked ('\n');
     }
+    return true;
 }


@@ -4955,10 +5224,18 @@ get_dynscn_addrs(Elf *elf, GElf_Phdr *phdr,
GElf_Addr addrs[i_max])
       addrs[i_verdef] = dyn->d_un.d_ptr;
       break;

+    case DT_VERDEFNUM:
+      addrs[i_verdefnum] = dyn->d_un.d_val;
+      break;
+
     case DT_VERNEED:
       addrs[i_verneed] = dyn->d_un.d_ptr;
       break;

+    case DT_VERNEEDNUM:
+      addrs[i_verneednum] = dyn->d_un.d_val;
+      break;
+
     case DT_STRSZ:
       addrs[i_strsz] = dyn->d_un.d_val;
       break;
  

Comments

Mark Wielaard Feb. 16, 2023, 4:02 p.m. UTC | #1
Hi,

On Sat, 2023-02-11 at 00:17 +0800, Di Chen via Elfutils-devel wrote:
> From bdc19de94bff8f8812611b9ba8c0116a650d0fb5 Mon Sep 17 00:00:00 2001
> From: Di Chen <dichen@redhat.com>
> Date: Fri, 13 Jan 2023 20:12:43 +0800
> Subject: [PATCH] readelf: display dynamic symtab without section headers
> 
> This commit adds a new option "-D/--use-dynamic" to support printing the
> dynamic symbol table from the PT_DYNAMIC segment. By using the
> PT_DYNAMIC segment, eu-readelf can go through the contents of dynamic
> section entries and the values of each tag. From that, we can get the
> address and size of the dynamic symbol table, the address of the string
> table, etc.
> 
> By using the new option "-D/--use-dynamic", eu-readelf can list the
> symbols without section headers.
> 
> Example:
>   $ ./src/readelf -Ds a.out
>       0: 0000000000000000      0 NOTYPE  LOCAL  DEFAULT    UNDEF
>       1: 0000000000000000      0 FUNC    GLOBAL DEFAULT    UNDEF
> __libc_start_main@GLIBC_2.34 (2)
>       2: 0000000000000000      0 NOTYPE  WEAK   DEFAULT    UNDEF
> __gmon_start__
> 
> https://sourceware.org/bugzilla/show_bug.cgi?id=28873
> 
> Signed-off-by: Di Chen <dichen@redhat.com>

Very nice.

Note that my local build (gcc 12.2.1) says:

In function ‘handle_dynamic_symtab’,
    inlined from ‘print_symtab’ at
/home/mark/src/elfutils/src/readelf.c:2451:9:
/home/mark/src/elfutils/src/readelf.c:2535:29: error: ‘syments’ may be
used uninitialized [-Werror=maybe-uninitialized]
 2535 |   if (offs[i_gnu_hash] != 0 && syments == 0) {
      |       ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~
/home/mark/src/elfutils/src/readelf.c: In function ‘print_symtab’:
/home/mark/src/elfutils/src/readelf.c:2519:10: note: ‘syments’ was
declared here
 2519 |   size_t syments;
      |          ^~~~~~~

And I think it is correct. We have 3 different ways of trying to set
syments. We should initialize syments to zero so each check knows
whether a previous one succeeded or not.

I have not been very picky about missing ChangeLog entries, but in this
case it would really help understanding what was changed.

Specifically it would be nice to note that the code in
handle_dynamic_symtab to calculate the syments was lifted from
libdwfl/dwfl_module_getdwarf.c.

Lets keep the indentation GNU style, like in dwfl_module_getdwarf.c, I
don't think the new indentation is helpful.

Finally the actual printing of the symbols and processing the version
information in handle_dynamic_symtab look like they were mostly copied
from handle_symtab. Can that code be shared?

Thanks,

Mark
  
Di Chen April 2, 2023, 1:06 a.m. UTC | #2
Thank you Mark.

I made the following changes:
* ChangeLog&NEWS update.
* syments initialization to 0.
* new function format update to GNU style.
* extract the shared part to a new function, reduce code redundancy.

The patch is ready for review again.

From 70ac6993744985b29935df6010e36bf9dbbd96d9 Mon Sep 17 00:00:00 2001
From: Di Chen <dichen@redhat.com>
Date: Mon, 27 Mar 2023 10:01:05 +0800
Subject: [PATCH] readelf: display dynamic symtab without section headers

This commit adds a new option "-D/--use-dynamic" to support printing the
dynamic symbol table from the PT_DYNAMIC segment. By using the
PT_DYNAMIC segment, eu-readelf can go through the contents of dynamic
section entries and the values of each tag. From that, we can get the
address and size of the dynamic symbol table, the address of the string
table, etc.

By using the new option "-D/--use-dynamic", eu-readelf can list the
symbols without section headers.

Example:
  $ ./src/readelf -Ds a.out
      0: 0000000000000000      0 NOTYPE  LOCAL  DEFAULT    UNDEF
      1: 0000000000000000      0 FUNC    GLOBAL DEFAULT    UNDEF
__libc_start_main@GLIBC_2.34 (2)
      2: 0000000000000000      0 NOTYPE  WEAK   DEFAULT    UNDEF
__gmon_start__

https://sourceware.org/bugzilla/show_bug.cgi?id=28873

Signed-off-by: Di Chen <dichen@redhat.com>
---
 ChangeLog     |   5 +
 NEWS          |   2 +
 src/ChangeLog |   6 +
 src/readelf.c | 531 ++++++++++++++++++++++++++++++++++++--------------
 4 files changed, 393 insertions(+), 151 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 10c23002..ece07e97 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2023-03-27  Di Chen  <dichen@redhat.com>
+
+ * NEWS: Support readelf -Ds for using dynamic segment to
+    print symbol table.
+
 2023-03-03  Mark Wielaard  <mark@klomp.org>

  * NEWS: Add ELFCOMPRESS_ZSTD support for libelf and elfcompress.
diff --git a/NEWS b/NEWS
index 16e37eca..679d8bd5 100644
--- a/NEWS
+++ b/NEWS
@@ -17,6 +17,8 @@ elfcompress: -t, --type= now support zstd if libelf has
been build with

 backends: Add support for LoongArch and Synopsys ARCv2 processors.

+readelf: Support readelf -Ds, --use-dynamic --symbol.
+
 Version 0.188 "no section left behind"

 readelf: Add -D, --use-dynamic option.
diff --git a/src/ChangeLog b/src/ChangeLog
index c26dafdd..8aa269cd 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+2023-03-27  Di Chen  <dichen@redhat.com>
+
+ * readelf.c (options): Support dynamic symtab print with '-Ds'.
+ (process_symtab): New function.
+ (handle_dynamic_symtab): Likewise.
+
 2023-03-03  Mark Wielaard  <mark@klomp.org>

  * readelf (process_elf_file): ebl_closebackend only after
diff --git a/src/readelf.c b/src/readelf.c
index 6950204e..70848c21 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -307,7 +307,21 @@ static void handle_relocs_rel (Ebl *ebl, GElf_Ehdr
*ehdr, Elf_Scn *scn,
 static void handle_relocs_rela (Ebl *ebl, GElf_Ehdr *ehdr, Elf_Scn *scn,
  GElf_Shdr *shdr);
 static bool print_symtab (Ebl *ebl, int type);
-static void handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
+static bool handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
+static bool handle_dynamic_symtab (Ebl *ebl);
+static void
+process_symtab(
+ Ebl * ebl,
+ unsigned int nsyms,
+ Elf64_Word idx,
+ Elf32_Word verneed_stridx,
+ Elf32_Word verdef_stridx,
+ Elf_Data * symdata,
+ Elf_Data * versym_data,
+ Elf_Data * symstr_data,
+ Elf_Data * verneed_data,
+ Elf_Data * verdef_data,
+ Elf_Data * xndx_data);
 static void print_verinfo (Ebl *ebl);
 static void handle_verneed (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
 static void handle_verdef (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
@@ -325,9 +339,12 @@ static void dump_archive_index (Elf *, const char *);

 enum dyn_idx
 {
+  i_symtab_shndx,
   i_strsz,
   i_verneed,
+  i_verneednum,
   i_verdef,
+  i_verdefnum,
   i_versym,
   i_symtab,
   i_strtab,
@@ -1042,7 +1059,7 @@ process_elf_file (Dwfl_Module *dwflmod, int fd)
     symtab_printed |= print_symtab (ebl, SHT_DYNSYM);
   if (print_version_info)
     print_verinfo (ebl);
-  if (print_symbol_table)
+  if (print_symbol_table && !use_dynamic_segment)
     symtab_printed |= print_symtab (ebl, SHT_SYMTAB);

   if ((print_symbol_table || print_dynsym_table)
@@ -2443,6 +2460,12 @@ handle_relocs_rela (Ebl *ebl, GElf_Ehdr *ehdr,
Elf_Scn *scn, GElf_Shdr *shdr)
 static bool
 print_symtab (Ebl *ebl, int type)
 {
+  /* Use the dynamic section info to display symbol tables.  */
+  if (use_dynamic_segment && type == SHT_DYNSYM)
+  {
+ return handle_dynamic_symtab(ebl);
+  }
+
   /* Find the symbol table(s).  For this we have to search through the
      section table.  */
   Elf_Scn *scn = NULL;
@@ -2481,8 +2504,7 @@ print_symtab (Ebl *ebl, int type)
     _("cannot get section [%zd] header: %s"),
     elf_ndxscn (scn), elf_errmsg (-1));
     }
-  handle_symtab (ebl, scn, shdr);
-  symtab_printed = true;
+  symtab_printed = handle_symtab (ebl, scn, shdr);
  }
     }

@@ -2491,6 +2513,179 @@ print_symtab (Ebl *ebl, int type)


 static void
+process_symtab (Ebl *ebl, unsigned int nsyms, Elf64_Word idx,
+                Elf32_Word verneed_stridx, Elf32_Word verdef_stridx,
+                Elf_Data *symdata, Elf_Data *versym_data,
+                Elf_Data *symstr_data, Elf_Data *verneed_data,
+                Elf_Data *verdef_data, Elf_Data *xndx_data)
+{
+  for (unsigned int cnt = 0; cnt < nsyms; ++cnt)
+    {
+      char typebuf[64];
+      char bindbuf[64];
+      char scnbuf[64];
+      Elf32_Word xndx;
+      GElf_Sym sym_mem;
+      GElf_Sym *sym
+          = gelf_getsymshndx (symdata, xndx_data, cnt, &sym_mem, &xndx);
+
+      if (unlikely (sym == NULL))
+        continue;
+
+      /* Determine the real section index.  */
+      if (likely (sym->st_shndx != SHN_XINDEX))
+        xndx = sym->st_shndx;
+
+      printf (_ ("\
+%5u: %0*" PRIx64 " %6" PRId64 " %-7s %-6s %-9s %6s %s"),
+              cnt, gelf_getclass (ebl->elf) == ELFCLASS32 ? 8 : 16,
+              sym->st_value, sym->st_size,
+              ebl_symbol_type_name (ebl, GELF_ST_TYPE (sym->st_info),
typebuf,
+                                    sizeof (typebuf)),
+              ebl_symbol_binding_name (ebl, GELF_ST_BIND (sym->st_info),
+                                       bindbuf, sizeof (bindbuf)),
+              get_visibility_type (GELF_ST_VISIBILITY (sym->st_other)),
+              ebl_section_name (ebl, sym->st_shndx, xndx, scnbuf,
+                                sizeof (scnbuf), NULL, shnum),
+              use_dynamic_segment == true
+                  ? (char *)symstr_data->d_buf + sym->st_name
+                  : elf_strptr (ebl->elf, idx, sym->st_name));
+
+      if (versym_data != NULL)
+        {
+          /* Get the version information.  */
+          GElf_Versym versym_mem;
+          GElf_Versym *versym = gelf_getversym (versym_data, cnt,
&versym_mem);
+
+          if (versym != NULL && ((*versym & 0x8000) != 0 || *versym > 1))
+            {
+              bool is_nobits = false;
+              bool check_def = xndx != SHN_UNDEF;
+
+              if (xndx < SHN_LORESERVE || sym->st_shndx == SHN_XINDEX)
+                {
+                  GElf_Shdr symshdr_mem;
+                  GElf_Shdr *symshdr = gelf_getshdr (
+                      elf_getscn (ebl->elf, xndx), &symshdr_mem);
+
+                  is_nobits
+                      = (symshdr != NULL && symshdr->sh_type ==
SHT_NOBITS);
+                }
+
+              if (is_nobits || !check_def)
+                {
+                  /* We must test both.  */
+                  GElf_Vernaux vernaux_mem;
+                  GElf_Vernaux *vernaux = NULL;
+                  size_t vn_offset = 0;
+
+                  GElf_Verneed verneed_mem;
+                  GElf_Verneed *verneed
+                      = gelf_getverneed (verneed_data, 0, &verneed_mem);
+                  while (verneed != NULL)
+                    {
+                      size_t vna_offset = vn_offset;
+
+                      vernaux = gelf_getvernaux (verneed_data,
+                                                 vna_offset +=
verneed->vn_aux,
+                                                 &vernaux_mem);
+                      while (vernaux != NULL && vernaux->vna_other !=
*versym
+                             && vernaux->vna_next != 0
+                             && (verneed_data->d_size - vna_offset
+                                 >= vernaux->vna_next))
+                        {
+                          /* Update the offset.  */
+                          vna_offset += vernaux->vna_next;
+
+                          vernaux = (vernaux->vna_next == 0
+                                         ? NULL
+                                         : gelf_getvernaux (verneed_data,
+                                                            vna_offset,
+                                                            &vernaux_mem));
+                        }
+
+                      /* Check whether we found the version.  */
+                      if (vernaux != NULL && vernaux->vna_other == *versym)
+                        /* Found it.  */
+                        break;
+
+                      if (verneed_data->d_size - vn_offset <
verneed->vn_next)
+                        break;
+
+                      vn_offset += verneed->vn_next;
+                      verneed
+                          = (verneed->vn_next == 0
+                                 ? NULL
+                                 : gelf_getverneed (verneed_data,
vn_offset,
+                                                    &verneed_mem));
+                    }
+
+                  if (vernaux != NULL && vernaux->vna_other == *versym)
+                    {
+                      printf ("@%s (%u)",
+                              use_dynamic_segment == true
+                                  ? (char *)symstr_data->d_buf
+                                        + vernaux->vna_name
+                                  : elf_strptr (ebl->elf, verneed_stridx,
+                                                vernaux->vna_name),
+                              (unsigned int)vernaux->vna_other);
+                      check_def = 0;
+                    }
+                  else if (unlikely (!is_nobits))
+                    error (0, 0, _ ("bad dynamic symbol"));
+                  else
+                    check_def = 1;
+                }
+
+              if (check_def && *versym != 0x8001)
+                {
+                  /* We must test both.  */
+                  size_t vd_offset = 0;
+
+                  GElf_Verdef verdef_mem;
+                  GElf_Verdef *verdef
+                      = gelf_getverdef (verdef_data, 0, &verdef_mem);
+                  while (verdef != NULL)
+                    {
+                      if (verdef->vd_ndx == (*versym & 0x7fff))
+                        /* Found the definition.  */
+                        break;
+
+                      if (verdef_data->d_size - vd_offset <
verdef->vd_next)
+                        break;
+
+                      vd_offset += verdef->vd_next;
+                      verdef = (verdef->vd_next == 0
+                                    ? NULL
+                                    : gelf_getverdef (verdef_data,
vd_offset,
+                                                      &verdef_mem));
+                    }
+
+                  if (verdef != NULL)
+                    {
+                      GElf_Verdaux verdaux_mem;
+                      GElf_Verdaux *verdaux = gelf_getverdaux (
+                          verdef_data, vd_offset + verdef->vd_aux,
+                          &verdaux_mem);
+
+                      if (verdaux != NULL)
+                        printf ((*versym & 0x8000) ? "@%s" : "@@%s",
+                                use_dynamic_segment == true
+                                    ? (char *)symstr_data->d_buf
+                                          + verdaux->vda_name
+                                    : elf_strptr (ebl->elf, verdef_stridx,
+                                                  verdaux->vda_name));
+                    }
+                }
+            }
+        }
+
+      putchar_unlocked ('\n');
+    }
+}
+
+
+static bool
 handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
 {
   Elf_Data *versym_data = NULL;
@@ -2504,7 +2699,7 @@ handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr
*shdr)
   /* Get the data of the section.  */
   Elf_Data *data = elf_getdata (scn, NULL);
   if (data == NULL)
-    return;
+    return false;

   /* Find out whether we have other sections we might need.  */
   Elf_Scn *runscn = NULL;
@@ -2574,163 +2769,186 @@ handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr
*shdr)
   Num:            Value   Size Type    Bind   Vis          Ndx Name\n"),
   stdout);

-  for (unsigned int cnt = 0; cnt < nsyms; ++cnt)
-    {
-      char typebuf[64];
-      char bindbuf[64];
-      char scnbuf[64];
-      Elf32_Word xndx;
-      GElf_Sym sym_mem;
-      GElf_Sym *sym = gelf_getsymshndx (data, xndx_data, cnt, &sym_mem,
&xndx);
-
-      if (unlikely (sym == NULL))
- continue;
-
-      /* Determine the real section index.  */
-      if (likely (sym->st_shndx != SHN_XINDEX))
- xndx = sym->st_shndx;
-
-      printf (_("\
-%5u: %0*" PRIx64 " %6" PRId64 " %-7s %-6s %-9s %6s %s"),
-      cnt,
-      class == ELFCLASS32 ? 8 : 16,
-      sym->st_value,
-      sym->st_size,
-      ebl_symbol_type_name (ebl, GELF_ST_TYPE (sym->st_info),
-    typebuf, sizeof (typebuf)),
-      ebl_symbol_binding_name (ebl, GELF_ST_BIND (sym->st_info),
-       bindbuf, sizeof (bindbuf)),
-      get_visibility_type (GELF_ST_VISIBILITY (sym->st_other)),
-      ebl_section_name (ebl, sym->st_shndx, xndx, scnbuf,
- sizeof (scnbuf), NULL, shnum),
-      elf_strptr (ebl->elf, shdr->sh_link, sym->st_name));
-
-      if (versym_data != NULL)
- {
-  /* Get the version information.  */
-  GElf_Versym versym_mem;
-  GElf_Versym *versym = gelf_getversym (versym_data, cnt, &versym_mem);
-
-  if (versym != NULL && ((*versym & 0x8000) != 0 || *versym > 1))
-    {
-      bool is_nobits = false;
-      bool check_def = xndx != SHN_UNDEF;
-
-      if (xndx < SHN_LORESERVE || sym->st_shndx == SHN_XINDEX)
- {
-  GElf_Shdr symshdr_mem;
-  GElf_Shdr *symshdr =
-    gelf_getshdr (elf_getscn (ebl->elf, xndx), &symshdr_mem);
-
-  is_nobits = (symshdr != NULL
-       && symshdr->sh_type == SHT_NOBITS);
- }
+ process_symtab(ebl, nsyms, shdr->sh_link, verneed_stridx, verdef_stridx,
+ data, versym_data, NULL, verneed_data, verdef_data, xndx_data);
+    return true;
+}

-      if (is_nobits || ! check_def)
- {
-  /* We must test both.  */
-  GElf_Vernaux vernaux_mem;
-  GElf_Vernaux *vernaux = NULL;
-  size_t vn_offset = 0;
-
-  GElf_Verneed verneed_mem;
-  GElf_Verneed *verneed = gelf_getverneed (verneed_data, 0,
-   &verneed_mem);
-  while (verneed != NULL)
-    {
-      size_t vna_offset = vn_offset;
-
-      vernaux = gelf_getvernaux (verneed_data,
- vna_offset += verneed->vn_aux,
- &vernaux_mem);
-      while (vernaux != NULL
-     && vernaux->vna_other != *versym
-     && vernaux->vna_next != 0
-     && (verneed_data->d_size - vna_offset
- >= vernaux->vna_next))
- {
-  /* Update the offset.  */
-  vna_offset += vernaux->vna_next;
-
-  vernaux = (vernaux->vna_next == 0
-     ? NULL
-     : gelf_getvernaux (verneed_data,
- vna_offset,
- &vernaux_mem));
- }

-      /* Check whether we found the version.  */
-      if (vernaux != NULL && vernaux->vna_other == *versym)
- /* Found it.  */
- break;
+static bool
+handle_dynamic_symtab (Ebl *ebl)
+{
+  GElf_Phdr *phdr = NULL;
+  /* phnum is a static variable which already fetched in function
+   * process_elf_file.  */
+  for (size_t i = 0; i < phnum; ++i)
+    {
+      GElf_Phdr phdr_mem;
+      phdr = gelf_getphdr (ebl->elf, i, &phdr_mem);
+      if (phdr->p_type == PT_DYNAMIC)
+        {
+          break;
+        }
+    }
+  if (phdr == NULL)
+    return false;

-      if (verneed_data->d_size - vn_offset < verneed->vn_next)
- break;
+  GElf_Addr addrs[i_max] = {
+    0,
+  };
+  GElf_Off offs[i_max] = {
+    0,
+  };
+  get_dynscn_addrs (ebl->elf, phdr, addrs);
+  find_offsets (ebl->elf, 0, i_max, addrs, offs);

-      vn_offset += verneed->vn_next;
-      verneed = (verneed->vn_next == 0
- ? NULL
- : gelf_getverneed (verneed_data, vn_offset,
-    &verneed_mem));
-    }
+  size_t syments = 0;

-  if (vernaux != NULL && vernaux->vna_other == *versym)
-    {
-      printf ("@%s (%u)",
-      elf_strptr (ebl->elf, verneed_stridx,
-  vernaux->vna_name),
-      (unsigned int) vernaux->vna_other);
-      check_def = 0;
-    }
-  else if (unlikely (! is_nobits))
-    error (0, 0, _("bad dynamic symbol"));
-  else
-    check_def = 1;
- }
+  GElf_Ehdr ehdr_mem;
+  GElf_Ehdr *ehdr = gelf_getehdr (ebl->elf, &ehdr_mem);

-      if (check_def && *versym != 0x8001)
- {
-  /* We must test both.  */
-  size_t vd_offset = 0;
+  if (offs[i_hash] != 0)
+    {
+      /* In the original format, .hash says the size of .dynsym.  */

-  GElf_Verdef verdef_mem;
-  GElf_Verdef *verdef = gelf_getverdef (verdef_data, 0,
- &verdef_mem);
-  while (verdef != NULL)
-    {
-      if (verdef->vd_ndx == (*versym & 0x7fff))
- /* Found the definition.  */
- break;
+      size_t entsz = SH_ENTSIZE_HASH (ehdr);
+      Elf_Data *data
+          = elf_getdata_rawchunk (ebl->elf, offs[i_hash] + entsz, entsz,
+                                  (entsz == 4 ? ELF_T_WORD : ELF_T_XWORD));
+      if (data != NULL)
+        syments = (entsz == 4 ? *(const GElf_Word *)data->d_buf
+                              : *(const GElf_Xword *)data->d_buf);
+    }
+  if (offs[i_gnu_hash] != 0 && syments == 0)
+    {
+      /* In the new format, we can derive it with some work.  */

-      if (verdef_data->d_size - vd_offset < verdef->vd_next)
- break;
+      const struct
+      {
+        Elf32_Word nbuckets;
+        Elf32_Word symndx;
+        Elf32_Word maskwords;
+        Elf32_Word shift2;
+      } * header;
+
+      Elf_Data *data = elf_getdata_rawchunk (ebl->elf, offs[i_gnu_hash],
+                                             sizeof *header, ELF_T_WORD);
+      if (data != NULL)
+        {
+          header = data->d_buf;
+          Elf32_Word nbuckets = header->nbuckets;
+          Elf32_Word symndx = header->symndx;
+          GElf_Off buckets_at
+              = (offs[i_gnu_hash] + sizeof *header
+                 + (gelf_getclass (ebl->elf) * sizeof (Elf32_Word)
+                    * header->maskwords));
+
+          // elf_getdata_rawchunk takes a size_t, make sure it
+          // doesn't overflow.
+#if SIZE_MAX <= UINT32_MAX
+          if (nbuckets > SIZE_MAX / sizeof (Elf32_Word))
+            data = NULL;
+          else
+#endif
+            data = elf_getdata_rawchunk (ebl->elf, buckets_at,
+                                         nbuckets * sizeof (Elf32_Word),
+                                         ELF_T_WORD);
+          if (data != NULL && symndx < nbuckets)
+            {
+              const Elf32_Word *const buckets = data->d_buf;
+              Elf32_Word maxndx = symndx;
+              for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
+                if (buckets[bucket] > maxndx)
+                  maxndx = buckets[bucket];
+
+              GElf_Off hasharr_at
+                  = (buckets_at + nbuckets * sizeof (Elf32_Word));
+              hasharr_at += (maxndx - symndx) * sizeof (Elf32_Word);
+              do
+                {
+                  data = elf_getdata_rawchunk (
+                      ebl->elf, hasharr_at, sizeof (Elf32_Word),
ELF_T_WORD);
+                  if (data != NULL && (*(const Elf32_Word *)data->d_buf &
1u))
+                    {
+                      syments = maxndx + 1;
+                      break;
+                    }
+                  ++maxndx;
+                  hasharr_at += sizeof (Elf32_Word);
+                }
+              while (data != NULL);
+            }
+        }
+    }
+  if (offs[i_strtab] > offs[i_symtab] && syments == 0)
+    syments = ((offs[i_strtab] - offs[i_symtab])
+               / gelf_fsize (ebl->elf, ELF_T_SYM, 1, EV_CURRENT));

-      vd_offset += verdef->vd_next;
-      verdef = (verdef->vd_next == 0
- ? NULL
- : gelf_getverdef (verdef_data, vd_offset,
-  &verdef_mem));
-    }
+  if (syments <= 0 || offs[i_strtab] == 0 || offs[i_symtab] == 0)
+    {
+      error_exit (0, _ ("Dynamic symbol information is not available for "
+                        "displaying symbols."));
+    }

-  if (verdef != NULL)
-    {
-      GElf_Verdaux verdaux_mem;
-      GElf_Verdaux *verdaux
- = gelf_getverdaux (verdef_data,
-   vd_offset + verdef->vd_aux,
-   &verdaux_mem);
-
-      if (verdaux != NULL)
- printf ((*versym & 0x8000) ? "@%s" : "@@%s",
- elf_strptr (ebl->elf, verdef_stridx,
-    verdaux->vda_name));
-    }
- }
-    }
- }
+  /* All the data chunk initializaion.  */
+  Elf_Data *symdata = NULL;
+  Elf_Data *symstrdata = NULL;
+  Elf_Data *versym_data = NULL;
+  Elf_Data *verdef_data = NULL;
+  Elf_Data *verneed_data = NULL;

-      putchar_unlocked ('\n');
-    }
+  symdata = elf_getdata_rawchunk (
+      ebl->elf, offs[i_symtab],
+      gelf_fsize (ebl->elf, ELF_T_SYM, syments, EV_CURRENT), ELF_T_SYM);
+  symstrdata = elf_getdata_rawchunk (ebl->elf, offs[i_strtab],
addrs[i_strsz],
+                                     ELF_T_BYTE);
+  versym_data = elf_getdata_rawchunk (
+      ebl->elf, offs[i_versym], syments * sizeof (Elf64_Half), ELF_T_HALF);
+
+  /* Get the verneed_data without vernaux.  */
+  verneed_data = elf_getdata_rawchunk (
+      ebl->elf, offs[i_verneed], addrs[i_verneednum] * sizeof
(Elf64_Verneed),
+      ELF_T_VNEED);
+  size_t vernauxnum = 0;
+  size_t vn_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verneednum]; i++)
+    {
+      GElf_Verneed *verneed
+          = (GElf_Verneed *)(verneed_data->d_buf + vn_next_offset);
+      vernauxnum += verneed->vn_cnt;
+      vn_next_offset += verneed->vn_next;
+    }
+
+  /* Update the verneed_data to include the vernaux.  */
+  verneed_data = elf_getdata_rawchunk (
+      ebl->elf, offs[i_verneed],
+      (addrs[i_verneednum] + vernauxnum) * sizeof (GElf_Verneed),
ELF_T_VNEED);
+
+  /* Get the verdef_data without verdaux.  */
+  verdef_data = elf_getdata_rawchunk (
+      ebl->elf, offs[i_verdef], addrs[i_verdefnum] * sizeof (Elf64_Verdef),
+      ELF_T_VDEF);
+  size_t verdauxnum = 0;
+  size_t vd_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verdefnum]; i++)
+    {
+      GElf_Verdef *verdef
+          = (GElf_Verdef *)(verdef_data->d_buf + vd_next_offset);
+      verdauxnum += verdef->vd_cnt;
+      vd_next_offset += verdef->vd_next;
+    }
+
+  /* Update the verdef_data to include the verdaux.  */
+  verdef_data = elf_getdata_rawchunk (
+      ebl->elf, offs[i_verdef],
+      (addrs[i_verdefnum] + verdauxnum) * sizeof (GElf_Verdef),
ELF_T_VDEF);
+
+  unsigned int nsyms = (unsigned int)syments;
+  process_symtab (ebl, nsyms, 0, 0, 0, symdata, versym_data, symstrdata,
+                  verneed_data, verdef_data, NULL);
+  return true;
 }


@@ -4990,13 +5208,24 @@ get_dynscn_addrs(Elf *elf, GElf_Phdr *phdr,
GElf_Addr addrs[i_max])
       addrs[i_verdef] = dyn->d_un.d_ptr;
       break;

+    case DT_VERDEFNUM:
+      addrs[i_verdefnum] = dyn->d_un.d_val;
+      break;
+
     case DT_VERNEED:
       addrs[i_verneed] = dyn->d_un.d_ptr;
       break;

+    case DT_VERNEEDNUM:
+      addrs[i_verneednum] = dyn->d_un.d_val;
+      break;
+
     case DT_STRSZ:
       addrs[i_strsz] = dyn->d_un.d_val;
       break;
+    case DT_SYMTAB_SHNDX:
+      addrs[i_symtab_shndx] = dyn->d_un.d_ptr;
+      break;
     }
   }
 }
  
Mark Wielaard April 18, 2023, 7:49 p.m. UTC | #3
Hi,

On Sun, Apr 02, 2023 at 09:06:11AM +0800, Di Chen wrote:
> I made the following changes:
> * ChangeLog&NEWS update.
> * syments initialization to 0.
> * new function format update to GNU style.
> * extract the shared part to a new function, reduce code redundancy.
> 
> The patch is ready for review again.

Looks really good. I had a few small nitpicks (see attached), but just
made those changes and checked it in.

Thanks a lot for all this work. This was way more complicated than I
expected at first. I am really happy you didn't get discouraged by
that.

Cheers,

Mark
diff --git a/ChangeLog b/ChangeLog
index ece07e97..6aed95b6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,7 +1,7 @@
 2023-03-27  Di Chen  <dichen@redhat.com>
 
 	* NEWS: Support readelf -Ds for using dynamic segment to
-    print symbol table.
+	print symbol table.
 
 2023-03-03  Mark Wielaard  <mark@klomp.org>
 
diff --git a/NEWS b/NEWS
index 679d8bd5..3c63a660 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+Version 0.190
+
+readelf: Support readelf -Ds, --use-dynamic --symbol.
+
 Version 0.189 "Don't deflate!"
 
 configure: eu-nm, eu-addr2line and eu-stack can provide demangled symbols
@@ -17,8 +21,6 @@ elfcompress: -t, --type= now support zstd if libelf has been build with
 
 backends: Add support for LoongArch and Synopsys ARCv2 processors.
 
-readelf: Support readelf -Ds, --use-dynamic --symbol.
-
 Version 0.188 "no section left behind"
 
 readelf: Add -D, --use-dynamic option.
diff --git a/src/ChangeLog b/src/ChangeLog
index 8aa269cd..ae62f4ed 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -3,6 +3,16 @@
 	* readelf.c (options): Support dynamic symtab print with '-Ds'.
 	(process_symtab): New function.
 	(handle_dynamic_symtab): Likewise.
+	(handle_symtab): Return true if symtab was printed. Move printing
+	code to process_symtab and call that function.
+	(print_symtab): Call handle_dynamic_symtab for SHT_DYNSYM when
+	using dynamic segment.
+	(enum dyn_idx): Include i_symtab_shndx, i_verneednum and
+	i_verdefnum.
+	(process_elf_file): Only call print_symtab for SHT_SYMTAB when not
+	use_dynamic_segment.
+	(get_dynscn_addrs): Handle DT_VERDEFNUM, DT_VERNEEDNUM and
+	DT_SYMTAB_SHNDX.
 
 2023-03-03  Mark Wielaard  <mark@klomp.org>
 
diff --git a/src/readelf.c b/src/readelf.c
index 70848c21..e717954d 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -2462,9 +2462,7 @@ print_symtab (Ebl *ebl, int type)
 {
   /* Use the dynamic section info to display symbol tables.  */
   if (use_dynamic_segment && type == SHT_DYNSYM)
-  {
-	return handle_dynamic_symtab(ebl);
-  }
+    return handle_dynamic_symtab(ebl);
 
   /* Find the symbol table(s).  For this we have to search through the
      section table.  */
@@ -2778,17 +2776,15 @@ handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
 static bool
 handle_dynamic_symtab (Ebl *ebl)
 {
+  GElf_Phdr phdr_mem;
   GElf_Phdr *phdr = NULL;
-  /* phnum is a static variable which already fetched in function
-   * process_elf_file.  */
+  /* phnum is a static variable which was already fetched in function
+     process_elf_file.  */
   for (size_t i = 0; i < phnum; ++i)
     {
-      GElf_Phdr phdr_mem;
       phdr = gelf_getphdr (ebl->elf, i, &phdr_mem);
       if (phdr->p_type == PT_DYNAMIC)
-        {
-          break;
-        }
+	break;
     }
   if (phdr == NULL)
     return false;
@@ -5223,6 +5219,7 @@ get_dynscn_addrs(Elf *elf, GElf_Phdr *phdr, GElf_Addr addrs[i_max])
     case DT_STRSZ:
       addrs[i_strsz] = dyn->d_un.d_val;
       break;
+
     case DT_SYMTAB_SHNDX:
       addrs[i_symtab_shndx] = dyn->d_un.d_ptr;
       break;
  

Patch

From bdc19de94bff8f8812611b9ba8c0116a650d0fb5 Mon Sep 17 00:00:00 2001
From: Di Chen <dichen@redhat.com>
Date: Fri, 13 Jan 2023 20:12:43 +0800
Subject: [PATCH] readelf: display dynamic symtab without section headers

This commit adds a new option "-D/--use-dynamic" to support printing the
dynamic symbol table from the PT_DYNAMIC segment. By using the
PT_DYNAMIC segment, eu-readelf can go through the contents of dynamic
section entries and the values of each tag. From that, we can get the
address and size of the dynamic symbol table, the address of the string
table, etc.

By using the new option "-D/--use-dynamic", eu-readelf can list the
symbols without section headers.

Example:
  $ ./src/readelf -Ds a.out
      0: 0000000000000000      0 NOTYPE  LOCAL  DEFAULT    UNDEF
      1: 0000000000000000      0 FUNC    GLOBAL DEFAULT    UNDEF __libc_start_main@GLIBC_2.34 (2)
      2: 0000000000000000      0 NOTYPE  WEAK   DEFAULT    UNDEF __gmon_start__

https://sourceware.org/bugzilla/show_bug.cgi?id=28873

Signed-off-by: Di Chen <dichen@redhat.com>
---
 src/readelf.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 283 insertions(+), 6 deletions(-)

diff --git a/src/readelf.c b/src/readelf.c
index 451f8400..9e1e9b73 100644
--- a/src/readelf.c
+++ b/src/readelf.c
@@ -307,7 +307,8 @@  static void handle_relocs_rel (Ebl *ebl, GElf_Ehdr *ehdr, Elf_Scn *scn,
 static void handle_relocs_rela (Ebl *ebl, GElf_Ehdr *ehdr, Elf_Scn *scn,
 				GElf_Shdr *shdr);
 static bool print_symtab (Ebl *ebl, int type);
-static void handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
+static bool handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
+static bool handle_dynamic_symtab (Ebl *ebl);
 static void print_verinfo (Ebl *ebl);
 static void handle_verneed (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
 static void handle_verdef (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr);
@@ -327,7 +328,9 @@  enum dyn_idx
 {
   i_strsz,
   i_verneed,
+  i_verneednum,
   i_verdef,
+  i_verdefnum,
   i_versym,
   i_symtab,
   i_strtab,
@@ -1042,7 +1045,7 @@  process_elf_file (Dwfl_Module *dwflmod, int fd)
     symtab_printed |= print_symtab (ebl, SHT_DYNSYM);
   if (print_version_info)
     print_verinfo (ebl);
-  if (print_symbol_table)
+  if (print_symbol_table && !use_dynamic_segment)
     symtab_printed |= print_symtab (ebl, SHT_SYMTAB);
 
   if ((print_symbol_table || print_dynsym_table)
@@ -2442,6 +2445,12 @@  handle_relocs_rela (Ebl *ebl, GElf_Ehdr *ehdr, Elf_Scn *scn, GElf_Shdr *shdr)
 static bool
 print_symtab (Ebl *ebl, int type)
 {
+  /* Use the dynamic section info to display symbol tables.  */
+  if (use_dynamic_segment && type == SHT_DYNSYM)
+  {
+	return handle_dynamic_symtab(ebl);
+  }
+
   /* Find the symbol table(s).  For this we have to search through the
      section table.  */
   Elf_Scn *scn = NULL;
@@ -2480,16 +2489,275 @@  print_symtab (Ebl *ebl, int type)
 			    _("cannot get section [%zd] header: %s"),
 			    elf_ndxscn (scn), elf_errmsg (-1));
 	    }
-	  handle_symtab (ebl, scn, shdr);
-	  symtab_printed = true;
+	  symtab_printed = handle_symtab (ebl, scn, shdr);
 	}
     }
 
   return symtab_printed;
 }
 
+static bool
+handle_dynamic_symtab (Ebl *ebl)
+{
+  GElf_Phdr *phdr = NULL;
+  /* phnum is a static variable which already fetched in function process_elf_file.  */
+  for (size_t i = 0; i < phnum; ++i) {
+    GElf_Phdr phdr_mem;
+    phdr = gelf_getphdr(ebl->elf, i, &phdr_mem);
+    if (phdr->p_type == PT_DYNAMIC) {
+      break;
+    }
+  }
+  if (phdr == NULL)
+	return false;
 
-static void
+  GElf_Addr addrs[i_max] = {0,};
+  GElf_Off offs[i_max] = {0,};
+  get_dynscn_addrs(ebl->elf, phdr, addrs);
+  find_offsets(ebl->elf, 0, i_max, addrs, offs);
+
+  size_t syments;
+
+  GElf_Ehdr ehdr_mem;
+  GElf_Ehdr *ehdr = gelf_getehdr(ebl->elf, &ehdr_mem);
+
+  if (offs[i_hash] != 0) {
+    /* In the original format, .hash says the size of .dynsym.  */
+
+    size_t entsz = SH_ENTSIZE_HASH(ehdr);
+    Elf_Data *data =
+        elf_getdata_rawchunk(ebl->elf, offs[i_hash] + entsz, entsz,
+                             (entsz == 4 ? ELF_T_WORD : ELF_T_XWORD));
+    if (data != NULL)
+      syments = (entsz == 4 ? *(const GElf_Word *)data->d_buf
+                            : *(const GElf_Xword *)data->d_buf);
+  }
+  if (offs[i_gnu_hash] != 0 && syments == 0) {
+    /* In the new format, we can derive it with some work.  */
+
+    const struct {
+      Elf32_Word nbuckets;
+      Elf32_Word symndx;
+      Elf32_Word maskwords;
+      Elf32_Word shift2;
+    } * header;
+
+    Elf_Data *data = elf_getdata_rawchunk(ebl->elf, offs[i_gnu_hash],
+                                          sizeof *header, ELF_T_WORD);
+    if (data != NULL) {
+      header = data->d_buf;
+      Elf32_Word nbuckets = header->nbuckets;
+      Elf32_Word symndx = header->symndx;
+      GElf_Off buckets_at = (offs[i_gnu_hash] + sizeof *header +
+                             (gelf_getclass(ebl->elf) * sizeof(Elf32_Word) *
+                              header->maskwords));
+
+      // elf_getdata_rawchunk takes a size_t, make sure it
+      // doesn't overflow.
+  #if SIZE_MAX <= UINT32_MAX
+      if (nbuckets > SIZE_MAX / sizeof(Elf32_Word))
+        data = NULL;
+      else
+  #endif
+        data = elf_getdata_rawchunk(ebl->elf, buckets_at,
+                                    nbuckets * sizeof(Elf32_Word), ELF_T_WORD);
+      if (data != NULL && symndx < nbuckets) {
+        const Elf32_Word *const buckets = data->d_buf;
+        Elf32_Word maxndx = symndx;
+        for (Elf32_Word bucket = 0; bucket < nbuckets; ++bucket)
+          if (buckets[bucket] > maxndx)
+            maxndx = buckets[bucket];
+
+        GElf_Off hasharr_at = (buckets_at + nbuckets * sizeof(Elf32_Word));
+        hasharr_at += (maxndx - symndx) * sizeof(Elf32_Word);
+        do {
+          data = elf_getdata_rawchunk(ebl->elf, hasharr_at,
+                                      sizeof(Elf32_Word), ELF_T_WORD);
+          if (data != NULL && (*(const Elf32_Word *)data->d_buf & 1u)) {
+            syments = maxndx + 1;
+            break;
+          }
+          ++maxndx;
+          hasharr_at += sizeof(Elf32_Word);
+        } while (data != NULL);
+      }
+    }
+  }
+  if (offs[i_strtab] > offs[i_symtab] && syments == 0)
+    syments = ((offs[i_strtab] - offs[i_symtab]) /
+      gelf_fsize(ebl->elf, ELF_T_SYM, 1, EV_CURRENT));
+
+  if (syments <= 0 || offs[i_strtab] == 0 || offs[i_symtab] == 0) {
+    error_exit(0, _("Dynamic symbol information is not available for displaying symbols."));
+  }
+
+  /* All the data chunk initializaion.  */
+  Elf_Data *symdata = NULL;
+  Elf_Data *symstrdata = NULL;
+  Elf_Data *versym_data = NULL;
+  Elf_Data *verdef_data = NULL;
+  Elf_Data *verneed_data = NULL;
+
+  symdata = elf_getdata_rawchunk(
+      ebl->elf, offs[i_symtab],
+      gelf_fsize(ebl->elf, ELF_T_SYM, syments, EV_CURRENT), ELF_T_SYM);
+  symstrdata = elf_getdata_rawchunk(ebl->elf, offs[i_strtab], addrs[i_strsz],
+                                    ELF_T_BYTE);
+  versym_data = elf_getdata_rawchunk(ebl->elf, offs[i_versym],
+                                     syments * sizeof(Elf64_Half), ELF_T_HALF);
+
+  /* Get the verneed_data without vernaux.  */
+  verneed_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verneed], addrs[i_verneednum] * sizeof(Elf64_Verneed),
+      ELF_T_VNEED);
+  size_t vernauxnum = 0;
+  size_t vn_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verneednum]; i++) {
+    GElf_Verneed *verneed =
+        (GElf_Verneed *)(verneed_data->d_buf + vn_next_offset);
+    vernauxnum += verneed->vn_cnt;
+    vn_next_offset += verneed->vn_next;
+  }
+
+  /* Update the verneed_data to include the vernaux.  */
+  verneed_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verneed],
+      (addrs[i_verneednum] + vernauxnum) * sizeof(GElf_Verneed), ELF_T_VNEED);
+
+  /* Get the verdef_data without verdaux.  */
+  verdef_data = elf_getdata_rawchunk(ebl->elf, offs[i_verdef],
+                                     addrs[i_verdefnum] * sizeof(Elf64_Verdef),
+                                     ELF_T_VDEF);
+  size_t verdauxnum = 0;
+  size_t vd_next_offset = 0;
+
+  for (size_t i = 0; i < addrs[i_verdefnum]; i++) {
+    GElf_Verdef *verdef = (GElf_Verdef *)(verdef_data->d_buf + vd_next_offset);
+    verdauxnum += verdef->vd_cnt;
+    vd_next_offset += verdef->vd_next;
+  }
+
+  /* Update the verdef_data to include the verdaux.  */
+  verdef_data = elf_getdata_rawchunk(
+      ebl->elf, offs[i_verdef],
+      (addrs[i_verdefnum] + verdauxnum) * sizeof(GElf_Verdef), ELF_T_VDEF);
+
+  for (size_t i = 0; i < syments; i++) {
+    /* Get the symbol table entry.  */
+    GElf_Sym sym_mem;
+    GElf_Sym *sym;
+    sym = gelf_getsym(symdata, i, &sym_mem);
+
+    char bindbuf[64];
+    char typebuf[64];
+    char scnbuf[64];
+    int class = gelf_getclass(ebl->elf);
+    Elf32_Word xndx;
+    xndx = sym->st_shndx;
+
+    printf(_("\
+%5ld: %0*" PRIx64 " %6" PRId64 " %-7s %-6s %-9s %6s %s"),
+           i, class == ELFCLASS32 ? 8 : 16, sym->st_value, sym->st_size,
+           ebl_symbol_type_name(ebl, GELF_ST_TYPE(sym->st_info), typebuf,
+                                sizeof(typebuf)),
+           ebl_symbol_binding_name(ebl, GELF_ST_BIND(sym->st_info), bindbuf,
+                                   sizeof(bindbuf)),
+           get_visibility_type(GELF_ST_VISIBILITY(sym->st_other)),
+           ebl_section_name(ebl, sym->st_shndx, xndx, scnbuf, sizeof(scnbuf),
+                            NULL, shnum),
+           ((char *)symstrdata->d_buf) + sym->st_name);
+
+    if (versym_data != NULL) {
+      /* Get the version information.  */
+      GElf_Versym versym_mem;
+      GElf_Versym *versym = gelf_getversym(versym_data, i, &versym_mem);
+
+      if (versym != NULL && ((*versym & 0x8000) != 0 || *versym > 1)) {
+        GElf_Vernaux vernaux_mem;
+        GElf_Vernaux *vernaux = NULL;
+        size_t vn_offset = 0;
+        GElf_Verneed verneed_mem;
+        GElf_Verneed *verneed = (GElf_Verneed *)verneed_data->d_buf;
+
+        while (verneed != NULL) {
+          size_t vna_offset = vn_offset;
+
+          vernaux = gelf_getvernaux(verneed_data, vna_offset += verneed->vn_aux,
+                                    &vernaux_mem);
+          while (vernaux != NULL && vernaux->vna_other != *versym &&
+                 vernaux->vna_next != 0 &&
+                 (verneed_data->d_size - vna_offset >= vernaux->vna_next)) {
+            /* Update the offset.  */
+            vna_offset += vernaux->vna_next;
+
+            vernaux =
+                (vernaux->vna_next == 0
+                     ? NULL
+                     : gelf_getvernaux(verneed_data, vna_offset, &vernaux_mem));
+          }
+
+          /* Check whether we found the version.  */
+          if (vernaux != NULL && vernaux->vna_other == *versym)
+            /* Found it.  */
+            break;
+
+          if (verneed_data->d_size - vn_offset < verneed->vn_next)
+            break;
+
+          vn_offset += verneed->vn_next;
+          verneed =
+              (verneed->vn_next == 0
+                   ? NULL
+                   : gelf_getverneed(verneed_data, vn_offset, &verneed_mem));
+        }
+
+        if (vernaux != NULL && vernaux->vna_other == *versym) {
+          printf("@%s (%u)", (char *)symstrdata->d_buf + vernaux->vna_name,
+                 (unsigned int)vernaux->vna_other);
+        }
+
+        if (addrs[i_verdefnum] && *versym != 0x8001) {
+          /* We must test both.  */
+          size_t vd_offset = 0;
+
+          GElf_Verdef verdef_mem;
+          GElf_Verdef *verdef = gelf_getverdef(verdef_data, 0, &verdef_mem);
+          while (verdef != NULL) {
+            if (verdef->vd_ndx == (*versym & 0x7fff))
+              /* Found the definition.  */
+              break;
+
+            if (verdef_data->d_size - vd_offset < verdef->vd_next)
+              break;
+
+            vd_offset += verdef->vd_next;
+            verdef =
+                (verdef->vd_next == 0
+                     ? NULL
+                     : gelf_getverdef(verdef_data, vd_offset, &verdef_mem));
+          }
+
+          if (verdef != NULL) {
+            GElf_Verdaux verdaux_mem;
+            GElf_Verdaux *verdaux = gelf_getverdaux(
+                verdef_data, vd_offset + verdef->vd_aux, &verdaux_mem);
+
+            if (verdaux != NULL)
+              printf((*versym & 0x8000) ? "@%s" : "@@%s",
+                     (char *)symstrdata->d_buf + verdaux->vda_name);
+          }
+        }
+      }
+    }
+    putchar_unlocked('\n');
+  }
+  return true;
+}
+
+
+
+static bool
 handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
 {
   Elf_Data *versym_data = NULL;
@@ -2503,7 +2771,7 @@  handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
   /* Get the data of the section.  */
   Elf_Data *data = elf_getdata (scn, NULL);
   if (data == NULL)
-    return;
+    return false;
 
   /* Find out whether we have other sections we might need.  */
   Elf_Scn *runscn = NULL;
@@ -2730,6 +2998,7 @@  handle_symtab (Ebl *ebl, Elf_Scn *scn, GElf_Shdr *shdr)
 
       putchar_unlocked ('\n');
     }
+    return true;
 }
 
 
@@ -4955,10 +5224,18 @@  get_dynscn_addrs(Elf *elf, GElf_Phdr *phdr, GElf_Addr addrs[i_max])
       addrs[i_verdef] = dyn->d_un.d_ptr;
       break;
 
+    case DT_VERDEFNUM:
+      addrs[i_verdefnum] = dyn->d_un.d_val;
+      break;
+
     case DT_VERNEED:
       addrs[i_verneed] = dyn->d_un.d_ptr;
       break;
 
+    case DT_VERNEEDNUM:
+      addrs[i_verneednum] = dyn->d_un.d_val;
+      break;
+
     case DT_STRSZ:
       addrs[i_strsz] = dyn->d_un.d_val;
       break;
-- 
2.39.1