[07/14] libdw: Recognize .debug_[ct]u_index sections in dwarf_elf_begin
Commit Message
From: Omar Sandoval <osandov@fb.com>
DWARF package (.dwp) files have a .debug_cu_index section and,
optionally, a .debug_tu_index section. Add them to the list of DWARF
sections.
Unfortunately, it's not that simple: the other debug sections in a dwp
file have names ending with .dwo, which confuses the checks introduced
by commit 5b21e70216b8 ("libdw: dwarf_elf_begin should use either plain,
dwo or lto DWARF sections."). So, we also have to special case
.debug_cu_index and .debug_tu_index in scn_dwarf_type and check_section
to treat them as TYPE_DWO sections.
Signed-off-by: Omar Sandoval <osandov@fb.com>
---
libdw/ChangeLog | 8 +++++++
libdw/dwarf_begin_elf.c | 53 +++++++++++++++++++++--------------------
libdw/libdwP.h | 2 ++
3 files changed, 37 insertions(+), 26 deletions(-)
Comments
Hi Omar,
On Wed, 2023-09-27 at 11:20 -0700, Omar Sandoval wrote:
> From: Omar Sandoval <osandov@fb.com>
>
> DWARF package (.dwp) files have a .debug_cu_index section and,
> optionally, a .debug_tu_index section. Add them to the list of DWARF
> sections.
>
> Unfortunately, it's not that simple: the other debug sections in a dwp
> file have names ending with .dwo, which confuses the checks introduced
> by commit 5b21e70216b8 ("libdw: dwarf_elf_begin should use either plain,
> dwo or lto DWARF sections."). So, we also have to special case
> .debug_cu_index and .debug_tu_index in scn_dwarf_type and check_section
> to treat them as TYPE_DWO sections.
This seems to work, but I wonder if we should have a specific TYPE_DWP?
It doesn't really matter now, because the enum dwarf_type is only used
internally. But I was hoping to extend the dwarf_begin interface with a
flag so that you can open a DWARF as a specific type. For example there
are single file split DWARF files. Which contain both "plain" and
".dwo" sections. Currently you can only open them as "plain", but there
are actually two "views" of such files.
https://sourceware.org/bugzilla/show_bug.cgi?id=28573
Do you think there are reasons to open a file as either TYPE_DWO or
TYPE_DWP? Or doesn't that not make sense?
> Signed-off-by: Omar Sandoval <osandov@fb.com>
> ---
> libdw/ChangeLog | 8 +++++++
> libdw/dwarf_begin_elf.c | 53 +++++++++++++++++++++--------------------
> libdw/libdwP.h | 2 ++
> 3 files changed, 37 insertions(+), 26 deletions(-)
>
> diff --git a/libdw/ChangeLog b/libdw/ChangeLog
> index be1e40bc..52327688 100644
> --- a/libdw/ChangeLog
> +++ b/libdw/ChangeLog
> @@ -20,6 +20,14 @@
> instead of dbg parameter, which is now unused.
> * libdwP.h (Dwarf_Macro_Op_Table): Replace is_64bit with address_size
> and offset_size. Add dbg.
> + Add IDX_debug_cu_index and IDX_debug_tu_index.
> + * dwarf_begin_elf.c (dwarf_scnnames): Add IDX_debug_cu_index and
> + IDX_debug_tu_index.
> + (scn_to_string_section_idx): Ditto.
> + (scn_dwarf_type): Check for .debug_cu_index, .debug_tu_index,
> + .zdebug_cu_index, and .zdebug_tu_index.
> + (check_section): Change .dwo suffix matching to account for
> + .debug_cu_index and .debug_tu_index.
>
> 2023-02-22 Mark Wielaard <mark@klomp.org>
>
> diff --git a/libdw/dwarf_begin_elf.c b/libdw/dwarf_begin_elf.c
> index 92d76d24..7936d343 100644
> --- a/libdw/dwarf_begin_elf.c
> +++ b/libdw/dwarf_begin_elf.c
> @@ -67,6 +67,8 @@ static const char dwarf_scnnames[IDX_last][19] =
> [IDX_debug_macro] = ".debug_macro",
> [IDX_debug_ranges] = ".debug_ranges",
> [IDX_debug_rnglists] = ".debug_rnglists",
> + [IDX_debug_cu_index] = ".debug_cu_index",
> + [IDX_debug_tu_index] = ".debug_tu_index",
> [IDX_gnu_debugaltlink] = ".gnu_debugaltlink"
> };
> #define ndwarf_scnnames (sizeof (dwarf_scnnames) / sizeof (dwarf_scnnames[0]))
> @@ -92,6 +94,8 @@ static const enum string_section_index scn_to_string_section_idx[IDX_last] =
> [IDX_debug_macro] = STR_SCN_IDX_last,
> [IDX_debug_ranges] = STR_SCN_IDX_last,
> [IDX_debug_rnglists] = STR_SCN_IDX_last,
> + [IDX_debug_cu_index] = STR_SCN_IDX_last,
> + [IDX_debug_tu_index] = STR_SCN_IDX_last,
> [IDX_gnu_debugaltlink] = STR_SCN_IDX_last
> };
OK
> @@ -109,6 +113,11 @@ scn_dwarf_type (Dwarf *result, size_t shstrndx, Elf_Scn *scn)
> {
> if (startswith (scnname, ".gnu.debuglto_.debug"))
> return TYPE_GNU_LTO;
> + else if (strcmp (scnname, ".debug_cu_index") == 0
> + || strcmp (scnname, ".debug_tu_index") == 0
> + || strcmp (scnname, ".zdebug_cu_index") == 0
> + || strcmp (scnname, ".zdebug_tu_index") == 0)
> + return TYPE_DWO;
> else if (startswith (scnname, ".debug_") || startswith (scnname, ".zdebug_"))
> {
> size_t len = strlen (scnname);
OK
> @@ -173,42 +182,34 @@ check_section (Dwarf *result, size_t shstrndx, Elf_Scn *scn, bool inscngrp)
> bool gnu_compressed = false;
> for (cnt = 0; cnt < ndwarf_scnnames; ++cnt)
> {
> + /* .debug_cu_index and .debug_tu_index don't have a .dwo suffix,
> + but they are for DWO. */
> + if (result->type != TYPE_DWO
> + && (cnt == IDX_debug_cu_index || cnt == IDX_debug_tu_index))
> + continue;
> + bool need_dot_dwo =
> + (result->type == TYPE_DWO
> + && cnt != IDX_debug_cu_index
> + && cnt != IDX_debug_tu_index);
> size_t dbglen = strlen (dwarf_scnnames[cnt]);
> size_t scnlen = strlen (scnname);
> if (strncmp (scnname, dwarf_scnnames[cnt], dbglen) == 0
> - && (dbglen == scnlen
> - || (scnlen == dbglen + 4
> + && ((!need_dot_dwo && dbglen == scnlen)
> + || (need_dot_dwo
> + && scnlen == dbglen + 4
> && strstr (scnname, ".dwo") == scnname + dbglen)))
> - {
> - if (dbglen == scnlen)
> - {
> - if (result->type == TYPE_PLAIN)
> - break;
> - }
> - else if (result->type == TYPE_DWO)
> - break;
> - }
> + break;
> else if (scnname[0] == '.' && scnname[1] == 'z'
> && (strncmp (&scnname[2], &dwarf_scnnames[cnt][1],
> dbglen - 1) == 0
> - && (scnlen == dbglen + 1
> - || (scnlen == dbglen + 5
> + && ((!need_dot_dwo && scnlen == dbglen + 1)
> + || (need_dot_dwo
> + && scnlen == dbglen + 5
> && strstr (scnname,
> ".dwo") == scnname + dbglen + 1))))
> {
> - if (scnlen == dbglen + 1)
> - {
> - if (result->type == TYPE_PLAIN)
> - {
> - gnu_compressed = true;
> - break;
> - }
> - }
> - else if (result->type <= TYPE_DWO)
> - {
> - gnu_compressed = true;
> - break;
> - }
> + gnu_compressed = true;
> + break;
> }
> else if (scnlen > 14 /* .gnu.debuglto_ prefix. */
> && startswith (scnname, ".gnu.debuglto_")
OK. It took me a bit to realize how this works. But the idea is that we
do two scans over the section names in global_read the first one sets
the result->type and then in the second scan this function is called to
check the found section is part of the type. That we also set
gnu_compressed in this loop makes thing extra funny. But you did
simplify that check. Thanks.
> diff --git a/libdw/libdwP.h b/libdw/libdwP.h
> index 77959b3b..0c44d40c 100644
> --- a/libdw/libdwP.h
> +++ b/libdw/libdwP.h
> @@ -82,6 +82,8 @@ enum
> IDX_debug_macro,
> IDX_debug_ranges,
> IDX_debug_rnglists,
> + IDX_debug_cu_index,
> + IDX_debug_tu_index,
> IDX_gnu_debugaltlink,
> IDX_last
> };
OK.
On Wed, Nov 01, 2023 at 03:03:57PM +0100, Mark Wielaard wrote:
> Hi Omar,
>
> On Wed, 2023-09-27 at 11:20 -0700, Omar Sandoval wrote:
> > From: Omar Sandoval <osandov@fb.com>
> >
> > DWARF package (.dwp) files have a .debug_cu_index section and,
> > optionally, a .debug_tu_index section. Add them to the list of DWARF
> > sections.
> >
> > Unfortunately, it's not that simple: the other debug sections in a dwp
> > file have names ending with .dwo, which confuses the checks introduced
> > by commit 5b21e70216b8 ("libdw: dwarf_elf_begin should use either plain,
> > dwo or lto DWARF sections."). So, we also have to special case
> > .debug_cu_index and .debug_tu_index in scn_dwarf_type and check_section
> > to treat them as TYPE_DWO sections.
>
> This seems to work, but I wonder if we should have a specific TYPE_DWP?
I tried this, and it made check_section even more confusing (because
then we need a more complicated check than result->type == section
type). I came to the same conclusion that since this is internal for
now, it didn't really matter. Although maybe the name TYPE_SPLIT would
make more sense now rather than overloading TYPE_DWO.
When this becomes public, separate TYPE_DWO and TYPE_DWP types might be
nicer so that a hypothetical dwarf_get_type function could tell you
whether you got a dwo file or a dwp file.
> It doesn't really matter now, because the enum dwarf_type is only used
> internally. But I was hoping to extend the dwarf_begin interface with a
> flag so that you can open a DWARF as a specific type. For example there
> are single file split DWARF files. Which contain both "plain" and
> ".dwo" sections. Currently you can only open them as "plain", but there
> are actually two "views" of such files.
>
> https://sourceware.org/bugzilla/show_bug.cgi?id=28573
>
> Do you think there are reasons to open a file as either TYPE_DWO or
> TYPE_DWP? Or doesn't that not make sense?
If we were to treat a dwp file as a dwo file, I guess we'd have to
pretend it only contained one unit and ignore the rest, which I don't
think makes much sense. So even if we had separate types for them, I
don't know if we should allow that.
@@ -20,6 +20,14 @@
instead of dbg parameter, which is now unused.
* libdwP.h (Dwarf_Macro_Op_Table): Replace is_64bit with address_size
and offset_size. Add dbg.
+ Add IDX_debug_cu_index and IDX_debug_tu_index.
+ * dwarf_begin_elf.c (dwarf_scnnames): Add IDX_debug_cu_index and
+ IDX_debug_tu_index.
+ (scn_to_string_section_idx): Ditto.
+ (scn_dwarf_type): Check for .debug_cu_index, .debug_tu_index,
+ .zdebug_cu_index, and .zdebug_tu_index.
+ (check_section): Change .dwo suffix matching to account for
+ .debug_cu_index and .debug_tu_index.
2023-02-22 Mark Wielaard <mark@klomp.org>
@@ -67,6 +67,8 @@ static const char dwarf_scnnames[IDX_last][19] =
[IDX_debug_macro] = ".debug_macro",
[IDX_debug_ranges] = ".debug_ranges",
[IDX_debug_rnglists] = ".debug_rnglists",
+ [IDX_debug_cu_index] = ".debug_cu_index",
+ [IDX_debug_tu_index] = ".debug_tu_index",
[IDX_gnu_debugaltlink] = ".gnu_debugaltlink"
};
#define ndwarf_scnnames (sizeof (dwarf_scnnames) / sizeof (dwarf_scnnames[0]))
@@ -92,6 +94,8 @@ static const enum string_section_index scn_to_string_section_idx[IDX_last] =
[IDX_debug_macro] = STR_SCN_IDX_last,
[IDX_debug_ranges] = STR_SCN_IDX_last,
[IDX_debug_rnglists] = STR_SCN_IDX_last,
+ [IDX_debug_cu_index] = STR_SCN_IDX_last,
+ [IDX_debug_tu_index] = STR_SCN_IDX_last,
[IDX_gnu_debugaltlink] = STR_SCN_IDX_last
};
@@ -109,6 +113,11 @@ scn_dwarf_type (Dwarf *result, size_t shstrndx, Elf_Scn *scn)
{
if (startswith (scnname, ".gnu.debuglto_.debug"))
return TYPE_GNU_LTO;
+ else if (strcmp (scnname, ".debug_cu_index") == 0
+ || strcmp (scnname, ".debug_tu_index") == 0
+ || strcmp (scnname, ".zdebug_cu_index") == 0
+ || strcmp (scnname, ".zdebug_tu_index") == 0)
+ return TYPE_DWO;
else if (startswith (scnname, ".debug_") || startswith (scnname, ".zdebug_"))
{
size_t len = strlen (scnname);
@@ -173,42 +182,34 @@ check_section (Dwarf *result, size_t shstrndx, Elf_Scn *scn, bool inscngrp)
bool gnu_compressed = false;
for (cnt = 0; cnt < ndwarf_scnnames; ++cnt)
{
+ /* .debug_cu_index and .debug_tu_index don't have a .dwo suffix,
+ but they are for DWO. */
+ if (result->type != TYPE_DWO
+ && (cnt == IDX_debug_cu_index || cnt == IDX_debug_tu_index))
+ continue;
+ bool need_dot_dwo =
+ (result->type == TYPE_DWO
+ && cnt != IDX_debug_cu_index
+ && cnt != IDX_debug_tu_index);
size_t dbglen = strlen (dwarf_scnnames[cnt]);
size_t scnlen = strlen (scnname);
if (strncmp (scnname, dwarf_scnnames[cnt], dbglen) == 0
- && (dbglen == scnlen
- || (scnlen == dbglen + 4
+ && ((!need_dot_dwo && dbglen == scnlen)
+ || (need_dot_dwo
+ && scnlen == dbglen + 4
&& strstr (scnname, ".dwo") == scnname + dbglen)))
- {
- if (dbglen == scnlen)
- {
- if (result->type == TYPE_PLAIN)
- break;
- }
- else if (result->type == TYPE_DWO)
- break;
- }
+ break;
else if (scnname[0] == '.' && scnname[1] == 'z'
&& (strncmp (&scnname[2], &dwarf_scnnames[cnt][1],
dbglen - 1) == 0
- && (scnlen == dbglen + 1
- || (scnlen == dbglen + 5
+ && ((!need_dot_dwo && scnlen == dbglen + 1)
+ || (need_dot_dwo
+ && scnlen == dbglen + 5
&& strstr (scnname,
".dwo") == scnname + dbglen + 1))))
{
- if (scnlen == dbglen + 1)
- {
- if (result->type == TYPE_PLAIN)
- {
- gnu_compressed = true;
- break;
- }
- }
- else if (result->type <= TYPE_DWO)
- {
- gnu_compressed = true;
- break;
- }
+ gnu_compressed = true;
+ break;
}
else if (scnlen > 14 /* .gnu.debuglto_ prefix. */
&& startswith (scnname, ".gnu.debuglto_")
@@ -82,6 +82,8 @@ enum
IDX_debug_macro,
IDX_debug_ranges,
IDX_debug_rnglists,
+ IDX_debug_cu_index,
+ IDX_debug_tu_index,
IDX_gnu_debugaltlink,
IDX_last
};