gdb/dwarf: call other cutu_reader constructor in ensure_lang and dw2_get_file_names
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gdb_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gdb_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gdb_check--master-aarch64 |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gdb_check--master-arm |
fail
|
Patch failed to apply
|
Commit Message
From: Simon Marchi <simon.marchi@polymtl.ca>
PR 32742 shows this failing:
$ make check TESTS="gdb.ada/access_to_unbounded_array.exp" RUNTESTFLAGS="--target_board=fission"
Running /home/simark/src/binutils-gdb/gdb/testsuite/gdb.ada/access_to_unbounded_array.exp ...
FAIL: gdb.ada/access_to_unbounded_array.exp: scenario=all: gdb_breakpoint: set breakpoint at foo.adb:23 (GDB internal error)
Or, interactively:
$ ./gdb -q -nx --data-directory=data-directory testsuite/outputs/gdb.ada/access_to_unbounded_array/foo-all -ex 'b foo.adb:23' -batch
/home/simark/src/binutils-gdb/gdb/dwarf2/read.c:19567: internal-error: set_lang: Assertion `old_value == language_unknown || old_value == language_minimal || old_value == lang' failed.
The symptom is that for a given dwarf2_per_cu, the language gets set
twice. First, set to `language_ada`, and then, to `language_minimal`.
It's unexpected for the language of a CU to get changed like this.
The CU at offset 0x0 in the main file looks like:
0x00000000: Compile Unit: length = 0x00000030, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x00000034)
0x0000000b: DW_TAG_compile_unit
DW_AT_low_pc [DW_FORM_addr] (0x000000000000339a)
DW_AT_high_pc [DW_FORM_data8] (0x0000000000000432)
DW_AT_stmt_list [DW_FORM_sec_offset] (0x00000000)
DW_AT_GNU_dwo_name [DW_FORM_strp] ("b~foo.dwo")
DW_AT_comp_dir [DW_FORM_strp] ("/home/simark/build/binutils-gdb/gdb/testsuite/outputs/gdb.ada/access_to_unbounded_array")
DW_AT_GNU_pubnames [DW_FORM_flag_present] (true)
DW_AT_GNU_addr_base [DW_FORM_sec_offset] (0x00000000)
DW_AT_GNU_dwo_id [DW_FORM_data8] (0x277aee54e7bd47f7)
This refers to the DWO file b~foo.dwo, whose top-level DIE is:
.debug_info.dwo contents:
0x00000000: Compile Unit: length = 0x00000b63, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x00000b67)
0x0000000b: DW_TAG_compile_unit
DW_AT_producer [DW_FORM_GNU_str_index] ("GNU Ada 14.2.1 20250207 -fgnat-encodings=minimal -gdwarf-4 -fdebug-types-section -fuse-ld=gold -gnatA -gnatWb -gnatiw -gdwarf-4 -gsplit-dwarf -ggnu-pubnames -gnatws -mtune=generic -march=x86-64")
DW_AT_language [DW_FORM_data1] (DW_LANG_Ada95)
DW_AT_name [DW_FORM_GNU_str_index] ("/home/simark/build/binutils-gdb/gdb/testsuite/outputs/gdb.ada/access_to_unbounded_array/b~foo.adb")
DW_AT_comp_dir [DW_FORM_GNU_str_index] ("/home/simark/build/binutils-gdb/gdb/testsuite/outputs/gdb.ada/access_to_unbounded_array")
DW_AT_GNU_dwo_id [DW_FORM_data8] (0xdbeffefab180a2cb)
The thing to note is that the language attribute is only present in the
DIE in the DWO file, not on the DIE in the main file.
The first time the language gets set is here:
#0 dwarf2_per_cu::set_lang (this=0x50f0000044b0, lang=language_ada, dw_lang=DW_LANG_Ada95) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:20788
#1 0x0000555561666af6 in cutu_reader::prepare_one_comp_unit (this=0x7ffff10bf2b0, cu=0x51700008e000, pretend_language=language_minimal) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:21029
#2 0x000055556159f740 in cutu_reader::cutu_reader (this=0x7ffff10bf2b0, this_cu=0x50f0000044b0, per_objfile=0x516000066080, abbrev_table=0x510000004640, existing_cu=0x0, skip_partial=false, pretend_language=language_minimal, cache=0x7ffff11b95e0) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:3371
#3 0x00005555615a547a in process_psymtab_comp_unit (this_cu=0x50f0000044b0, per_objfile=0x516000066080, storage=0x7ffff11b95e0) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:3799
#4 0x00005555615a9292 in cooked_index_worker_debug_info::process_cus (this=0x51700008dc80, task_number=0, first=std::unique_ptr<dwarf2_per_cu> = {...}, end=std::unique_ptr<dwarf2_per_cu> = {...}) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:4122
In this code path (particularly this specific cutu_reader constructir),
the work is done to find and read the DWO file. So the language is
properly identifier as language_ada, all good so far.
The second time the language gets set is:
#0 dwarf2_per_cu::set_lang (this=0x50f0000044b0, lang=language_minimal, dw_lang=0) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:20788
#1 0x0000555561666af6 in cutu_reader::prepare_one_comp_unit (this=0x7ffff0f42730, cu=0x517000091b80, pretend_language=language_minimal) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:21029
#2 0x00005555615a1822 in cutu_reader::cutu_reader (this=0x7ffff0f42730, this_cu=0x50f0000044b0, per_objfile=0x516000066080, pretend_language=language_minimal, parent_cu=0x0, dwo_file=0x0) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:3464
#3 0x000055556158c850 in dw2_get_file_names (this_cu=0x50f0000044b0, per_objfile=0x516000066080) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:1956
#4 0x000055556158f4f5 in dw_expand_symtabs_matching_file_matcher (per_objfile=0x516000066080, file_matcher=...) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:2157
#5 0x00005555616329e2 in cooked_index_functions::expand_symtabs_matching (this=0x50200002ab50, objfile=0x516000065780, file_matcher=..., lookup_name=0x0, symbol_matcher=..., expansion_notify=..., search_flags=..., domain=..., lang_matcher=...) at /home/smarchi/src/binutils-gdb/gdb/dwarf2/read.c:15912
#6 0x0000555562ca8a14 in objfile::map_symtabs_matching_filename (this=0x516000065780, name=0x50200002ad90 "break pck.adb", real_path=0x0, callback=...) at /home/smarchi/src/binutils-gdb/gdb/symfile-debug.c:207
#7 0x0000555562d68775 in iterate_over_symtabs (pspace=0x513000005600, name=0x50200002ad90 "break pck.adb", callback=...) at /home/smarchi/src/binutils-gdb/gdb/symtab.c:727
Here, we use the other cutu_reader constructor, the one that does not
look up the DWO file for the passed CU. If a DWO file exists for this
CU, the caller is expected to pass it as a parameter. That cutu_reader
constructor also ends up setting the language of the CU. But because it
didn't read the DWO file, it didn't figure out the language is
language_ada, so it tries to set the language to the default,
language_minimal.
A question is: why do we end up trying to set the CU's language is this
context. This is completely unrelated to what we're trying to do, that
is get the file names from the line table. Setting the language is a
side-effect of just constructing a cutu_reader, which we need to look up
attributes in dw2_get_file_names_reader. There are probably some
cleanups to be done here, to avoid doing useless work like looking up
and setting the CU's language when all we need is an object to help
reading the DIEs and attributes. But that is future work.
The same cutu_reader constructor is used in
`dwarf2_per_cu::ensure_lang`. Not sure if that can actually happen, but
imagine that the CU's language is not set yet, and that method gets
called. Since this is the version of cutu_reader that does not look up
the DWO file, it will conclude that the language is language_minimal and
set that as the CU's language. In other words,
`dwarf2_per_cu::ensure_lang` will get the language wrong, pretty ironic.
Fix this by using the other cutu_reader constructor in those two spots.
Pass `per_objfile->get_cu (this_cu)`, as the `existing_cu` parameter. I
think this is necessary, because that constructor has an assert to check
that if `existing_cu` is nullptr, then there must not be an existing
`dwarf2_cu` in the per_objfile.
To avoid getting things wrong like this, I think that the second
cutu_reader constructor should be reserved for the spots that do pass a
non-nullptr dwo_file. The only spot at the moment in
create_cus_hash_table, where we read multiple units from the same DWO
file. In this context, I guess it makes sense for efficiency to get the
dwo_file once and pass it down to cutu_reader. For that constructor,
make the parameters non-optional, add "non-nullptr" asserts, and update
the code to assume the passed values are not nullptr.
What I don't know is if this change is problematic thread-wise, if the
functions I have modified to use the other cutu_reader constructor can
be called concurrently in worker threads. If so, I think it would be
problematic.
Bug: https://sourceware.org/bugzilla/show_bug.cgi?id=32742
Change-Id: I980d16875b9a43ab90e251504714d0d41165c7c8
---
gdb/dwarf2/read.c | 23 ++++++++++++-----------
gdb/dwarf2/read.h | 4 ++--
2 files changed, 14 insertions(+), 13 deletions(-)
base-commit: 34177f19831dc1740fcf3741f9971254b1d3eb4f
Comments
>>>>> "Simon" == simon marchi <simon.marchi@polymtl.ca> writes:
Simon> The same cutu_reader constructor is used in
Simon> `dwarf2_per_cu::ensure_lang`. Not sure if that can actually happen, but
Simon> imagine that the CU's language is not set yet, and that method gets
Simon> called.
Supposedly, but I don't recall how. My other branch can trigger this
for sure though.
Simon> To avoid getting things wrong like this, I think that the second
Simon> cutu_reader constructor should be reserved for the spots that do pass a
Simon> non-nullptr dwo_file. The only spot at the moment in
Simon> create_cus_hash_table, where we read multiple units from the same DWO
Simon> file. In this context, I guess it makes sense for efficiency to get the
Simon> dwo_file once and pass it down to cutu_reader. For that constructor,
Simon> make the parameters non-optional, add "non-nullptr" asserts, and update
Simon> the code to assume the passed values are not nullptr.
Not something to solve in this patch, but a classic C++ way to enforce
this is to use references rather than pointers. Then it's more or less
solved by the type system.
Simon> What I don't know is if this change is problematic thread-wise, if the
Simon> functions I have modified to use the other cutu_reader constructor can
Simon> be called concurrently in worker threads. If so, I think it would be
Simon> problematic.
I think it's just dw2_get_file_names and ensure_lang, right?
Looking at the callers, those seem fine.
Approved-By: Tom Tromey <tom@tromey.com>
Tom
On 2025-03-07 11:55, Tom Tromey wrote:
>>>>>> "Simon" == simon marchi <simon.marchi@polymtl.ca> writes:
>
> Simon> The same cutu_reader constructor is used in
> Simon> `dwarf2_per_cu::ensure_lang`. Not sure if that can actually happen, but
> Simon> imagine that the CU's language is not set yet, and that method gets
> Simon> called.
>
> Supposedly, but I don't recall how. My other branch can trigger this
> for sure though.
Ehh yeah, re-reading my sentence, I realized it sounds a bit stupid. If
the language was always set when this method gets called, we wouldn't
need this method at all. I will just remove that sentence.
> Simon> To avoid getting things wrong like this, I think that the second
> Simon> cutu_reader constructor should be reserved for the spots that do pass a
> Simon> non-nullptr dwo_file. The only spot at the moment in
> Simon> create_cus_hash_table, where we read multiple units from the same DWO
> Simon> file. In this context, I guess it makes sense for efficiency to get the
> Simon> dwo_file once and pass it down to cutu_reader. For that constructor,
> Simon> make the parameters non-optional, add "non-nullptr" asserts, and update
> Simon> the code to assume the passed values are not nullptr.
>
> Not something to solve in this patch, but a classic C++ way to enforce
> this is to use references rather than pointers. Then it's more or less
> solved by the type system.
I agree, and it's good documentation for the humans who read the code.
> Simon> What I don't know is if this change is problematic thread-wise, if the
> Simon> functions I have modified to use the other cutu_reader constructor can
> Simon> be called concurrently in worker threads. If so, I think it would be
> Simon> problematic.
>
> I think it's just dw2_get_file_names and ensure_lang, right?
> Looking at the callers, those seem fine.
Yes, there were 3 callers of that constructor, I removed these two, the
only one is create_cus_hash_table, which is "legitimate".
> Approved-By: Tom Tromey <tom@tromey.com>
Thanks, will push.
Simon
@@ -1859,7 +1859,9 @@ dw2_get_file_names (dwarf2_per_cu *this_cu, dwarf2_per_objfile *per_objfile)
if (this_cu->files_read)
return this_cu->file_names;
- cutu_reader reader (this_cu, per_objfile, language_minimal);
+ cutu_reader reader (this_cu, per_objfile, nullptr,
+ per_objfile->get_cu (this_cu), true, language_minimal,
+ nullptr);
if (!reader.is_dummy ())
dw2_get_file_names_reader (reader.cu (), reader.comp_unit_die ());
@@ -3296,9 +3298,11 @@ cutu_reader::cutu_reader (dwarf2_per_cu *this_cu,
struct objfile *objfile = per_objfile->objfile;
struct dwarf2_section_info *section = this_cu->section;
bfd *abfd = section->get_bfd_owner ();
- struct dwarf2_section_info *abbrev_section;
const gdb_byte *begin_info_ptr, *info_ptr;
+ gdb_assert (parent_cu != nullptr);
+ gdb_assert (dwo_file != nullptr);
+
if (dwarf_die_debug)
gdb_printf (gdb_stdlog, "Reading %s unit at offset %s\n",
this_cu->is_debug_types ? "type" : "comp",
@@ -3306,9 +3310,7 @@ cutu_reader::cutu_reader (dwarf2_per_cu *this_cu,
gdb_assert (per_objfile->get_cu (this_cu) == nullptr);
- abbrev_section = (dwo_file != NULL
- ? &dwo_file->sections.abbrev
- : get_abbrev_section_for_cu (this_cu));
+ dwarf2_section_info *abbrev_section = &dwo_file->sections.abbrev;
/* This is cheap if the section is already read in. */
section->read (objfile);
@@ -3322,11 +3324,9 @@ cutu_reader::cutu_reader (dwarf2_per_cu *this_cu,
? rcuh_kind::TYPE
: rcuh_kind::COMPILE));
- if (parent_cu != nullptr)
- {
- m_new_cu->str_offsets_base = parent_cu->str_offsets_base;
- m_new_cu->addr_base = parent_cu->addr_base;
- }
+ m_new_cu->str_offsets_base = parent_cu->str_offsets_base;
+ m_new_cu->addr_base = parent_cu->addr_base;
+
this_cu->set_length (m_new_cu->header.get_length_with_initial ());
/* Skip dummy compilation units. */
@@ -19658,7 +19658,8 @@ dwarf2_per_cu::ensure_lang (dwarf2_per_objfile *per_objfile)
/* Constructing this object will set the language as a side
effect. */
- cutu_reader reader (this, per_objfile, language_minimal);
+ cutu_reader reader (this, per_objfile, nullptr, per_objfile->get_cu (this),
+ true, language_minimal, nullptr);
}
/* A helper function for dwarf2_find_containing_comp_unit that returns
@@ -938,8 +938,8 @@ class cutu_reader
cutu_reader (dwarf2_per_cu *this_cu,
dwarf2_per_objfile *per_objfile,
enum language pretend_language,
- struct dwarf2_cu *parent_cu = nullptr,
- struct dwo_file *dwo_file = nullptr);
+ struct dwarf2_cu *parent_cu,
+ struct dwo_file *dwo_file);
DISABLE_COPY_AND_ASSIGN (cutu_reader);