[RFC] Really fix gdb/23010: SYMBOL_LANGUAGE != DICT_LANGUAGE assertion

Message ID 20180605180912.3256-1-keiths@redhat.com
State New, archived
Headers

Commit Message

Keith Seitz June 5, 2018, 6:09 p.m. UTC
  This patch is another attempt at really fixing the multitude of assertions
being seen where symbols of one language are being added to symbol lists of
another language.

In short, this is happening because read_file_scope was reading DIEs without
having its language set. As a result, the default language, language_minimal,
was being passed to imported DIE trees and, in certain specific situations,
causing the SYMBOL_LANGUAGE != DICT_LANGUAGE assertion being widely reported.

For a more thorough explanation, see the following mailing list
post:

  https://sourceware.org/ml/gdb-patches/2018-05/msg00703.html

Since a test reproducer for this has been so elusive, this patch also adds a
wrapper function around add_symbol_to_list which asserts  when adding a symbol
of one language to a list containing symbols of a different language.

For this RFC, I have left both the assertion (which I prefer) and the
complaint variation in the patch. I will remove the unused bits upon
approval.

gdb/ChangeLog:

	PR gdb/23010
	* dwarf2read.c (dw2_add_symbol_to_list): New function.
	(fixup_go_packaging, new_symbol): Use dw2_add_symbol_to_list
	instead of add_symbol_to_list.
	(read_file_scope): Call prepare_one_comp_unit before reading
	any other DIEs.
---
 gdb/dwarf2read.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)
  

Comments

Keith Seitz June 6, 2018, 5:48 p.m. UTC | #1
On 06/05/2018 11:09 AM, Keith Seitz wrote:
> 
> Since a test reproducer for this has been so elusive, this patch also adds a
> wrapper function around add_symbol_to_list which asserts  when adding a symbol
> of one language to a list containing symbols of a different language.

Pedro asked me to post my work-in-progress (but currently abandoned) test case for this. NOTE this is for archival purposes only. The test does /not/ currently work.

It is an attempt to reproduce the DIE tree from libwebkit on Fedora 28.

Keith





# Copyright 2018 Free Software Foundation, Inc.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Tests involving DW_TAG_partial_units.

load_lib dwarf.exp

if {![dwarf2_support]} { return 0 }

standard_testfile partial-unit-dw.S

# Make some DWARF for the test.
set asm_file [standard_output_file $srcfile]
Dwarf::assemble $asm_file {
    # These labels are already defined
    declare_labels \
	d29 d32 d45 d4e d9f da8 d1d7 d1e0 d234f d238c d2541 d13777e d137787 \
	d6d70fd d6d7d70 d6d7d78 d6d7d7f d6d7d8c d6d7d99 d6220f94 \
	d624db4a d624db62 d624db70 d624db82 d624db98 d624dba1 d624dbb3 \
	d624dbbe d624dbd3 d624dbd8 d624dbe1 d624dbe6 d624dbfc d624dc01 \
	d624dc0f d624dc18 d624dc21 d624dc39 d624dc43 d624dc62 d624dc74 \
	d6d7124 d6d712b d6d76c7 d6d7cf1 d6d7cf4 d6d76d9 d6d76dc d624dc59 \
	d6d789d d6d78a0 d6d788c d624dbec d6d7e3c 6d7e44 d6d7d02 d6d7cfe \
	d6d79af d6d7dde d6d7df6 d6d7dfd d6d7e04 d6d796d d6d797e d6d7985 \
	d6d7988 d6d7ac4 d6d7ada d6d7cdf d6d7baf d6d79b1 d6d7b62 d6s7b71 \
	d6d7b80 d6d7b8f d6d7b9e d6d7ba5 d6d7de6 d6d7ce3 d6d7eca d6d7ed2 \
	d6d7507 d6d751b d6d74e1 d6d7eda d6d7ee2 d6d74ef d6d7503 d6d7e4c \
	d6d7e54 d6d749b d6d74b1 d496ce d496d7 d6d7b71 d6d7dee d6d7e11 d6d7373 \
	d6d7389 d6d7e44 d624dbf2 d624dc0a

    cu {} {
	d29: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d32: base_type {
		{byte_size 8 sdata}
		{encoding @DW_ATE_unsigned}
		{name "long unsigned int"}
	    }
	}
    }

    cu {} {
	d45: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d4e: base_type {
		{byte_size 4 sdata}
		{encoding @DW_ATE_unsigned}
		{name "unsigned int"}
	    }
	}
    }

    cu {} {
	d9f: partial_unit {
	    {comp_dir ".../Source_WebKit"}
	} {
	    da8: base_type {
		{byte_size 8 sdata}
		{encoding @DW_ATE_signed}
		{name "long int"}
	    }
	}
    }

    cu {} {
	d1d7: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d1e0: typedef {
		{name "size_t"}
		{type %$d32}
	    }
	}
    }

    cu {} {
	d234f: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d238c: DW_TAG_namespace {
		{name "WTF"}
	    } {
		d2541: structure_type {
		    {name "FastMalloc"}
		    {byte_size 1 sdata}
		} {
		}
	    }
	}
    }

    cu {} {
	d496ce: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d496d7: rvalue_reference_type {
		{byte_size 8 sdata}
		{type %$da8}
	    }
	}
    }

    cu {} {
	d13777e: partial_unit {
	    {comp_dir ".../Source/WebKit"}
	} {
	    d137787: pointer_type {
		{byte_size 8 sdata}
		{type %$da8}
	    }
	}
    }

    cu {} {
	d6d70fd: partial_unit {} {
	    d6d7124: DW_TAG_namespace {
		{name "WTF"}
	    } {
		d6d712b: class_type {
		    {name "Vector<long int, 0, WTF::CrashOnOverflow, 16, WTF::FastMalloc>"}
		    {byte_size 24 sdata}
		} {
		    d6d7373: subprogram {
			{external 1 flag}
			{name "capacity"}
			{linkage_name "_ZNK3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE8capacityEv"}
			{type %$d1e0}
			{accessibility @DW_ACCESS_public}
			{declaration 1 flag}
			{object_pointer %$d6d7389}
		    } {
			d6d7389: formal_parameter {
			    {type %$d6d7cfe}
			    {artificial 1 flag}
			}
		    }
		    d6d749b: subprogram {
			{external 1 flag}
			{name "data"}
			{linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE4dataEv"}
			{type %$d137787}
			{accessibility @DW_ACCESS_public}
			{declaration 1 flag}
			{object_pointer %$d6d74b1}
		    } {
			d6d74b1: formal_parameter {
			    {type %$d6d7cf1}
			    {artificial 1 flag}
			}
		    }
		    d6d74e1: typedef {
			{name "iterator"}
			{type %$d137787}
			{accessibility @DW_ACCESS_public}
		    }
		    d6d74ef: subprogram {
			{external 1 flag}
			{name "begin"}
			{linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE5beginEv"}
			{type %$d6d74e1}
			{accessibility @DW_ACCESS_public}
			{declaration 1 flag}
			{object_pointer %$d6d7503}
		    } {
			d6d7503: formal_parameter {
			    {type %$d6d7cf1}
			    {artificial 1 flag}
			}
		    }
		    d6d7507: subprogram {
			{external 1 flag}
			{name "end"}
			{linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE3endEv"}
			{type %$d6d74e1}
			{accessibility @DW_ACCESS_public}
			{declaration 1 flag}
			{object_pointer %$d6d751b}
		    } {
			d6d751b: formal_parameter {
			    {type %$d6d7cf1}
			    {artificial 1 flag}
			}
		    }
		    d6d76c7: subprogram {
			{external 1 flag}
			{name "reserveCapacity"}
			{linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE15reserveCapacityEm"}
			{accessibility @DW_ACCESS_public}
			{declaration 1 flag}
			{object_pointer %$d6d76d9}
		    } {
			d6d76d9: formal_parameter {
			    {type %$d6d7cf1}
			    {artificial 1 flag}
			}
			d6d76dc: formal_parameter {
			    {type %$d1e0}
			}
		    }
		    d6d788c: subprogram {
			{external 1 flag}
			{name "expandCapacity"}
			{linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE14expandCapacityEm"}
			{declaration 1 flag}
			{object_pointer %$d6d789d}
		    } {
			d6d789d: formal_parameter {
			    {type %$d6d7cf1}
			    {artificial 1 flag}
			}
			d6d78a0: formal_parameter {
			    {type %$d1e0}
			}
		    }
		}
		d6d796d: subprogram {
		    {external 1 flag}
		    {name "appendSlowCase<long int>"}
		    {linkage_name "_ZN3WTF6VectorIlLm0ENS_15CrashOnOverflowELm16ENS_10FastMallocEE14appendSlowCaseIlEEvOT_"}
		    {declaration 1 flag}
		    {object_pointer %$d6d7985}
		} {
		    d6d797e: template_type_param {
			{name "U"}
			{type %$da8}
		    }
		    d6d7985: formal_parameter {
			{type %$d6d7cf1}
			{artificial 1 flag}
		    }
		    d6d7988: formal_parameter {
			{type %$d496d7}
		    }
		}
		d6d7ac4: subprogram {
		    {external 1 flag}
		    {name "capacity"}
		    {linkage_name "_ZNK3WTF16VectorBufferBaseIlNS_10FastMallocEE8capacityEv"}
		    {type %$d1e0}
		    {accessibility @DW_ACCESS_public}
		    {declaration 1 flag}
		    {object_pointer %$d6d7ada}
		} {
		    d6d7ada: formal_parameter {
			{type %$d6d7cdf}
			{artificial 1 flag}
		    }
		}
	    }
	    d6d79af: const_type {
		{type %$d6d712b}
	    }
	    d6d79b1: class_type {
		{name "VectorBufferBase<long int, WTF::FastMalloc>"}
		{byte_size 24 sdata}
	    } {
		d6d7b62: member {
		    {name "m_buffer"}
		    {type %$d137787}
		    {data_member_location 0 sdata}
		    {accessibility @DW_ACCESS_protected}
		}
		d6d7b71: member {
		    {name "m_capacity"}
		    {type %$d4e}
		    {data_member_location 8 sdata}
		    {accessibility @DW_ACCESS_protected}
		}
		d6d7b80: member {
		    {name "m_size"}
		    {type %$d4e}
		    {data_member_location 12 sdata}
		    {accessibility @DW_ACCESS_protected}
		}
		d6d7b8f: member {
		    {name "m_mask"}
		    {type %$d4e}
		    {data_member_location 16 sdata}
		    {accessibility @DW_ACCESS_protected}
		}
		d6d7b9e: template_type_param {
		    {name "T"}
		    {type %$da8}
		}
		d6d7ba5: template_type_param {
		    {name "Malloc"}
		    {type %$d2541}
		}
	    }
	    d6d7baf: const_type {
		{type %$d6d79b1}
	    }
	    d6d7cdf: pointer_type {
		{byte_size 8 sdata}
		{type %$d6d7baf}
	    }
	    d6d7ce3: const_type {
		{type %$d6d7cdf}
	    }
	    d6d7cf1: pointer_type {
		{byte_size 8 sdata}
		{type %$d6d712b}
	    }
	    d6d7cf4: const_type {
		{type %$d6d7cf1}
	    }
	    d6d7cfe: pointer_type {
		{byte_size 8 sdata}
		{type %$d6d79af}
	    }
	    d6d7d02: const_type {
		{type %$d6d7cfe}
	    }
	    d6d7d70: subprogram {
		{specification %$d6d76c7}
		{object_pointer %$d6d7d78}
		{inline @DW_INL_inlined}
	    } {
		d6d7d78: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		    {artificial 1 flag}
		}
		d6d7d7f: formal_parameter {
		    {name "newCapacity"}
		    {type %$d1e0}
		}
		d6d7d8c: variable {
		    {name "oldBuffer"}
		    {type %$d137787}
		}
		d6d7d99: variable {
		    {name "oldEnd"}
		    {type %$d137787}
		}
	    }
	    d6d7dde: subprogram {
		{specification %$d6d7ac4}
		{object_pointer %$d6d7de6}
		{inline @DW_INL_inlined}
	    } {
		d6d7de6: formal_parameter {
		    {name "this"}
		    {type %$d6d7ce3}
		    {artificial 1 flag}
		}
	    }
	    d6d7dee: subprogram {
		{specification %$d6d796d}
		{object_pointer %$d6d7dfd}
		{inline @DW_INL_inlined}
	    } {
		d6d7df6: template_type_param {
		    {name "U"}
		    {type %$da8}
		}
		d6d7dfd: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		}
		d6d7e04: formal_parameter {
		    {name "value"}
		    {type %$d496d7}
		}
		d6d7e11: variable {
		    {name "ptr"}
		    {type %$d137787}
		}
	    }
	    d6d7e3c: subprogram {
		{specification %$d6d7373}
		{object_pointer %$d6d7e44}
		{inline @DW_INL_declared_inlined}
	    } {
		d6d7e44: formal_parameter {
		    {name "this"}
		    {type %$d6d7d02}
		    {artificial 1 flag}
		}
	    }
	    d6d7e4c: subprogram {
		{specification %$d6d749b}
		{object_pointer %$d6d7e54}
		{inline @DW_INL_declared_inlined}
	    } {
		d6d7e54: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		    {artificial 1 flag}
		}
	    }
	    d6d7eca: subprogram {
		{specification %$d6d7507}
		{object_pointer %$d6d7ed2}
		{inline @DW_INL_declared_inlined}
	    } {
		d6d7ed2: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		    {artificial 1 flag}
		}
	    }
	    d6d7eda: subprogram {
		{specification %$d6d74ef}
		{object_pointer %$d6d7ee2}
		{inline @DW_INL_declared_inlined}
	    } {
		d6d7ee2: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		    {artificial 1 flag}
		}
	    }
	}
    }

    cu {} {
	d6220f94: compile_unit {
	    {producer "GNU C++14 8.0.1 20180324"}
	    {language @DW_LANG_C_plus_plus}
	    {name ".../IconDatabase.cpp"}
	    {comp_dir "Source/WebKit"}
	} {
	    d624db4a: subprogram {
		{specification %$d6d788c}
		{object_pointer %$d624db62}
		{low_pc 0x937870 addr}
		{high_pc 219 sdata}
		{GNU_all_call_sites 1 flag}
	    } {
		d624db62: formal_parameter {
		    {name "this"}
		    {type %$d6d7cf4}
		    {artificial 1 flag}
		}
		d624db70: formal_parameter {
		    {name "newMinCapacity"}
		    {type %$d1e0}
		}
		d624db82: inlined_subroutine {
		    {abstract_origin %$d6d7e3c}
		    {low_pc 0x937874 addr}
		    {high_pc 8 sdata}
		} {
		    d624db98: formal_parameter {
			{abstract_origin %$d6d7e44}
		    }
		    d624dba1: inlined_subroutine {
			{abstract_origin %$d6d7dde}
			{low_pc 0x937874 addr}
			{high_pc 8 sdata}
		    } {
			d624dbb3: formal_parameter {
			    {abstract_origin %$d6d7de6}
			}
		    }
		}
		d624dbbe: inlined_subroutine {
		    {abstract_origin %$d6d7d70}
		    {low_pc 0x937897 addr}
		    {high_pc 4 sdata}
		} {
		    d624dbd3: formal_parameter {
			{abstract_origin %$d6d7d7f}
		    }
		    d624dbd8: formal_parameter {
			{abstract_origin %$d6d7d78}
		    }
		    d624dbe1: lexical_block {
			{low_pc 0x937897 addr}
			{high_pc 4 sdata}
		    } {
			d624dbe6: variable {
			    {abstract_origin %$d6d7d8c}
			}
			d624dbec: variable {
			    {abstract_origin %$d6d7d99}
			}
			d624dbf2: lexical_block {
			    {abstract_origin %$d6d7d70}
			    {low_pc 0x937897 addr}
			    {high_pc 4 sdata}
			} {
			    d624dbfc: formal_parameter {
				{abstract_origin %$d6d7d7f}
			    }
			    d624dc01: formal_parameter {
				{abstract_origin %$d6d7d78}
			    }
			    d624dc0a: lexical_block {
				{low_pc 0x937897 addr}
				{high_pc 4 addr}
			    } {
				d624dc0f: variable {
				    {abstract_origin %$d6d7d8c}
				}
				d624dc18: variable {
				    {abstract_origin %$d6d7d99}
				}
				d624dc21: inlined_subroutine {
				    {abstract_origin %$d6d7eca}
				    {low_pc 0x9378a7 addr}
				    {high_pc 4 sdata}
				} {
				    d624dc39: formal_parameter {
					{abstract_origin %$d6d7ed2}
				    }
				}
				d624dc43: inlined_subroutine {
				    {abstract_origin %$d6d7eda}
				    {low_pc 0x9378aa addr}
				    {high_pc 3 sdata}
				} {
				    d624dc59: formal_parameter {
					{abstract_origin %$d6d7ee2}
				    }
				    d624dc62: inlined_subroutine {
					{abstract_origin %$d6d7e4c}
					{low_pc 0x9378aa addr}
					{high_pc 3 sdata}
				    } {
					d624dc74: formal_parameter {
					    {abstract_origin %$d6d7e54}
					}
				    }
				}
			    }
			}
		    }
		}
	    }
	}
    }
}

if {[gdb_compile $asm_file ${binfile}.o object {nodebug}] != ""} {
    return -1
}

# This is not complete.
#set saved_flags $GDBFLAGS
#append GDBFLAGS " -readnow"
clean_restart ${testfile}.o
#set GDBFLAGS $saved_flags
  
Joel Brobecker June 11, 2018, 10:26 p.m. UTC | #2
Hey Keith,

First of all, thanks a lot again for all your work on this PR.
Anyone, please feel free to comment as well!

> gdb/ChangeLog:
> 
> 	PR gdb/23010
> 	* dwarf2read.c (dw2_add_symbol_to_list): New function.
> 	(fixup_go_packaging, new_symbol): Use dw2_add_symbol_to_list
> 	instead of add_symbol_to_list.
> 	(read_file_scope): Call prepare_one_comp_unit before reading
> 	any other DIEs.
> ---
>  gdb/dwarf2read.c | 38 ++++++++++++++++++++++++++++++++------
>  1 file changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
> index 1cabfbb0d4..04f22114f8 100644
> --- a/gdb/dwarf2read.c
> +++ b/gdb/dwarf2read.c
> @@ -9701,6 +9701,33 @@ compute_delayed_physnames (struct dwarf2_cu *cu)
>    cu->method_list.clear ();
>  }
>  
> +/* A wrapper for add_symbol_to_list to issue a complaint if a symbol
> +   with a different language is added to LISTHEAD.  */
> +
> +static inline void
> +dw2_add_symbol_to_list (struct symbol *symbol, struct pending **listhead)
> +{
> +  /* Only complain if LISTHEAD already contains symbols of a different
> +     language.  */

I confess I got confused a bit by this comment about thinking this was
a comment about the function's behavior, as in "only complain once",
and therefore belonging in the function's introductory comment.  But
thinking more clearly about this, you have to wait for at least
one symbol to be in the list; otherwise, talking to myself what else
would you compare it too, right?

But I am wondering if we could do it a bit better. In particular,
at the moment, it seems like all you have is the existing list of
symbols; but more interesting would be the CU's language, right?
What about passing the dwarf_cu? From what I can tell, we seem to
have it each time we call this function. Or is that exactly the
issue we're dealing with?

> +#if USE_ASSERT
> +  gdb_assert ((*listhead) == NULL
> +	      || (*listhead)->nsyms == 0
> +	      || (SYMBOL_LANGUAGE ((*listhead)->symbol[0])
> +		  == SYMBOL_LANGUAGE (symbol)));

So, if I understand your preliminary explanation, this part will be
removed, right?

> +#else
> +  if ((*listhead) != NULL && (*listhead)->nsyms > 0
> +      && SYMBOL_LANGUAGE ((*listhead)->symbol[0]) != SYMBOL_LANGUAGE (symbol))
> +    {
> +      complaint (_("recording symbol \"%s\" with language %s "
> +		   "into list of langauge %s"), SYMBOL_LINKAGE_NAME (symbol),
                                 ^^^^^^^^
Small typo in "langauge"

Other than that, no other comment for branch "master".

Do we want something for the gdb-8.1.1 release? I would have thought
so. But I might suggest instead the shorter version, without the
complaint (just because it gives us a bit more time on master to
double-check that the overhead is indeed minimal -- not as obvious
as I might have thought when I first thought about it).

Thanks!
  
Keith Seitz June 26, 2018, 9:40 p.m. UTC | #3
On 06/11/2018 03:26 PM, Joel Brobecker wrote:
>> +/* A wrapper for add_symbol_to_list to issue a complaint if a symbol
>> +   with a different language is added to LISTHEAD.  */
>> +
>> +static inline void
>> +dw2_add_symbol_to_list (struct symbol *symbol, struct pending **listhead)
>> +{
>> +  /* Only complain if LISTHEAD already contains symbols of a different
>> +     language.  */
> 
> I confess I got confused a bit by this comment about thinking this was
> a comment about the function's behavior, as in "only complain once",
> and therefore belonging in the function's introductory comment.  But
> thinking more clearly about this, you have to wait for at least
> one symbol to be in the list; otherwise, talking to myself what else
> would you compare it too, right?

Yes, that's just about right. As you note below, we could use the new symbol's CU.

> But I am wondering if we could do it a bit better. In particular,
> at the moment, it seems like all you have is the existing list of
> symbols; but more interesting would be the CU's language, right?
> What about passing the dwarf_cu? From what I can tell, we seem to
> have it each time we call this function. Or is that exactly the
> issue we're dealing with?

I'm not sure I completely follow, but let me see if I can reason this out a little.

When add_symbol_to_list is called, we have (among other things), the symbol, the CU from which the symbol came, and the symbol list we are going to add the symbol to.

From this data, we need to check that either the CU's language or the symbol's language is the same as the language of the symbol list. *ALL* those symbols in the list must have the same language (or we hit the now infamous assertion).

So that suggests two options for ensuring that all symbols added to any list are the same, provided that there are already symbols in the list (if there are no symbols, then this new symbol will define the list's language):

1) We check SYMBOL_LANGUAGE of the new symbol to the SYMBOL_LANGUAGE of first symbol in the list.
   This is the approach used in the current version.
2) We can check the CU's langauge against the SYMBOL_LANGUAGE of the first symbol in the list.

I don't see a clear winner here.

>> +#if USE_ASSERT
>> +  gdb_assert ((*listhead) == NULL
>> +	      || (*listhead)->nsyms == 0
>> +	      || (SYMBOL_LANGUAGE ((*listhead)->symbol[0])
>> +		  == SYMBOL_LANGUAGE (symbol)));
> 
> So, if I understand your preliminary explanation, this part will be
> removed, right?

Yes -- one of these branches would be removed. I prefer the assertion, but I could make a case for simply issuing the complaint. So I've left it up to maintainers. :-) [Biggest plus for assertion: issuing only the complaint here will cause the DICT_LANGUAGE != SYMBOL_LANGUAGE assertion we've been seeing. I'd rather see this happen sooner than later.]

>> +#else
>> +  if ((*listhead) != NULL && (*listhead)->nsyms > 0
>> +      && SYMBOL_LANGUAGE ((*listhead)->symbol[0]) != SYMBOL_LANGUAGE (symbol))
>> +    {
>> +      complaint (_("recording symbol \"%s\" with language %s "
>> +		   "into list of langauge %s"), SYMBOL_LINKAGE_NAME (symbol),
>                                  ^^^^^^^^
> Small typo in "langauge"

Fixed.

> Do we want something for the gdb-8.1.1 release? I would have thought
> so. But I might suggest instead the shorter version, without the
> complaint (just because it gives us a bit more time on master to
> double-check that the overhead is indeed minimal -- not as obvious
> as I might have thought when I first thought about it).

Yes, I can commit the simple one-line change for 8.1.1. I think it important that this fix get pushed everywhere.

Keith
  
Joel Brobecker June 27, 2018, 4:24 p.m. UTC | #4
> > But I am wondering if we could do it a bit better. In particular,
> > at the moment, it seems like all you have is the existing list of
> > symbols; but more interesting would be the CU's language, right?
> > What about passing the dwarf_cu? From what I can tell, we seem to
> > have it each time we call this function. Or is that exactly the
> > issue we're dealing with?
> 
> I'm not sure I completely follow, but let me see if I can reason this out a little.
> 
> When add_symbol_to_list is called, we have (among other things), the symbol, the CU from which the symbol came, and the symbol list we are going to add the symbol to.
> 
> >From this data, we need to check that either the CU's language or the symbol's language is the same as the language of the symbol list. *ALL* those symbols in the list must have the same language (or we hit the now infamous assertion).
> 
> So that suggests two options for ensuring that all symbols added to any list are the same, provided that there are already symbols in the list (if there are no symbols, then this new symbol will define the list's language):
> 
> 1) We check SYMBOL_LANGUAGE of the new symbol to the SYMBOL_LANGUAGE of first symbol in the list.
>    This is the approach used in the current version.
> 2) We can check the CU's langauge against the SYMBOL_LANGUAGE of the first symbol in the list.
> 
> I don't see a clear winner here.

We are on the same page as to what I was suggesting. Option (2) felt
a bit more natural to me, but I am happy with (1) also. Since that's
what you've been testing so far, let's go with that.

> 
> >> +#if USE_ASSERT
> >> +  gdb_assert ((*listhead) == NULL
> >> +	      || (*listhead)->nsyms == 0
> >> +	      || (SYMBOL_LANGUAGE ((*listhead)->symbol[0])
> >> +		  == SYMBOL_LANGUAGE (symbol)));
> > 
> > So, if I understand your preliminary explanation, this part will be
> > removed, right?
> 
> Yes -- one of these branches would be removed. I prefer the assertion, but I could make a case for simply issuing the complaint. So I've left it up to maintainers. :-) [Biggest plus for assertion: issuing only the complaint here will cause the DICT_LANGUAGE != SYMBOL_LANGUAGE assertion we've been seeing. I'd rather see this happen sooner than later.]

I like assertions too, but have started to shy away from them for
situation that we can recover from. But indeed, your point about
the advantage of the assert is quite true, especially since the
alternative is actually not trying to handle the situation, so not
very useful in the grand scheme of things. To be more precise, choosing
to generate complaint means that the user would see the complaint
followed by the failed assertion, as opposed to what we have now,
which is the failed assertion. Might as well generate a failed assertion
here, which will be clearer for us to understand what is going on.

Perhaps we can slightly extend the comment you alread added to confirm
we expect all symbols of a given unit to be of the same language:

 /* Only complain if LISTHEAD already contains symbols of a different
    language (all symbols within this list should have the same language).  */

Something like that? (just a thought, feel free to ignore if you think
it is redundant.

> > Do we want something for the gdb-8.1.1 release? I would have thought
> > so. But I might suggest instead the shorter version, without the
> > complaint (just because it gives us a bit more time on master to
> > double-check that the overhead is indeed minimal -- not as obvious
> > as I might have thought when I first thought about it).
> 
> Yes, I can commit the simple one-line change for 8.1.1. I think it
> important that this fix get pushed everywhere.

Nice.

So I'll wait for the final version of the patch, just
Thanks Keith!
  
Keith Seitz July 9, 2018, 7:50 p.m. UTC | #5
On 06/27/2018 09:24 AM, Joel Brobecker wrote:
> So I'll wait for the final version of the patch, just

I don't normally ping patches so quickly, but this is important enough to warrant one:

https://sourceware.org/ml/gdb-patches/2018-06/msg00665.html

Highly desired for 8.2, too.

Keith
  

Patch

diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c
index 1cabfbb0d4..04f22114f8 100644
--- a/gdb/dwarf2read.c
+++ b/gdb/dwarf2read.c
@@ -9701,6 +9701,33 @@  compute_delayed_physnames (struct dwarf2_cu *cu)
   cu->method_list.clear ();
 }
 
+/* A wrapper for add_symbol_to_list to issue a complaint if a symbol
+   with a different language is added to LISTHEAD.  */
+
+static inline void
+dw2_add_symbol_to_list (struct symbol *symbol, struct pending **listhead)
+{
+  /* Only complain if LISTHEAD already contains symbols of a different
+     language.  */
+#if USE_ASSERT
+  gdb_assert ((*listhead) == NULL
+	      || (*listhead)->nsyms == 0
+	      || (SYMBOL_LANGUAGE ((*listhead)->symbol[0])
+		  == SYMBOL_LANGUAGE (symbol)));
+#else
+  if ((*listhead) != NULL && (*listhead)->nsyms > 0
+      && SYMBOL_LANGUAGE ((*listhead)->symbol[0]) != SYMBOL_LANGUAGE (symbol))
+    {
+      complaint (_("recording symbol \"%s\" with language %s "
+		   "into list of langauge %s"), SYMBOL_LINKAGE_NAME (symbol),
+		 language_str (SYMBOL_LANGUAGE (symbol)),
+		 language_str (SYMBOL_LANGUAGE ((*listhead)->symbol[0])));
+    }
+#endif
+
+  add_symbol_to_list (symbol, listhead);
+}
+
 /* Go objects should be embedded in a DW_TAG_module DIE,
    and it's not clear if/how imported objects will appear.
    To keep Go support simple until that's worked out,
@@ -9772,7 +9799,7 @@  fixup_go_packaging (struct dwarf2_cu *cu)
       SYMBOL_ACLASS_INDEX (sym) = LOC_TYPEDEF;
       SYMBOL_TYPE (sym) = type;
 
-      add_symbol_to_list (sym, &global_symbols);
+      dw2_add_symbol_to_list (sym, &global_symbols);
 
       xfree (package_name);
     }
@@ -11438,6 +11465,7 @@  read_file_scope (struct die_info *die, struct dwarf2_cu *cu)
   struct die_info *child_die;
   CORE_ADDR baseaddr;
 
+  prepare_one_comp_unit (cu, die, cu->language);
   baseaddr = ANOFFSET (objfile->section_offsets, SECT_OFF_TEXT (objfile));
 
   get_scope_pc_bounds (die, &lowpc, &highpc, cu);
@@ -11450,8 +11478,6 @@  read_file_scope (struct die_info *die, struct dwarf2_cu *cu)
 
   file_and_directory fnd = find_file_and_directory (die, cu);
 
-  prepare_one_comp_unit (cu, die, cu->language);
-
   /* The XLCL doesn't generate DW_LANG_OpenCL because this attribute is not
      standardised yet.  As a workaround for the language detection we fall
      back to the DW_AT_producer string.  */
@@ -21208,7 +21234,7 @@  new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
 	  SYMBOL_TYPE (sym) = objfile_type (objfile)->builtin_core_addr;
 	  SYMBOL_DOMAIN (sym) = LABEL_DOMAIN;
 	  SYMBOL_ACLASS_INDEX (sym) = LOC_LABEL;
-	  add_symbol_to_list (sym, cu->list_in_scope);
+	  dw2_add_symbol_to_list (sym, cu->list_in_scope);
 	  break;
 	case DW_TAG_subprogram:
 	  /* SYMBOL_BLOCK_VALUE (sym) will be filled in later by
@@ -21466,7 +21492,7 @@  new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
 	case DW_TAG_common_block:
 	  SYMBOL_ACLASS_INDEX (sym) = LOC_COMMON_BLOCK;
 	  SYMBOL_DOMAIN (sym) = COMMON_BLOCK_DOMAIN;
-	  add_symbol_to_list (sym, cu->list_in_scope);
+	  dw2_add_symbol_to_list (sym, cu->list_in_scope);
 	  break;
 	default:
 	  /* Not a tag we recognize.  Hopefully we aren't processing
@@ -21486,7 +21512,7 @@  new_symbol (struct die_info *die, struct type *type, struct dwarf2_cu *cu,
 	}
 
       if (list_to_add != NULL)
-	add_symbol_to_list (sym, list_to_add);
+	dw2_add_symbol_to_list (sym, list_to_add);
 
       /* For the benefit of old versions of GCC, check for anonymous
 	 namespaces based on the demangled name.  */