Handle loading improper core files gracefully in the mips backend.

Message ID 5693CE90.1060709@codesourcery.com
State New, archived
Headers

Commit Message

Luis Machado Jan. 11, 2016, 3:47 p.m. UTC
  On 01/09/2016 01:02 AM, Maciej W. Rozycki wrote:
> On Fri, 8 Jan 2016, Luis Machado wrote:
>
>> Maciej, do you have more input on additional incompatibilities that we need
>> to check for?
>
>   Why do we get this far in the first place where e_machine != EM_MIPS?  I
> think rather than adding additional checks here (unless they're needed for
> a valid MIPS ELF binary; but that would be a separate problem to fix) we
> should reject such a core file outright.

I thought about that, but no targets other than v850 use e_machine 
information during arch initialization, which is strange. It seems we 
just don't enforce compatibility, at least when loading core files.

Does the attached patch match what you have in mind? I also adjusted the 
testcase to expect core file rejection and also got rid of duplicated 
test strings.

Now a mips-targeted GDB says the following when trying to load a x86 
core file:

gdb.arch/i386-biarch-core.core": no core file handler recognizes format
  

Comments

Pedro Alves Jan. 12, 2016, 12:46 p.m. UTC | #1
On 01/11/2016 03:47 PM, Luis Machado wrote:
> diff --git a/gdb/mips-tdep.c b/gdb/mips-tdep.c
> index ca17864..cdfd80e 100644
> --- a/gdb/mips-tdep.c
> +++ b/gdb/mips-tdep.c
> @@ -8208,6 +8208,12 @@ mips_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>    int dspacc;
>    int dspctl;
>  
> +  /* Sanity check the e_machine field.  */
> +  if (info.abfd
> +      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
> +      && elf_elfheader (info.abfd)->e_machine != EM_MIPS)
> +    return NULL;

This callback is registered for bfd_arch_mips:

  gdbarch_register (bfd_arch_mips, mips_gdbarch_init, mips_dump_tdep);

Does bfd think this a bfd_arch_mips binary?  How so?

Thanks,
Pedro Alves
  
Luis Machado Jan. 12, 2016, 1:25 p.m. UTC | #2
On 01/12/2016 10:46 AM, Pedro Alves wrote:
> On 01/11/2016 03:47 PM, Luis Machado wrote:
>> diff --git a/gdb/mips-tdep.c b/gdb/mips-tdep.c
>> index ca17864..cdfd80e 100644
>> --- a/gdb/mips-tdep.c
>> +++ b/gdb/mips-tdep.c
>> @@ -8208,6 +8208,12 @@ mips_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>>     int dspacc;
>>     int dspctl;
>>
>> +  /* Sanity check the e_machine field.  */
>> +  if (info.abfd
>> +      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>> +      && elf_elfheader (info.abfd)->e_machine != EM_MIPS)
>> +    return NULL;
>
> This callback is registered for bfd_arch_mips:
>
>    gdbarch_register (bfd_arch_mips, mips_gdbarch_init, mips_dump_tdep);
>
> Does bfd think this a bfd_arch_mips binary?  How so?

In the second time we call gdbarch_info_fill, when opening the core file 
alone, we have this:

p *info
$8 = {bfd_arch_info = 0x0, byte_order = BFD_ENDIAN_UNKNOWN, 
byte_order_for_code = BFD_ENDIAN_UNKNOWN, abfd = 0xe1ce80, tdep_info = 
0x0, osabi = GDB_OSABI_UNINITIALIZED, target_desc = 0x0}

p *info->abfd->arch_info
$10 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8, 
arch = bfd_arch_unknown, mach = 0, arch_name = 0x9b799f "unknown", 
printable_name = 0x9b799f "unknown", section_align_power = 2, 
the_default = 1, compatible = 0x78a592 <bfd_default_compatible>,
   scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6 
<bfd_arch_default_fill>, next = 0x0}

p *default_bfd_arch
$12 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8, 
arch = bfd_arch_mips, mach = 0, arch_name = 0x9d98e0 "mips", 
printable_name = 0x9d98e0 "mips", section_align_power = 3, the_default = 
1, compatible = 0x832b40 <mips_compatible>,
   scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6 
<bfd_arch_default_fill>, next = 0x9d9b00 <arch_info_struct>}

The data above leads gdbarch_info_fill to assign default_bfd_arch to 
info->bfd_arch_info here:

   /* From the default.  */
   if (info->bfd_arch_info == NULL)
     info->bfd_arch_info = default_bfd_arch;

So the core file essentially turns into a mips-compatible core file. 
This also happens with a powerpc-targeted gdb and likely any other 
architecture.

For powerpc we get lucky and end up "passing" this test because it has 
no fatal failing conditions. It ends up displaying frame -1 for me, like so:

PC not available^M
#-1 <unavailable> in ?? ()
  
Pedro Alves Jan. 12, 2016, 2:10 p.m. UTC | #3
On 01/12/2016 01:25 PM, Luis Machado wrote:
> On 01/12/2016 10:46 AM, Pedro Alves wrote:
>> On 01/11/2016 03:47 PM, Luis Machado wrote:
>>> diff --git a/gdb/mips-tdep.c b/gdb/mips-tdep.c
>>> index ca17864..cdfd80e 100644
>>> --- a/gdb/mips-tdep.c
>>> +++ b/gdb/mips-tdep.c
>>> @@ -8208,6 +8208,12 @@ mips_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>>>     int dspacc;
>>>     int dspctl;
>>>
>>> +  /* Sanity check the e_machine field.  */
>>> +  if (info.abfd
>>> +      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>>> +      && elf_elfheader (info.abfd)->e_machine != EM_MIPS)
>>> +    return NULL;
>>
>> This callback is registered for bfd_arch_mips:
>>
>>    gdbarch_register (bfd_arch_mips, mips_gdbarch_init, mips_dump_tdep);
>>
>> Does bfd think this a bfd_arch_mips binary?  How so?
> 
> In the second time we call gdbarch_info_fill, when opening the core file 
> alone, we have this:
> 
> p *info
> $8 = {bfd_arch_info = 0x0, byte_order = BFD_ENDIAN_UNKNOWN, 
> byte_order_for_code = BFD_ENDIAN_UNKNOWN, abfd = 0xe1ce80, tdep_info = 
> 0x0, osabi = GDB_OSABI_UNINITIALIZED, target_desc = 0x0}
> 
> p *info->abfd->arch_info
> $10 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8, 
> arch = bfd_arch_unknown, mach = 0, arch_name = 0x9b799f "unknown", 
> printable_name = 0x9b799f "unknown", section_align_power = 2, 
> the_default = 1, compatible = 0x78a592 <bfd_default_compatible>,
>    scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6 
> <bfd_arch_default_fill>, next = 0x0}
> 
> p *default_bfd_arch
> $12 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8, 
> arch = bfd_arch_mips, mach = 0, arch_name = 0x9d98e0 "mips", 
> printable_name = 0x9d98e0 "mips", section_align_power = 3, the_default = 
> 1, compatible = 0x832b40 <mips_compatible>,
>    scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6 
> <bfd_arch_default_fill>, next = 0x9d9b00 <arch_info_struct>}
> 
> The data above leads gdbarch_info_fill to assign default_bfd_arch to 
> info->bfd_arch_info here:
> 
>    /* From the default.  */
>    if (info->bfd_arch_info == NULL)
>      info->bfd_arch_info = default_bfd_arch;
> 
> So the core file essentially turns into a mips-compatible core file. 

Hmmm.  I see.  I think we can't really change this, given that there
are formats that don't have an architecture.  Like, e.g., srec:

 (gdb) file testsuite/gdb.base/intstr2.srec
 Reading symbols from testsuite/gdb.base/intstr2.srec...(no debugging symbols found)...done.

I take it that a --enable-targets=all wouldn't fail like this?

Also, sounds like you should be able to trigger these incompatibilities
and assertion by loading a 32-bit MIPS binary and playing with
"set mips abi n64/o64", etc?

All in all, maybe your original patch that flagged incompatible
abi/isa combination is the way to go?

I also wonder whether the bfd arch detection couldn't be always
compiled in, at least for elf.  Why does bfd fail to detect that this
is an bfd_arch_i386 file in the first place?

> This also happens with a powerpc-targeted gdb and likely any other 
> architecture.
> 
> For powerpc we get lucky and end up "passing" this test because it has 
> no fatal failing conditions. It ends up displaying frame -1 for me, like so:
> 
> PC not available^M
> #-1 <unavailable> in ?? ()

Which is obviously bogus.

Thanks,
Pedro Alves
  
Luis Machado Jan. 12, 2016, 3:43 p.m. UTC | #4
On 01/12/2016 12:10 PM, Pedro Alves wrote:
> On 01/12/2016 01:25 PM, Luis Machado wrote:
>> On 01/12/2016 10:46 AM, Pedro Alves wrote:
>>> On 01/11/2016 03:47 PM, Luis Machado wrote:
>>>> diff --git a/gdb/mips-tdep.c b/gdb/mips-tdep.c
>>>> index ca17864..cdfd80e 100644
>>>> --- a/gdb/mips-tdep.c
>>>> +++ b/gdb/mips-tdep.c
>>>> @@ -8208,6 +8208,12 @@ mips_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
>>>>      int dspacc;
>>>>      int dspctl;
>>>>
>>>> +  /* Sanity check the e_machine field.  */
>>>> +  if (info.abfd
>>>> +      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>>>> +      && elf_elfheader (info.abfd)->e_machine != EM_MIPS)
>>>> +    return NULL;
>>>
>>> This callback is registered for bfd_arch_mips:
>>>
>>>     gdbarch_register (bfd_arch_mips, mips_gdbarch_init, mips_dump_tdep);
>>>
>>> Does bfd think this a bfd_arch_mips binary?  How so?
>>
>> In the second time we call gdbarch_info_fill, when opening the core file
>> alone, we have this:
>>
>> p *info
>> $8 = {bfd_arch_info = 0x0, byte_order = BFD_ENDIAN_UNKNOWN,
>> byte_order_for_code = BFD_ENDIAN_UNKNOWN, abfd = 0xe1ce80, tdep_info =
>> 0x0, osabi = GDB_OSABI_UNINITIALIZED, target_desc = 0x0}
>>
>> p *info->abfd->arch_info
>> $10 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8,
>> arch = bfd_arch_unknown, mach = 0, arch_name = 0x9b799f "unknown",
>> printable_name = 0x9b799f "unknown", section_align_power = 2,
>> the_default = 1, compatible = 0x78a592 <bfd_default_compatible>,
>>     scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6
>> <bfd_arch_default_fill>, next = 0x0}
>>
>> p *default_bfd_arch
>> $12 = {bits_per_word = 32, bits_per_address = 32, bits_per_byte = 8,
>> arch = bfd_arch_mips, mach = 0, arch_name = 0x9d98e0 "mips",
>> printable_name = 0x9d98e0 "mips", section_align_power = 3, the_default =
>> 1, compatible = 0x832b40 <mips_compatible>,
>>     scan = 0x78a60a <bfd_default_scan>, fill = 0x78acc6
>> <bfd_arch_default_fill>, next = 0x9d9b00 <arch_info_struct>}
>>
>> The data above leads gdbarch_info_fill to assign default_bfd_arch to
>> info->bfd_arch_info here:
>>
>>     /* From the default.  */
>>     if (info->bfd_arch_info == NULL)
>>       info->bfd_arch_info = default_bfd_arch;
>>
>> So the core file essentially turns into a mips-compatible core file.
>
> Hmmm.  I see.  I think we can't really change this, given that there
> are formats that don't have an architecture.  Like, e.g., srec:
>
>   (gdb) file testsuite/gdb.base/intstr2.srec
>   Reading symbols from testsuite/gdb.base/intstr2.srec...(no debugging symbols found)...done.
>
> I take it that a --enable-targets=all wouldn't fail like this?
>

Yes, because, at least in my case, we default to the proper i386 
architecture.

> Also, sounds like you should be able to trigger these incompatibilities
> and assertion by loading a 32-bit MIPS binary and playing with
> "set mips abi n64/o64", etc?
>

Yes, most likely, but see below.

> All in all, maybe your original patch that flagged incompatible
> abi/isa combination is the way to go?
>
> I also wonder whether the bfd arch detection couldn't be always
> compiled in, at least for elf.  Why does bfd fail to detect that this
> is an bfd_arch_i386 file in the first place?

It seems bfd also falls back to the default, which is mips in this case.

p bfd_default_vector[0]
$3 = (const bfd_target *) 0x9beac0 <mips_elf32_trad_be_vec>

I gave it a try with a legitimate x86 core file being loaded in a 
mips-targeted gdb and i see the same problem with the internal error. 
Initially, when loading the core, the bfd arch is unknown, and then we 
pick the default arch in bfd_find_target here:

   /* This is safe; the vector cannot be null.  */
   if (targname == NULL || strcmp (targname, "default") == 0)
     {
       if (bfd_default_vector[0] != NULL)
         target = bfd_default_vector[0];
       else
         target = bfd_target_vector[0];
       if (abfd)
         {
           abfd->xvec = target;
           abfd->target_defaulted = TRUE;
         }
       return target;
     }

Sounds like we have a couple issues. The mips backend not handling weird 
abi/isa combinations and GDB not preventing clearly incompatible core 
files from proceeding further into processing in the target's backend?
  
Pedro Alves Jan. 12, 2016, 4 p.m. UTC | #5
On 01/12/2016 03:43 PM, Luis Machado wrote:

> Sounds like we have a couple issues. The mips backend not handling weird 
> abi/isa combinations and GDB not preventing clearly incompatible core 
> files from proceeding further into processing in the target's backend?

Sounds like it.  Not sure whether the blame for the latter is BFD's or
GDB's though.

Thanks,
Pedro Alves
  
Maciej W. Rozycki Jan. 12, 2016, 6:30 p.m. UTC | #6
On Tue, 12 Jan 2016, Luis Machado wrote:

> > > The data above leads gdbarch_info_fill to assign default_bfd_arch to
> > > info->bfd_arch_info here:
> > > 
> > >     /* From the default.  */
> > >     if (info->bfd_arch_info == NULL)
> > >       info->bfd_arch_info = default_bfd_arch;
> > > 
> > > So the core file essentially turns into a mips-compatible core file.
> > 
> > Hmmm.  I see.  I think we can't really change this, given that there
> > are formats that don't have an architecture.  Like, e.g., srec:
> > 
> >   (gdb) file testsuite/gdb.base/intstr2.srec
> >   Reading symbols from testsuite/gdb.base/intstr2.srec...(no debugging
> > symbols found)...done.

 Or we could be talking to a live target with no executable selected at 
all.  This is also why there are settings like `set mips abi ...' 
available -- to let the user select the executable model for a target 
there's no other source of information about.

> > I also wonder whether the bfd arch detection couldn't be always
> > compiled in, at least for elf.  Why does bfd fail to detect that this
> > is an bfd_arch_i386 file in the first place?

 The mapping between `e_machine' and `bfd_architecture' is only provided 
by individual BFD ELF target backends, via the ELF_MACHINE_CODE and 
ELF_ARCH macros.

> It seems bfd also falls back to the default, which is mips in this case.
> 
> p bfd_default_vector[0]
> $3 = (const bfd_target *) 0x9beac0 <mips_elf32_trad_be_vec>

 Regardless, I'd expect a suitable generic ELF BFD target to be selected, 
which is what AFAICT `bfd_check_format' does.  It is called by our 
`core_open' function and has a `core_file_p' handler, which makes suitable 
checks including `e_machine' in particular, except for generic ELF BFD 
targets, which are special-cased (and always come last).  So in the 
absence of specific ELF target support in BFD I'd expect a compatible 
generic ELF target to be chosen rather than the default BFD target, which 
might be incompatible.

> Sounds like we have a couple issues. The mips backend not handling weird
> abi/isa combinations and GDB not preventing clearly incompatible core files
> from proceeding further into processing in the target's backend?

 I have given it some thought and came to a conclusion that we should at 
least try being consistent.  Which means I think we should not try to 
handle files within the MIPS backend which would not be passed in the 
first place in an `--enable-targets=all' configuration.  Rather than 
checking `e_machine' explicitly I'd be leaning towards using BFD to detect 
such a situation though, perhaps by using a condition like

  if (info.abfd != NULL
      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
      && bfd_get_arch (info.abfd) != bfd_arch_mips)
    return NULL;

(maybe with an additional error message) though ultimately I think it 
would make sense to define different BFD architecture codes for file 
formats which by definition carry no architecture information and for ones 
that do but are not supported.  Then for the formers we could continue 
selecting the target using the current algorithm and for the latters we'd 
just reject them as incompatible with the given backend -- all somewhere 
in generic code so that individual target backends do not have to repeat 
it all.

 As to ABI, ISA, etc. settings -- these are internal to the MIPS backend, 
so its the backend's job to sanitise them.

  Maciej
  
Pedro Alves Jan. 12, 2016, 7:08 p.m. UTC | #7
On 01/12/2016 06:30 PM, Maciej W. Rozycki wrote:
> On Tue, 12 Jan 2016, Luis Machado wrote:
>> Pedro Alves wrote:
> 
>>> I also wonder whether the bfd arch detection couldn't be always
>>> compiled in, at least for elf.  Why does bfd fail to detect that this
>>> is an bfd_arch_i386 file in the first place?
> 
>  The mapping between `e_machine' and `bfd_architecture' is only provided 
> by individual BFD ELF target backends, via the ELF_MACHINE_CODE and 
> ELF_ARCH macros.

Thanks.  In principle, it sounds to me that at least the
ELF_MACHINE_CODE -> bfd_architecture sniffing bits could be
factored out and always be present.  But, that might not be practical.

>> Sounds like we have a couple issues. The mips backend not handling weird
>> abi/isa combinations and GDB not preventing clearly incompatible core files
>> from proceeding further into processing in the target's backend?
> 
>  I have given it some thought and came to a conclusion that we should at 
> least try being consistent.  Which means I think we should not try to 
> handle files within the MIPS backend which would not be passed in the 
> first place in an `--enable-targets=all' configuration.  Rather than 
> checking `e_machine' explicitly I'd be leaning towards using BFD to detect 
> such a situation though, perhaps by using a condition like
> 
>   if (info.abfd != NULL
>       && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>       && bfd_get_arch (info.abfd) != bfd_arch_mips)
>     return NULL;
> 
> (maybe with an additional error message) though ultimately I think it 
> would make sense to define different BFD architecture codes for file 
> formats which by definition carry no architecture information and for ones 
> that do but are not supported.

Agreed.  Seems like that could be the job of bfd_arch_obscure -- it's used
as default/unhandled case in some formats that do have architecture
information.  Though it isn't used throughout all bfd backends.

> Then for the formers we could continue 
> selecting the target using the current algorithm and for the latters we'd 
> just reject them as incompatible with the given backend -- all somewhere 
> in generic code so that individual target backends do not have to repeat 
> it all.
> 
>  As to ABI, ISA, etc. settings -- these are internal to the MIPS backend, 
> so its the backend's job to sanitise them.

/me nods.

Thanks,
Pedro Alves
  
Luis Machado Feb. 2, 2016, 12:58 p.m. UTC | #8
On 01/12/2016 04:30 PM, Maciej W. Rozycki wrote:
> On Tue, 12 Jan 2016, Luis Machado wrote:
>>> I also wonder whether the bfd arch detection couldn't be always
>>> compiled in, at least for elf.  Why does bfd fail to detect that this
>>> is an bfd_arch_i386 file in the first place?
>
>   The mapping between `e_machine' and `bfd_architecture' is only provided
> by individual BFD ELF target backends, via the ELF_MACHINE_CODE and
> ELF_ARCH macros.
>
>> It seems bfd also falls back to the default, which is mips in this case.
>>
>> p bfd_default_vector[0]
>> $3 = (const bfd_target *) 0x9beac0 <mips_elf32_trad_be_vec>
>
>   Regardless, I'd expect a suitable generic ELF BFD target to be selected,
> which is what AFAICT `bfd_check_format' does.  It is called by our
> `core_open' function and has a `core_file_p' handler, which makes suitable
> checks including `e_machine' in particular, except for generic ELF BFD
> targets, which are special-cased (and always come last).  So in the
> absence of specific ELF target support in BFD I'd expect a compatible
> generic ELF target to be chosen rather than the default BFD target, which
> might be incompatible.
>

Ah, indeed this is the case. We switch to a generic ELF target during 
bfd_check_format. So that is working as it should.

>> Sounds like we have a couple issues. The mips backend not handling weird
>> abi/isa combinations and GDB not preventing clearly incompatible core files
>> from proceeding further into processing in the target's backend?
>
>   I have given it some thought and came to a conclusion that we should at
> least try being consistent.  Which means I think we should not try to
> handle files within the MIPS backend which would not be passed in the
> first place in an `--enable-targets=all' configuration.  Rather than
> checking `e_machine' explicitly I'd be leaning towards using BFD to detect
> such a situation though, perhaps by using a condition like
>
>    if (info.abfd != NULL
>        && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>        && bfd_get_arch (info.abfd) != bfd_arch_mips)
>      return NULL;
>
> (maybe with an additional error message) though ultimately I think it
> would make sense to define different BFD architecture codes for file
> formats which by definition carry no architecture information and for ones
> that do but are not supported.  Then for the formers we could continue
> selecting the target using the current algorithm and for the latters we'd
> just reject them as incompatible with the given backend -- all somewhere
> in generic code so that individual target backends do not have to repeat
> it all.

Though the above doesn't solve the bigger picture, it gets rid of the 
internal error when loading the incompatible core file.

Should we go ahead and have this additional check committed?

Luis
  
Pedro Alves Feb. 2, 2016, 2:19 p.m. UTC | #9
On 02/02/2016 12:58 PM, Luis Machado wrote:
> On 01/12/2016 04:30 PM, Maciej W. Rozycki wrote:
>> On Tue, 12 Jan 2016, Luis Machado wrote:
>>>> I also wonder whether the bfd arch detection couldn't be always
>>>> compiled in, at least for elf.  Why does bfd fail to detect that this
>>>> is an bfd_arch_i386 file in the first place?
>>
>>   The mapping between `e_machine' and `bfd_architecture' is only provided
>> by individual BFD ELF target backends, via the ELF_MACHINE_CODE and
>> ELF_ARCH macros.
>>
>>> It seems bfd also falls back to the default, which is mips in this case.
>>>
>>> p bfd_default_vector[0]
>>> $3 = (const bfd_target *) 0x9beac0 <mips_elf32_trad_be_vec>
>>
>>   Regardless, I'd expect a suitable generic ELF BFD target to be selected,
>> which is what AFAICT `bfd_check_format' does.  It is called by our
>> `core_open' function and has a `core_file_p' handler, which makes suitable
>> checks including `e_machine' in particular, except for generic ELF BFD
>> targets, which are special-cased (and always come last).  So in the
>> absence of specific ELF target support in BFD I'd expect a compatible
>> generic ELF target to be chosen rather than the default BFD target, which
>> might be incompatible.
>>
> 
> Ah, indeed this is the case. We switch to a generic ELF target during 
> bfd_check_format. So that is working as it should.
> 
>>> Sounds like we have a couple issues. The mips backend not handling weird
>>> abi/isa combinations and GDB not preventing clearly incompatible core files
>>> from proceeding further into processing in the target's backend?
>>
>>   I have given it some thought and came to a conclusion that we should at
>> least try being consistent.  Which means I think we should not try to
>> handle files within the MIPS backend which would not be passed in the
>> first place in an `--enable-targets=all' configuration.  Rather than
>> checking `e_machine' explicitly I'd be leaning towards using BFD to detect
>> such a situation though, perhaps by using a condition like
>>
>>    if (info.abfd != NULL
>>        && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>>        && bfd_get_arch (info.abfd) != bfd_arch_mips)
>>      return NULL;
>>
>> (maybe with an additional error message) though ultimately I think it
>> would make sense to define different BFD architecture codes for file
>> formats which by definition carry no architecture information and for ones
>> that do but are not supported.  Then for the formers we could continue
>> selecting the target using the current algorithm and for the latters we'd
>> just reject them as incompatible with the given backend -- all somewhere
>> in generic code so that individual target backends do not have to repeat
>> it all.
> 
> Though the above doesn't solve the bigger picture, it gets rid of the 
> internal error when loading the incompatible core file.
> 
> Should we go ahead and have this additional check committed?

Did you try to trigger the assertion by loading a 32-bit MIPS binary
into gdb, and playing with "set mips abi n64/o64...", "set mipsfpu",
etc?

I think that adding a test to the testsuite that iterates through all
the possible combinations just to make sure gdb doesn't crash
would be great, and also show that the patch stands on its own
as well, irrespective of the bfd arch compatibility issues.

Thanks,
Pedro Alves
  
Pedro Alves Feb. 2, 2016, 2:22 p.m. UTC | #10
On 02/02/2016 02:19 PM, Pedro Alves wrote:
> On 02/02/2016 12:58 PM, Luis Machado wrote:

> Did you try to trigger the assertion by loading a 32-bit MIPS binary
> into gdb, and playing with "set mips abi n64/o64...", "set mipsfpu",
> etc?
> 
> I think that adding a test to the testsuite that iterates through all
> the possible combinations just to make sure gdb doesn't crash
> would be great, and also show that the patch stands on its own
> as well, irrespective of the bfd arch compatibility issues.

TBC, I meant, the original patch that checked unsuitable ABI/ISA
combinations.

Thanks,
Pedro Alves
  
Maciej W. Rozycki Feb. 4, 2016, 9:01 p.m. UTC | #11
On Tue, 2 Feb 2016, Pedro Alves wrote:

> > Did you try to trigger the assertion by loading a 32-bit MIPS binary
> > into gdb, and playing with "set mips abi n64/o64...", "set mipsfpu",
> > etc?
> > 
> > I think that adding a test to the testsuite that iterates through all
> > the possible combinations just to make sure gdb doesn't crash
> > would be great, and also show that the patch stands on its own
> > as well, irrespective of the bfd arch compatibility issues.
> 
> TBC, I meant, the original patch that checked unsuitable ABI/ISA
> combinations.

 NB I can only see the use for these knobs to deal with two situations:

1. There's no executable and we want to connect to a live target for 
   minimal binary-only/disasembly-level debugging.  We need to set the 
   endianness, ABI, ISA, etc. to match the target then (although arguably 
   at least the endianness should be supplied by the debug stub somehow; 
   we just don't have a way defined right now).

2. We have a buggy or outdated GDB executable which fails to determine the 
   right settings automatically.  As it looks for example as recently as 
   last week I came across a problem where GDB failed to detect the 
   presence of the FPU under Linux for some reason.  I had to forcefully
   request `set mipsfpu double' to override the wrongly chosen setting of 
   `none', at which point accesses to the FPU worked as expected, so it 
   wasn't like the unit was inaccessible.

Obviously I need to debug #2, but overall I agree we ought to bail out 
gracefully rather than crash if the manual settings lead to a nonsensical 
configuration.

 Therefore I went back to the original patch now.  It obscures things a 
bit I'm afraid from my point of view as it combines a syntactical change 
(the addition of the `arch_is_incompatible' auxiliary variable) and a 
semantical change (the addition of the `mips_isa == ISA_MIPS16' check), so 
I'd like to see the two changes separated.

 TBH I'm not convinced whether the auxiliary variable buys us anything 
here -- it doesn't serve as documentation as we have an explanatory 
comment here already, which BTW needs to be updated accordingly if the 
condition is extended to cover an ISA incompatibility.

 As to which, and more importantly -- there is no actual architectural 
incompatibility between the n64 (or n32) ABI and the MIPS16 instruction 
set; there are 64-bit MIPS processors in existence which implement the 
MIPS16 ISA as well, e.g. the NEC VR4111, and the ISA itself includes 
64-bit instructions on such a processor.  So the MIPS16 ISA is really 
agnostic to the ABI, just as is the regular MIPS ISA or the microMIPS ISA.  
Therefore any such fix needs to go elsewhere I'm afraid -- we probably do 
something outright silly for the ISA_MIPS16 setting.

  Maciej
  
Luis Machado Feb. 5, 2016, 11:29 a.m. UTC | #12
On 02/04/2016 07:01 PM, Maciej W. Rozycki wrote:
> On Tue, 2 Feb 2016, Pedro Alves wrote:
>
>>> Did you try to trigger the assertion by loading a 32-bit MIPS binary
>>> into gdb, and playing with "set mips abi n64/o64...", "set mipsfpu",
>>> etc?
>>>
>>> I think that adding a test to the testsuite that iterates through all
>>> the possible combinations just to make sure gdb doesn't crash
>>> would be great, and also show that the patch stands on its own
>>> as well, irrespective of the bfd arch compatibility issues.

I was playing around with these settings.

>>
>> TBC, I meant, the original patch that checked unsuitable ABI/ISA
>> combinations.
>
>   NB I can only see the use for these knobs to deal with two situations:
>
> 1. There's no executable and we want to connect to a live target for
>     minimal binary-only/disasembly-level debugging.  We need to set the
>     endianness, ABI, ISA, etc. to match the target then (although arguably
>     at least the endianness should be supplied by the debug stub somehow;
>     we just don't have a way defined right now).
>

While trying reproducers out, i noticed this use case doesn't seem to 
work properly under some conditions anymore. Whenever GDB doesn't find a 
binary and sysroot is set to empty, it will not attempt to continue with 
the remote session. It seems to just give up.

Sending packet: $qXfer:exec-file:read:6394:0,fff#60...Packet received: 
lgdb.base/break
<- remote->to_xfer_partial (0xcb3a80, 27, 6394, 0xe38cc0, 0x0, 0x0, 
0xfff, 0x98) = 1
remote:target_xfer_partial (27, 6394, 0xe38cc0, 0x0, 0x0, 4095) = 1, 
152, bytes =
  2f 6e 65 74 2f 62 75 69 6c 64 32 2d 6c 75 63 69 ...
-> remote->to_check_pending_interrupt (...)
<- remote->to_check_pending_interrupt (0xcb3a80)
-> remote->to_xfer_partial (...)
<- remote->to_xfer_partial (0xcb3a80, 27, 6394, 0xe38d58, 0x0, 0x98, 
0xf67, 0x0) = 0
remote:target_xfer_partial (27, 6394, 0xe38d58, 0x0, 0x98, 3943) = 0, 0
<- remote->to_pid_to_exec_file (0xcb3a80, 25492) = gdb.base/break
target_close ()
gdb.base/break: No such file or directory.
(gdb) i r
The program has no registers now.
(gdb) kill
The program is not being run.

Otherwise gdbserver will transfer the file over from the remote end. But 
i digress.

I can easily reproduce the internal error by simply loading a 32-bit 
MIPS binary and flipping the abi to any of the 64-bit variants.

This doesn't seem to be terribly important as people interested in 
playing with these setting will most likely know what they're doing.

The testcase causing an internal error seems to be even less important 
and very unlikely to occur, but it always runs as part of the testsuite 
and it is a bit of an annoyance.

GDB should not give an internal error or crash, obviously.

> 2. We have a buggy or outdated GDB executable which fails to determine the
>     right settings automatically.  As it looks for example as recently as
>     last week I came across a problem where GDB failed to detect the
>     presence of the FPU under Linux for some reason.  I had to forcefully
>     request `set mipsfpu double' to override the wrongly chosen setting of
>     `none', at which point accesses to the FPU worked as expected, so it
>     wasn't like the unit was inaccessible.
>
> Obviously I need to debug #2, but overall I agree we ought to bail out
> gracefully rather than crash if the manual settings lead to a nonsensical
> configuration.
>
>   Therefore I went back to the original patch now.  It obscures things a
> bit I'm afraid from my point of view as it combines a syntactical change
> (the addition of the `arch_is_incompatible' auxiliary variable) and a
> semantical change (the addition of the `mips_isa == ISA_MIPS16' check), so
> I'd like to see the two changes separated.
>
>   TBH I'm not convinced whether the auxiliary variable buys us anything
> here -- it doesn't serve as documentation as we have an explanatory
> comment here already, which BTW needs to be updated accordingly if the
> condition is extended to cover an ISA incompatibility.

The naming could've been better. I went that route in the hopes that 
future checks would just flip that boolean while keeping the conditional 
block separate, otherwise we would have a bigger conditional block that 
may not be as straightforward to parse.
>
>   As to which, and more importantly -- there is no actual architectural
> incompatibility between the n64 (or n32) ABI and the MIPS16 instruction
> set; there are 64-bit MIPS processors in existence which implement the
> MIPS16 ISA as well, e.g. the NEC VR4111, and the ISA itself includes
> 64-bit instructions on such a processor.  So the MIPS16 ISA is really
> agnostic to the ABI, just as is the regular MIPS ISA or the microMIPS ISA.
> Therefore any such fix needs to go elsewhere I'm afraid -- we probably do
> something outright silly for the ISA_MIPS16 setting.

Fair enough. Do you have a suggestion on where that fix should go to?

The culprit seems to be the mix of an arch selection that gives us 
64-bit cooked registers (due to mips_abi_regsize) and an ISA that gives 
us 32-bit registers (due to mips_isa_regsize). With that combination, 
mips_pseudo_register_read will fail in a fatal way, as well as 
mips_pseudo_register_write if we ever manage to go past the reading step.

Luis
  
Maciej W. Rozycki Feb. 5, 2016, 2:09 p.m. UTC | #13
On Fri, 5 Feb 2016, Luis Machado wrote:

> > 1. There's no executable and we want to connect to a live target for
> >     minimal binary-only/disasembly-level debugging.  We need to set the
> >     endianness, ABI, ISA, etc. to match the target then (although arguably
> >     at least the endianness should be supplied by the debug stub somehow;
> >     we just don't have a way defined right now).
> > 
> 
> While trying reproducers out, i noticed this use case doesn't seem to work
> properly under some conditions anymore. Whenever GDB doesn't find a binary and
> sysroot is set to empty, it will not attempt to continue with the remote
> session. It seems to just give up.
> 
> Sending packet: $qXfer:exec-file:read:6394:0,fff#60...Packet received:
> lgdb.base/break
> <- remote->to_xfer_partial (0xcb3a80, 27, 6394, 0xe38cc0, 0x0, 0x0, 0xfff,
> 0x98) = 1
> remote:target_xfer_partial (27, 6394, 0xe38cc0, 0x0, 0x0, 4095) = 1, 152,
> bytes =
>  2f 6e 65 74 2f 62 75 69 6c 64 32 2d 6c 75 63 69 ...
> -> remote->to_check_pending_interrupt (...)
> <- remote->to_check_pending_interrupt (0xcb3a80)
> -> remote->to_xfer_partial (...)
> <- remote->to_xfer_partial (0xcb3a80, 27, 6394, 0xe38d58, 0x0, 0x98, 0xf67,
> 0x0) = 0
> remote:target_xfer_partial (27, 6394, 0xe38d58, 0x0, 0x98, 3943) = 0, 0
> <- remote->to_pid_to_exec_file (0xcb3a80, 25492) = gdb.base/break
> target_close ()
> gdb.base/break: No such file or directory.
> (gdb) i r
> The program has no registers now.
> (gdb) kill
> The program is not being run.
> 
> Otherwise gdbserver will transfer the file over from the remote end. But i
> digress.

 Thanks for letting me know, I'll have a look -- this might be an issue 
with gdbserver rather than GDB proper.

 Actually I have never tried this scenario with gdbserver, my usual case 
was running through board firmware over JTAG and a third-party debug stub 
talking to it.  Or, in the very old days, the MDI target I posted a while 
ago (<https://sourceware.org/ml/gdb-patches/2008-02/msg00439.html>).

> I can easily reproduce the internal error by simply loading a 32-bit MIPS
> binary and flipping the abi to any of the 64-bit variants.

 I think I can see where this can happen.  We have this condition in 
`mips_gdbarch_init' to catch this situation:

  /* If we have only 32-bit registers, then we can't debug a 64-bit
     ABI.  */
  if (info.target_desc
      && tdesc_property (info.target_desc, PROPERTY_GP32) != NULL
      && mips_abi != MIPS_ABI_EABI32
      && mips_abi != MIPS_ABI_O32)

-- however it works in positive logic, that is only if we have a valid 
target description and that description is wrong, and doing nothing if we 
don't.  However `mips_isa_regsize' also has fallback logic, specifically:

  /* Fall back to the previous behavior.  */
  return (gdbarch_bfd_arch_info (gdbarch)->bits_per_word
	  / gdbarch_bfd_arch_info (gdbarch)->bits_per_byte);

So I think in `mips_gdbarch_init' we need to incorporate a corresponding 
check and reject any BFD arch which implies an incompatible register size; 
I think irrespectively of whether we have a target description or not.  
We have the necessary bits readily available here AFAICT.

> This doesn't seem to be terribly important as people interested in playing
> with these setting will most likely know what they're doing.
> 
> The testcase causing an internal error seems to be even less important and
> very unlikely to occur, but it always runs as part of the testsuite and it is
> a bit of an annoyance.
> 
> GDB should not give an internal error or crash, obviously.

 Oh absolutely, no argument about this as far as I'm concerned.  I'd even 
say stronger that it must not!

> >   TBH I'm not convinced whether the auxiliary variable buys us anything
> > here -- it doesn't serve as documentation as we have an explanatory
> > comment here already, which BTW needs to be updated accordingly if the
> > condition is extended to cover an ISA incompatibility.
> 
> The naming could've been better. I went that route in the hopes that future
> checks would just flip that boolean while keeping the conditional block
> separate, otherwise we would have a bigger conditional block that may not be
> as straightforward to parse.

 Fair enough -- right now we only have this single `if' statement, but if 
we have a separate knob to be driven, as you're proposing, then it'll make 
it easier, and therefore might encourage people to keep it clean if it 
tunrs out the control needs to be more complex, e.g. by using a `switch' 
statement if needed.  So OK, I'm not opposing it -- let's just make it 
separate from the fix to the original issue.

> >   As to which, and more importantly -- there is no actual architectural
> > incompatibility between the n64 (or n32) ABI and the MIPS16 instruction
> > set; there are 64-bit MIPS processors in existence which implement the
> > MIPS16 ISA as well, e.g. the NEC VR4111, and the ISA itself includes
> > 64-bit instructions on such a processor.  So the MIPS16 ISA is really
> > agnostic to the ABI, just as is the regular MIPS ISA or the microMIPS ISA.
> > Therefore any such fix needs to go elsewhere I'm afraid -- we probably do
> > something outright silly for the ISA_MIPS16 setting.
> 
> Fair enough. Do you have a suggestion on where that fix should go to?

 None offhand, I'll see if I can have a look soon.  With my observation 
above about `mips_gdbarch_init' vs `mips_isa_regsize' I think this is a 
separate bug though, probably in BFD.

> The culprit seems to be the mix of an arch selection that gives us 64-bit
> cooked registers (due to mips_abi_regsize) and an ISA that gives us 32-bit
> registers (due to mips_isa_regsize). With that combination,
> mips_pseudo_register_read will fail in a fatal way, as well as
> mips_pseudo_register_write if we ever manage to go past the reading step.

 Correct, however setting the MIPS16 ISA (or microMIPS, for that matter, 
as this is analogous) shouldn't affect `mips_isa_regsize'.  That just 
seems plain wrong to me.  I suspect this might be just some historical 
baggage, needing cleaning up.

  Maciej
  
Luis Machado Jan. 9, 2017, 7:56 p.m. UTC | #14
On 01/12/2016 12:30 PM, Maciej W. Rozycki wrote:
> On Tue, 12 Jan 2016, Luis Machado wrote:
>
>>>> The data above leads gdbarch_info_fill to assign default_bfd_arch to
>>>> info->bfd_arch_info here:
>>>>
>>>>     /* From the default.  */
>>>>     if (info->bfd_arch_info == NULL)
>>>>       info->bfd_arch_info = default_bfd_arch;
>>>>
>>>> So the core file essentially turns into a mips-compatible core file.
>>>
>>> Hmmm.  I see.  I think we can't really change this, given that there
>>> are formats that don't have an architecture.  Like, e.g., srec:
>>>
>>>   (gdb) file testsuite/gdb.base/intstr2.srec
>>>   Reading symbols from testsuite/gdb.base/intstr2.srec...(no debugging
>>> symbols found)...done.
>
>  Or we could be talking to a live target with no executable selected at
> all.  This is also why there are settings like `set mips abi ...'
> available -- to let the user select the executable model for a target
> there's no other source of information about.
>
>>> I also wonder whether the bfd arch detection couldn't be always
>>> compiled in, at least for elf.  Why does bfd fail to detect that this
>>> is an bfd_arch_i386 file in the first place?
>
>  The mapping between `e_machine' and `bfd_architecture' is only provided
> by individual BFD ELF target backends, via the ELF_MACHINE_CODE and
> ELF_ARCH macros.
>
>> It seems bfd also falls back to the default, which is mips in this case.
>>
>> p bfd_default_vector[0]
>> $3 = (const bfd_target *) 0x9beac0 <mips_elf32_trad_be_vec>
>
>  Regardless, I'd expect a suitable generic ELF BFD target to be selected,
> which is what AFAICT `bfd_check_format' does.  It is called by our
> `core_open' function and has a `core_file_p' handler, which makes suitable
> checks including `e_machine' in particular, except for generic ELF BFD
> targets, which are special-cased (and always come last).  So in the
> absence of specific ELF target support in BFD I'd expect a compatible
> generic ELF target to be chosen rather than the default BFD target, which
> might be incompatible.
>
>> Sounds like we have a couple issues. The mips backend not handling weird
>> abi/isa combinations and GDB not preventing clearly incompatible core files
>> from proceeding further into processing in the target's backend?
>
>  I have given it some thought and came to a conclusion that we should at
> least try being consistent.  Which means I think we should not try to
> handle files within the MIPS backend which would not be passed in the
> first place in an `--enable-targets=all' configuration.  Rather than
> checking `e_machine' explicitly I'd be leaning towards using BFD to detect
> such a situation though, perhaps by using a condition like
>
>   if (info.abfd != NULL
>       && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
>       && bfd_get_arch (info.abfd) != bfd_arch_mips)
>     return NULL;
>
> (maybe with an additional error message) though ultimately I think it
> would make sense to define different BFD architecture codes for file
> formats which by definition carry no architecture information and for ones
> that do but are not supported.  Then for the formers we could continue
> selecting the target using the current algorithm and for the latters we'd
> just reject them as incompatible with the given backend -- all somewhere
> in generic code so that individual target backends do not have to repeat
> it all.
>
>  As to ABI, ISA, etc. settings -- these are internal to the MIPS backend,
> so its the backend's job to sanitise them.
>
>   Maciej
>

After quite a while, i'm revisiting this one.

Reading the thread once again, is my understanding correct that the 
first patch is more suitable now? Possibly with some cleanups?

https://sourceware.org/ml/gdb-patches/2016-01/msg00134.html

Regards,
Luis
  
Pedro Alves Jan. 19, 2017, 4:56 p.m. UTC | #15
On 01/09/2017 07:56 PM, Luis Machado wrote:

> Reading the thread once again, is my understanding correct that the
> first patch is more suitable now? Possibly with some cleanups?
> 
> https://sourceware.org/ml/gdb-patches/2016-01/msg00134.html

I think a summary of the discussion would help.

I think it'd be very interesting to come up with a combination
of "set mips abi n64/o64" etc, and maybe "set gnutarget" that
exposes the problem.  I've since added the
bit gdb.base/all-architectures*.exp testcase, and that does
try all sorts of combinations, including mips ones, but it
evidently isn't catching this for some reason.

Thanks,
Pedro Alves
  
Luis Machado Jan. 19, 2017, 5:05 p.m. UTC | #16
On 01/19/2017 10:56 AM, Pedro Alves wrote:
> On 01/09/2017 07:56 PM, Luis Machado wrote:
>
>> Reading the thread once again, is my understanding correct that the
>> first patch is more suitable now? Possibly with some cleanups?
>>
>> https://sourceware.org/ml/gdb-patches/2016-01/msg00134.html
>
> I think a summary of the discussion would help.
>

I'll need a refresh myself. Let me gather the most useful bits in the 
discussion and i'll report back.
  

Patch

gdb/ChangeLog:

2016-01-11  Luis Machado  <lgustavo@codesourcery.com>

	bz 18964

	* mips-tdep.c (mips_gdbarch_init): Sanity check ELF e_machine
	field and bail out if it is not MIPS-compatible.

gdb/testsuite/ChangeLog:

2016-01-11  Luis Machado  <lgustavo@codesourcery.com>

	bz 18964

	* gdb.arch/i376-biarch-core.exp: Handle the core file not being
	recognized.
	Use variables for repeating test names.
---
 gdb/mips-tdep.c                             |  6 ++++++
 gdb/testsuite/gdb.arch/i386-biarch-core.exp | 18 +++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/gdb/mips-tdep.c b/gdb/mips-tdep.c
index ca17864..cdfd80e 100644
--- a/gdb/mips-tdep.c
+++ b/gdb/mips-tdep.c
@@ -8208,6 +8208,12 @@  mips_gdbarch_init (struct gdbarch_info info, struct gdbarch_list *arches)
   int dspacc;
   int dspctl;
 
+  /* Sanity check the e_machine field.  */
+  if (info.abfd
+      && bfd_get_flavour (info.abfd) == bfd_target_elf_flavour
+      && elf_elfheader (info.abfd)->e_machine != EM_MIPS)
+    return NULL;
+
   /* Fill in the OS dependent register numbers and names.  */
   if (info.osabi == GDB_OSABI_IRIX)
     {
diff --git a/gdb/testsuite/gdb.arch/i386-biarch-core.exp b/gdb/testsuite/gdb.arch/i386-biarch-core.exp
index 607b947..a4c0541 100644
--- a/gdb/testsuite/gdb.arch/i386-biarch-core.exp
+++ b/gdb/testsuite/gdb.arch/i386-biarch-core.exp
@@ -28,13 +28,14 @@  gdb_start
 gdb_reinitialize_dir $srcdir/$subdir
 
 set test "complete set gnutarget"
+set text_test ".text is readable"
 gdb_test_multiple "complete set gnutarget " $test {
     -re "set gnutarget elf64-little\r\n(.*\r\n)?$gdb_prompt $" {
 	pass $test
     }
     -re "\r\n$gdb_prompt $" {
 	pass $test
-	untested ".text is readable"
+	untested $text_test
 	return
     }
 }
@@ -62,8 +63,19 @@  if {$corestat(size) != 102400} {
 # objcopy as it corrupts the core file beyond all recognition.
 # The output therefore does not matter much, just we should not get GDB
 # internal error.
-gdb_test "core-file ${corefile}" ".*" "core-file"
+set test "core-file"
+gdb_test_multiple "core-file ${corefile}" $test {
+    -re ".* no core file handler recognizes format\r\n$gdb_prompt $" {
+	# Make sure we handle cases where the core file isn't even properly
+	# loaded due to architecture incompatibilities.
+	untested $text_test
+	return
+    }
+    -re "\r\n$gdb_prompt $" {
+	pass $test
+    }
+}
 
 # Test if at least the core file segments memory has been loaded.
 # https://bugzilla.redhat.com/show_bug.cgi?id=457187
-gdb_test "x/bx $address" "\r\n\[ \t\]*$address:\[ \t\]*0xf4\[ \t\]*" ".text is readable"
+gdb_test "x/bx $address" "\r\n\[ \t\]*$address:\[ \t\]*0xf4\[ \t\]*" $text_test
-- 
1.9.1