gcov-tool: Allow merging of empty profile lists

Message ID 20220323093404.13225-1-sebastian.huber@embedded-brains.de
State New
Headers
Series gcov-tool: Allow merging of empty profile lists |

Commit Message

Sebastian Huber March 23, 2022, 9:34 a.m. UTC
  The gcov_profile_merge() already had code to deal with profile information
which had no counterpart to merge with.  For profile information from files
with no associated counterpart, the profile information is simply used as is
with the weighting transformation applied.  Make sure that gcov_profile_merge()
works with an empty target profile list.  Return the merged profile list.

gcc/
	* gcov-tool.cc (gcov_profile_merge): Adjust return type.
	(profile_merge): Allow merging of directories which contain no profile
	files.

libgcc/
	* libgcov-util.c (gcov_profile_merge): Return the list of merged
	profiles.  Accept empty target and source profile lists.
---
 gcc/gcov-tool.cc      | 27 ++++++++++-----------------
 libgcc/libgcov-util.c | 15 +++++++++------
 2 files changed, 19 insertions(+), 23 deletions(-)
  

Comments

Martin Liška March 23, 2022, 12:19 p.m. UTC | #1
On 3/23/22 10:34, Sebastian Huber wrote:

Hello.

Thanks for the patch. Note we're in stage4, so the patch can eventually go
in in the next stage1.

> The gcov_profile_merge() already had code to deal with profile information
> which had no counterpart to merge with.  For profile information from files
> with no associated counterpart, the profile information is simply used as is
> with the weighting transformation applied.  Make sure that gcov_profile_merge()
> works with an empty target profile list.  Return the merged profile list.

Can you please provide a simple demo with a pair of profiles
where I'll see what changes?

> 
> gcc/
> 	* gcov-tool.cc (gcov_profile_merge): Adjust return type.
> 	(profile_merge): Allow merging of directories which contain no profile
> 	files.
> 
> libgcc/
> 	* libgcov-util.c (gcov_profile_merge): Return the list of merged
> 	profiles.  Accept empty target and source profile lists.
> ---
>   gcc/gcov-tool.cc      | 27 ++++++++++-----------------
>   libgcc/libgcov-util.c | 15 +++++++++------
>   2 files changed, 19 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/gcov-tool.cc b/gcc/gcov-tool.cc
> index f4e42ae763c..2e4083a664d 100644
> --- a/gcc/gcov-tool.cc
> +++ b/gcc/gcov-tool.cc
> @@ -40,7 +40,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>   #endif
>   #include <getopt.h>
>   
> -extern int gcov_profile_merge (struct gcov_info*, struct gcov_info*, int, int);
> +extern struct gcov_info *gcov_profile_merge (struct gcov_info*,
> +					     struct gcov_info*, int, int);
>   extern int gcov_profile_overlap (struct gcov_info*, struct gcov_info*);
>   extern int gcov_profile_normalize (struct gcov_info*, gcov_type);
>   extern int gcov_profile_scale (struct gcov_info*, float, int, int);
> @@ -141,26 +142,18 @@ profile_merge (const char *d1, const char *d2, const char *out, int w1, int w2)
>   {
>     struct gcov_info *d1_profile;
>     struct gcov_info *d2_profile;
> -  int ret;
> +  struct gcov_info *merged_profile;
>   
>     d1_profile = gcov_read_profile_dir (d1, 0);
> -  if (!d1_profile)
> -    return 1;
> -
> -  if (d2)
> -    {
> -      d2_profile = gcov_read_profile_dir (d2, 0);
> -      if (!d2_profile)
> -        return 1;
> +  d2_profile = gcov_read_profile_dir (d2, 0);

Is it fine calling 'gcov_read_profile_dir (d2, 0)' without 'if (d2)'?

>   
> -      /* The actual merge: we overwrite to d1_profile.  */
> -      ret = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
> +  /* The actual merge: we overwrite to d1_profile.  */
> +  merged_profile = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
>   
> -      if (ret)
> -        return ret;
> -    }
> -
> -  gcov_output_files (out, d1_profile);
> +  if (merged_profile)
> +    gcov_output_files (out, merged_profile);
> +  else if (verbose)
> +    fnotice (stdout, "no profile files were merged\n");
>   
>     return 0;
>   }
> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
> index ba7fb924b53..e5496f4ade2 100644
> --- a/libgcc/libgcov-util.c
> +++ b/libgcc/libgcov-util.c
> @@ -677,13 +677,13 @@ find_match_gcov_info (struct gcov_info **array, int size,
>      Return 0 on success: without mismatch.
>      Reutrn 1 on error.  */

Please adjust the function comment.

Cheers,
Martin

>   
> -int
> +struct gcov_info *
>   gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile,
>                       int w1, int w2)
>   {
>     struct gcov_info *gi_ptr;
>     struct gcov_info **tgt_infos;
> -  struct gcov_info *tgt_tail;
> +  struct gcov_info **tgt_tail;
>     struct gcov_info **in_src_not_tgt;
>     unsigned tgt_cnt = 0, src_cnt = 0;
>     unsigned unmatch_info_cnt = 0;
> @@ -703,7 +703,10 @@ gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile
>     for (gi_ptr = tgt_profile, i = 0; gi_ptr; gi_ptr = gi_ptr->next, i++)
>       tgt_infos[i] = gi_ptr;
>   
> -  tgt_tail = tgt_infos[tgt_cnt - 1];
> +  if (tgt_cnt)
> +     tgt_tail = &tgt_infos[tgt_cnt - 1]->next;
> +  else
> +     tgt_tail = &tgt_profile;
>   
>     /* First pass on tgt_profile, we multiply w1 to all counters.  */
>     if (w1 > 1)
> @@ -732,14 +735,14 @@ gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile
>         gi_ptr = in_src_not_tgt[i];
>         gcov_merge (gi_ptr, gi_ptr, w2 - 1);
>         gi_ptr->next = NULL;
> -      tgt_tail->next = gi_ptr;
> -      tgt_tail = gi_ptr;
> +      *tgt_tail = gi_ptr;
> +      tgt_tail = &gi_ptr->next;
>       }
>   
>     free (in_src_not_tgt);
>     free (tgt_infos);
>   
> -  return 0;
> +  return tgt_profile;
>   }
>   
>   typedef gcov_type (*counter_op_fn) (gcov_type, void*, void*);
  
Sebastian Huber March 23, 2022, 2:50 p.m. UTC | #2
Hello Martin,

On 23/03/2022 13:19, Martin Liška wrote:
> On 3/23/22 10:34, Sebastian Huber wrote:
> 
> Hello.
> 
> Thanks for the patch. Note we're in stage4, so the patch can eventually go
> in in the next stage1.

ok, good.

> 
>> The gcov_profile_merge() already had code to deal with profile 
>> information
>> which had no counterpart to merge with.  For profile information from 
>> files
>> with no associated counterpart, the profile information is simply used 
>> as is
>> with the weighting transformation applied.  Make sure that 
>> gcov_profile_merge()
>> works with an empty target profile list.  Return the merged profile list.
> 
> Can you please provide a simple demo with a pair of profiles
> where I'll see what changes?

The background for this feature is that I would like to combine the gcov 
information obtained from several test executables. Each test executable 
will print something like this (log.txt):

*** BEGIN OF GCOV INFO ***

/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda
YWRjZ1IzMEKtLjW3AAAAAQMAAADcaps855EX05p4KUUAAKEBOgAAAAEAAAAAAAAAAQAAAAAAAAAB
AAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEA
AAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAA
AAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAABAwAAACXn3k16
TDqmuIMwpAAAoQECAAAAAgAAAAAAAAAAAAABAwAAADzkvDcfSnvcuIMwpAAAoQH+////AAAAAQMA
AACnWNZaIM7GWZ9hiOIAAKEBBAAAAAEAAAAAAAAAAAAAAAAAAAAAAAABAwAAAPkGW3YHFUOO6Old
2wAAoQECAAAAAQAAAAAAAAAAAAABAwAAAIvy4CE9FxuM6Old2wAAoQECAAAAAQAAAAAAAAAAAAAB
AwAAANyvBDZiERlQ6Old2wAAoQECAAAAAQAAAAAAAAAAAAABAwAAACKQjCp2pYlIuIMwpAAAoQEC
AAAAAQAAAAAAAAAAAAABAwAAAKSSXEjQFDluuIMwpAAAoQH+////AAAAAA==

...

*** END OF GCOV INFO ***

The attached script reads the log file and creates the *.gcda files 
using gcov-tool. Initially, the target files do not exist.

> 
>>
>> gcc/
>>     * gcov-tool.cc (gcov_profile_merge): Adjust return type.
>>     (profile_merge): Allow merging of directories which contain no 
>> profile
>>     files.
>>
>> libgcc/
>>     * libgcov-util.c (gcov_profile_merge): Return the list of merged
>>     profiles.  Accept empty target and source profile lists.
>> ---
>>   gcc/gcov-tool.cc      | 27 ++++++++++-----------------
>>   libgcc/libgcov-util.c | 15 +++++++++------
>>   2 files changed, 19 insertions(+), 23 deletions(-)
>>
>> diff --git a/gcc/gcov-tool.cc b/gcc/gcov-tool.cc
>> index f4e42ae763c..2e4083a664d 100644
>> --- a/gcc/gcov-tool.cc
>> +++ b/gcc/gcov-tool.cc
>> @@ -40,7 +40,8 @@ see the files COPYING3 and COPYING.RUNTIME 
>> respectively.  If not, see
>>   #endif
>>   #include <getopt.h>
>> -extern int gcov_profile_merge (struct gcov_info*, struct gcov_info*, 
>> int, int);
>> +extern struct gcov_info *gcov_profile_merge (struct gcov_info*,
>> +                         struct gcov_info*, int, int);
>>   extern int gcov_profile_overlap (struct gcov_info*, struct gcov_info*);
>>   extern int gcov_profile_normalize (struct gcov_info*, gcov_type);
>>   extern int gcov_profile_scale (struct gcov_info*, float, int, int);
>> @@ -141,26 +142,18 @@ profile_merge (const char *d1, const char *d2, 
>> const char *out, int w1, int w2)
>>   {
>>     struct gcov_info *d1_profile;
>>     struct gcov_info *d2_profile;
>> -  int ret;
>> +  struct gcov_info *merged_profile;
>>     d1_profile = gcov_read_profile_dir (d1, 0);
>> -  if (!d1_profile)
>> -    return 1;
>> -
>> -  if (d2)
>> -    {
>> -      d2_profile = gcov_read_profile_dir (d2, 0);
>> -      if (!d2_profile)
>> -        return 1;
>> +  d2_profile = gcov_read_profile_dir (d2, 0);
> 
> Is it fine calling 'gcov_read_profile_dir (d2, 0)' without 'if (d2)'?

Yes, the caller ensures that d1 and d2 are both provided:

   if (argc - optind != 2)
     merge_usage ();

   return profile_merge (argv[optind], argv[optind+1], output_dir, w1, w2);

Maybe we should provide a better error message, if the user doesn't 
provide two directories instead of the general usage message.

> 
>> -      /* The actual merge: we overwrite to d1_profile.  */
>> -      ret = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
>> +  /* The actual merge: we overwrite to d1_profile.  */
>> +  merged_profile = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
>> -      if (ret)
>> -        return ret;
>> -    }
>> -
>> -  gcov_output_files (out, d1_profile);
>> +  if (merged_profile)
>> +    gcov_output_files (out, merged_profile);
>> +  else if (verbose)
>> +    fnotice (stdout, "no profile files were merged\n");
>>     return 0;
>>   }
>> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
>> index ba7fb924b53..e5496f4ade2 100644
>> --- a/libgcc/libgcov-util.c
>> +++ b/libgcc/libgcov-util.c
>> @@ -677,13 +677,13 @@ find_match_gcov_info (struct gcov_info **array, 
>> int size,
>>      Return 0 on success: without mismatch.
>>      Reutrn 1 on error.  */
> 
> Please adjust the function comment.

Oh, yes.
  
Martin Liška March 24, 2022, 10:29 a.m. UTC | #3
On 3/23/22 15:50, Sebastian Huber wrote:
> The attached script reads the log file and creates the *.gcda files using gcov-tool. Initially, the target files do not exist.

Now I've got your use-case and I like it. It's cool one can utilize GCOV even without a filesystem.

Please update the patch. I basically don't see any issues with it.

Cheers,
Martin
  
Sebastian Huber March 24, 2022, 10:51 a.m. UTC | #4
On 24/03/2022 11:29, Martin Liška wrote:
> On 3/23/22 15:50, Sebastian Huber wrote:
>> The attached script reads the log file and creates the *.gcda files 
>> using gcov-tool. Initially, the target files do not exist.
> 
> Now I've got your use-case and I like it. It's cool one can utilize GCOV 
> even without a filesystem.

Yes, it basically works quite well. I try to make it a bit easier to 
use. What seems to be quite common is that tests for embedded systems 
report their test data through an in order character stream, for example 
through an UART device. I used this primitive encoding of the gcov 
information:

*** BEGIN OF GCOV INFO ***

/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda
YWRjZ1IzMEKtLjW3AAAAAQMAAADcaps855EX05p4KUUAAKEBOgAAAAEAAAAAAAAAAQAAAAAAAAAB
AAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAAAAAAAAAAAAAEA
AAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAA
AAAAAAABAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAABAwAAACXn3k16
TDqmuIMwpAAAoQECAAAAAgAAAAAAAAAAAAABAwAAADzkvDcfSnvcuIMwpAAAoQH+////AAAAAQMA
AACnWNZaIM7GWZ9hiOIAAKEBBAAAAAEAAAAAAAAAAAAAAAAAAAAAAAABAwAAAPkGW3YHFUOO6Old
2wAAoQECAAAAAQAAAAAAAAAAAAABAwAAAIvy4CE9FxuM6Old2wAAoQECAAAAAQAAAAAAAAAAAAAB
AwAAANyvBDZiERlQ6Old2wAAoQECAAAAAQAAAAAAAAAAAAABAwAAACKQjCp2pYlIuIMwpAAAoQEC
AAAAAQAAAAAAAAAAAAABAwAAAKSSXEjQFDluuIMwpAAAoQH+////AAAAAA==

/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/src/netif/ethernet.gcda
YWRjZ1IzMEIwMjW3AAAAAQMAAAC+EBQuXa7stuNAYTkAAKEBCgAAAAEAAAAAAAAAAQAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAABAwAAAAQbGWmmhbpBJ5lR8AAAoQEmAAAAAQAAAAAA
AAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

...

*** END OF GCOV INFO ***

Maybe we could add the file path into the gcov information stream using 
a new tag:

#define GCOV_TAG_GCDA_FILE_NAME  ((gcov_unsigned_t)0xa5000000)

Then the complete gcov information can be dumped using a single base64 
encoded stream. We could add some support to the gcov-tool to read this 
stream and merge it into gcda files.
  
Martin Liška March 24, 2022, 12:03 p.m. UTC | #5
On 3/24/22 11:51, Sebastian Huber wrote:
> Maybe we could add the file path into the gcov information stream using a new tag:
> 
> #define GCOV_TAG_GCDA_FILE_NAME  ((gcov_unsigned_t)0xa5000000)
> 
> Then the complete gcov information can be dumped using a single base64 encoded stream. We could add some support to the gcov-tool to read this stream and merge it into gcda files.

I would support such patch!

Thanks,
Martin
  
Sebastian Huber March 28, 2022, 4:23 p.m. UTC | #6
On 24/03/2022 13:03, Martin Liška wrote:
> On 3/24/22 11:51, Sebastian Huber wrote:
>> Maybe we could add the file path into the gcov information stream 
>> using a new tag:
>>
>> #define GCOV_TAG_GCDA_FILE_NAME  ((gcov_unsigned_t)0xa5000000)
>>
>> Then the complete gcov information can be dumped using a single base64 
>> encoded stream. We could add some support to the gcov-tool to read 
>> this stream and merge it into gcda files.
> 
> I would support such patch!

Ok, good. I work currently on a prototype implementation. My use case 
would be like this.

Run a test executable which dumps all gcov info objects in a serial data 
stream. It could be encoded as base64. It could be also compressed. On 
the host you unpack the encoded data stream and feed it into gcov-tool 
using the new "merge-stream" subcommand:

gcov-tool --help
Usage: gcov-tool [OPTION]... SUB_COMMAND [OPTION]...

Offline tool to handle gcda counts

   -h, --help                            Print this help, then exit
   -v, --version                         Print version number, then exit
   merge-stream [options] [stream-file]  Merge coverage stream file (or 
stdin)
                                         and coverage file contents
     -v, --verbose                       Verbose mode
     -w, --weight <w1,w2>                Set weights (float point values)

Example:

base64 -d log.txt | gcov-tool merge-stream

The gcov-tool uses a new tag which contains the filename of the 
associated gcov info file:

gcov-dump b-xilinx_zynq_a9_qemu/init.gcda
b-xilinx_zynq_a9_qemu/init.gcda:data:magic `gcda':version `B20 '
b-xilinx_zynq_a9_qemu/init.gcda:stamp 3496756572
b-xilinx_zynq_a9_qemu/init.gcda:checksum 137326246
b-xilinx_zynq_a9_qemu/init.gcda:  a5000000:  62:FILENAME 
`/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda'
b-xilinx_zynq_a9_qemu/init.gcda:  a1000000:   8:OBJECT_SUMMARY runs=0, 
sum_max=0
b-xilinx_zynq_a9_qemu/init.gcda:  01000000:  12:FUNCTION 
ident=1016818396, lineno_checksum=0xd31791e7, cfg_checksum=0x4529789a
b-xilinx_zynq_a9_qemu/init.gcda:    01a10000: 232:COUNTERS arcs 29 counts

Should I generate this filename tag to all configurations or just in 
case inhibit_libc is defined?

Advantage:

* No dependency on the GCC configuration

Disadvantages:

* Bigger files
* Actual filename and the filename of the tag differ if file is moved
* Potential information leak

In any case, I would add the support for the (optional) filename tag to 
all readers.
  
Martin Liška March 30, 2022, 11:56 a.m. UTC | #7
On 3/28/22 18:23, Sebastian Huber wrote:
> On 24/03/2022 13:03, Martin Liška wrote:
>> On 3/24/22 11:51, Sebastian Huber wrote:
>>> Maybe we could add the file path into the gcov information stream using a new tag:
>>>
>>> #define GCOV_TAG_GCDA_FILE_NAME  ((gcov_unsigned_t)0xa5000000)
>>>
>>> Then the complete gcov information can be dumped using a single base64 encoded stream. We could add some support to the gcov-tool to read this stream and merge it into gcda files.
>>
>> I would support such patch!
> 
> Ok, good. I work currently on a prototype implementation. My use case would be like this.
> 

Great, the suggested scheme works for me.

> Run a test executable which dumps all gcov info objects in a serial data stream. It could be encoded as base64. It could be also compressed. On the host you unpack the encoded data stream and feed it into gcov-tool using the new "merge-stream" subcommand:
> 
> gcov-tool --help
> Usage: gcov-tool [OPTION]... SUB_COMMAND [OPTION]...
> 
> Offline tool to handle gcda counts
> 
>    -h, --help                            Print this help, then exit
>    -v, --version                         Print version number, then exit
>    merge-stream [options] [stream-file]  Merge coverage stream file (or stdin)
>                                          and coverage file contents
>      -v, --verbose                       Verbose mode
>      -w, --weight <w1,w2>                Set weights (float point values)
> 
> Example:
> 
> base64 -d log.txt | gcov-tool merge-stream
> 
> The gcov-tool uses a new tag which contains the filename of the associated gcov info file:
> 
> gcov-dump b-xilinx_zynq_a9_qemu/init.gcda
> b-xilinx_zynq_a9_qemu/init.gcda:data:magic `gcda':version `B20 '
> b-xilinx_zynq_a9_qemu/init.gcda:stamp 3496756572
> b-xilinx_zynq_a9_qemu/init.gcda:checksum 137326246
> b-xilinx_zynq_a9_qemu/init.gcda:  a5000000:  62:FILENAME `/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda'
> b-xilinx_zynq_a9_qemu/init.gcda:  a1000000:   8:OBJECT_SUMMARY runs=0, sum_max=0
> b-xilinx_zynq_a9_qemu/init.gcda:  01000000:  12:FUNCTION ident=1016818396, lineno_checksum=0xd31791e7, cfg_checksum=0x4529789a
> b-xilinx_zynq_a9_qemu/init.gcda:    01a10000: 232:COUNTERS arcs 29 counts
> 
> Should I generate this filename tag to all configurations or just in case inhibit_libc is defined?

I would emit it unconditionally. Btw. why do you need the tag?

Cheers,
Martin

> 
> Advantage:
> 
> * No dependency on the GCC configuration
> 
> Disadvantages:
> 
> * Bigger files
> * Actual filename and the filename of the tag differ if file is moved
> * Potential information leak
> 
> In any case, I would add the support for the (optional) filename tag to all readers.
>
  
Sebastian Huber March 30, 2022, 1:30 p.m. UTC | #8
On 30/03/2022 13:56, Martin Liška wrote:
>> Example:
>>
>> base64 -d log.txt | gcov-tool merge-stream
>>
>> The gcov-tool uses a new tag which contains the filename of the 
>> associated gcov info file:
>>
>> gcov-dump b-xilinx_zynq_a9_qemu/init.gcda
>> b-xilinx_zynq_a9_qemu/init.gcda:data:magic `gcda':version `B20 '
>> b-xilinx_zynq_a9_qemu/init.gcda:stamp 3496756572
>> b-xilinx_zynq_a9_qemu/init.gcda:checksum 137326246
>> b-xilinx_zynq_a9_qemu/init.gcda:  a5000000:  62:FILENAME 
>> `/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda'
>> b-xilinx_zynq_a9_qemu/init.gcda:  a1000000:   8:OBJECT_SUMMARY runs=0, 
>> sum_max=0
>> b-xilinx_zynq_a9_qemu/init.gcda:  01000000:  12:FUNCTION 
>> ident=1016818396, lineno_checksum=0xd31791e7, cfg_checksum=0x4529789a
>> b-xilinx_zynq_a9_qemu/init.gcda:    01a10000: 232:COUNTERS arcs 29 counts
>>
>> Should I generate this filename tag to all configurations or just in 
>> case inhibit_libc is defined?
> 
> I would emit it unconditionally. Btw. why do you need the tag?

The tag was the easiest way to add the filename to the gcov information.

We need some gcov defined way to get the filename associated with a gcov 
information, so that the gcov-tool can generate the gcov files from the 
gcov information itself. In a hosted environment, it is not necessary to 
include the filename in the gcov information, since the instrumented 
executable already creates the gcov files. In a freestanding 
environment, the gcov information is not automatically stored to files 
since no file system may be available. Here, we can dump the gcov 
information through __gcov_info_to_gcda(). This dump is basically a 
concatenation of several gcov files.

An alternative to a tag inside the gcov data would be a header which is 
dumped before the gcov data and understood by the gcov-tool:

header : int32:filename-magic int32:version string

#define GCOV_FILENAME_MAGIC ((gcov_unsigned_t)0x6763666e) /* "gcfn" */
  
Sebastian Huber March 30, 2022, 2:48 p.m. UTC | #9
On 30/03/2022 15:30, Sebastian Huber wrote:
> On 30/03/2022 13:56, Martin Liška wrote:
>>> Example:
>>>
>>> base64 -d log.txt | gcov-tool merge-stream
>>>
>>> The gcov-tool uses a new tag which contains the filename of the 
>>> associated gcov info file:
>>>
>>> gcov-dump b-xilinx_zynq_a9_qemu/init.gcda
>>> b-xilinx_zynq_a9_qemu/init.gcda:data:magic `gcda':version `B20 '
>>> b-xilinx_zynq_a9_qemu/init.gcda:stamp 3496756572
>>> b-xilinx_zynq_a9_qemu/init.gcda:checksum 137326246
>>> b-xilinx_zynq_a9_qemu/init.gcda:  a5000000:  62:FILENAME 
>>> `/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda'
>>> b-xilinx_zynq_a9_qemu/init.gcda:  a1000000:   8:OBJECT_SUMMARY 
>>> runs=0, sum_max=0
>>> b-xilinx_zynq_a9_qemu/init.gcda:  01000000:  12:FUNCTION 
>>> ident=1016818396, lineno_checksum=0xd31791e7, cfg_checksum=0x4529789a
>>> b-xilinx_zynq_a9_qemu/init.gcda:    01a10000: 232:COUNTERS arcs 29 
>>> counts
>>>
>>> Should I generate this filename tag to all configurations or just in 
>>> case inhibit_libc is defined?
>>
>> I would emit it unconditionally. Btw. why do you need the tag?
> 
> The tag was the easiest way to add the filename to the gcov information.
> 
> We need some gcov defined way to get the filename associated with a gcov 
> information, so that the gcov-tool can generate the gcov files from the 
> gcov information itself. In a hosted environment, it is not necessary to 
> include the filename in the gcov information, since the instrumented 
> executable already creates the gcov files. In a freestanding 
> environment, the gcov information is not automatically stored to files 
> since no file system may be available. Here, we can dump the gcov 
> information through __gcov_info_to_gcda(). This dump is basically a 
> concatenation of several gcov files.
> 
> An alternative to a tag inside the gcov data would be a header which is 
> dumped before the gcov data and understood by the gcov-tool:
> 
> header : int32:filename-magic int32:version string
> 
> #define GCOV_FILENAME_MAGIC ((gcov_unsigned_t)0x6763666e) /* "gcfn" */

Thanks for asking the question. The alternative with the header is much 
less intrusive. I will work on this approach now.

Another question, I would like to add an option to gcov-tool to 
transform the filenames using regular expressions (for example, remove 
or add a prefix). Can I use the C++ <regex> for this?
  
Martin Liška March 31, 2022, 6:58 a.m. UTC | #10
On 3/30/22 16:48, Sebastian Huber wrote:
> 
> 
> On 30/03/2022 15:30, Sebastian Huber wrote:
>> On 30/03/2022 13:56, Martin Liška wrote:
>>>> Example:
>>>>
>>>> base64 -d log.txt | gcov-tool merge-stream
>>>>
>>>> The gcov-tool uses a new tag which contains the filename of the associated gcov info file:
>>>>
>>>> gcov-dump b-xilinx_zynq_a9_qemu/init.gcda
>>>> b-xilinx_zynq_a9_qemu/init.gcda:data:magic `gcda':version `B20 '
>>>> b-xilinx_zynq_a9_qemu/init.gcda:stamp 3496756572
>>>> b-xilinx_zynq_a9_qemu/init.gcda:checksum 137326246
>>>> b-xilinx_zynq_a9_qemu/init.gcda:  a5000000:  62:FILENAME `/home/EB/sebastian_h/src/lwip/b-xilinx_zynq_a9_qemu/init.gcda'
>>>> b-xilinx_zynq_a9_qemu/init.gcda:  a1000000:   8:OBJECT_SUMMARY runs=0, sum_max=0
>>>> b-xilinx_zynq_a9_qemu/init.gcda:  01000000:  12:FUNCTION ident=1016818396, lineno_checksum=0xd31791e7, cfg_checksum=0x4529789a
>>>> b-xilinx_zynq_a9_qemu/init.gcda:    01a10000: 232:COUNTERS arcs 29 counts
>>>>
>>>> Should I generate this filename tag to all configurations or just in case inhibit_libc is defined?
>>>
>>> I would emit it unconditionally. Btw. why do you need the tag?
>>
>> The tag was the easiest way to add the filename to the gcov information.
>>
>> We need some gcov defined way to get the filename associated with a gcov information, so that the gcov-tool can generate the gcov files from the gcov information itself. In a hosted environment, it is not necessary to include the filename in the gcov information, since the instrumented executable already creates the gcov files. In a freestanding environment, the gcov information is not automatically stored to files since no file system may be available. Here, we can dump the gcov information through __gcov_info_to_gcda(). This dump is basically a concatenation of several gcov files.
>>
>> An alternative to a tag inside the gcov data would be a header which is dumped before the gcov data and understood by the gcov-tool:
>>
>> header : int32:filename-magic int32:version string
>>
>> #define GCOV_FILENAME_MAGIC ((gcov_unsigned_t)0x6763666e) /* "gcfn" */
> 
> Thanks for asking the question. The alternative with the header is much less intrusive. I will work on this approach now.

Agreed.

> 
> Another question, I would like to add an option to gcov-tool to transform the filenames using regular expressions (for example, remove or add a prefix). Can I use the C++ <regex> for this?

Sure, use it!

Thanks,
Martin
  

Patch

diff --git a/gcc/gcov-tool.cc b/gcc/gcov-tool.cc
index f4e42ae763c..2e4083a664d 100644
--- a/gcc/gcov-tool.cc
+++ b/gcc/gcov-tool.cc
@@ -40,7 +40,8 @@  see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #endif
 #include <getopt.h>
 
-extern int gcov_profile_merge (struct gcov_info*, struct gcov_info*, int, int);
+extern struct gcov_info *gcov_profile_merge (struct gcov_info*,
+					     struct gcov_info*, int, int);
 extern int gcov_profile_overlap (struct gcov_info*, struct gcov_info*);
 extern int gcov_profile_normalize (struct gcov_info*, gcov_type);
 extern int gcov_profile_scale (struct gcov_info*, float, int, int);
@@ -141,26 +142,18 @@  profile_merge (const char *d1, const char *d2, const char *out, int w1, int w2)
 {
   struct gcov_info *d1_profile;
   struct gcov_info *d2_profile;
-  int ret;
+  struct gcov_info *merged_profile;
 
   d1_profile = gcov_read_profile_dir (d1, 0);
-  if (!d1_profile)
-    return 1;
-
-  if (d2)
-    {
-      d2_profile = gcov_read_profile_dir (d2, 0);
-      if (!d2_profile)
-        return 1;
+  d2_profile = gcov_read_profile_dir (d2, 0);
 
-      /* The actual merge: we overwrite to d1_profile.  */
-      ret = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
+  /* The actual merge: we overwrite to d1_profile.  */
+  merged_profile = gcov_profile_merge (d1_profile, d2_profile, w1, w2);
 
-      if (ret)
-        return ret;
-    }
-
-  gcov_output_files (out, d1_profile);
+  if (merged_profile)
+    gcov_output_files (out, merged_profile);
+  else if (verbose)
+    fnotice (stdout, "no profile files were merged\n");
 
   return 0;
 }
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
index ba7fb924b53..e5496f4ade2 100644
--- a/libgcc/libgcov-util.c
+++ b/libgcc/libgcov-util.c
@@ -677,13 +677,13 @@  find_match_gcov_info (struct gcov_info **array, int size,
    Return 0 on success: without mismatch.
    Reutrn 1 on error.  */
 
-int
+struct gcov_info *
 gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile,
                     int w1, int w2)
 {
   struct gcov_info *gi_ptr;
   struct gcov_info **tgt_infos;
-  struct gcov_info *tgt_tail;
+  struct gcov_info **tgt_tail;
   struct gcov_info **in_src_not_tgt;
   unsigned tgt_cnt = 0, src_cnt = 0;
   unsigned unmatch_info_cnt = 0;
@@ -703,7 +703,10 @@  gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile
   for (gi_ptr = tgt_profile, i = 0; gi_ptr; gi_ptr = gi_ptr->next, i++)
     tgt_infos[i] = gi_ptr;
 
-  tgt_tail = tgt_infos[tgt_cnt - 1];
+  if (tgt_cnt)
+     tgt_tail = &tgt_infos[tgt_cnt - 1]->next;
+  else
+     tgt_tail = &tgt_profile;
 
   /* First pass on tgt_profile, we multiply w1 to all counters.  */
   if (w1 > 1)
@@ -732,14 +735,14 @@  gcov_profile_merge (struct gcov_info *tgt_profile, struct gcov_info *src_profile
       gi_ptr = in_src_not_tgt[i];
       gcov_merge (gi_ptr, gi_ptr, w2 - 1);
       gi_ptr->next = NULL;
-      tgt_tail->next = gi_ptr;
-      tgt_tail = gi_ptr;
+      *tgt_tail = gi_ptr;
+      tgt_tail = &gi_ptr->next;
     }
 
   free (in_src_not_tgt);
   free (tgt_infos);
 
-  return 0;
+  return tgt_profile;
 }
 
 typedef gcov_type (*counter_op_fn) (gcov_type, void*, void*);