gcov: make profile merging smarter

Message ID 7adcbe4f-5aa7-30bc-82e6-a877e03eaafd@suse.cz
State New
Headers
Series gcov: make profile merging smarter |

Commit Message

Martin Liška Oct. 1, 2021, 9:55 a.m. UTC
  Support merging of profiles that are built from a different .o files
but belong to the same source file. Moreover, a checksum is verified
during profile merging and so we can safely combine such profile.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
I'm going to install the patch if there are no comments about it.

Thanks,
Martin

	PR gcov-profile/90364

gcc/ChangeLog:

	* coverage.c (build_info): Emit checksum to the global variable.
	(build_info_type): Add new field for checksum.
	(coverage_obj_finish): Pass object_checksum.
	(coverage_init): Use 0 as checksum for .gcno files.
	* gcov-dump.c (dump_gcov_file): Dump also new checksum field.
	* gcov.c (read_graph_file): Read also checksum.

libgcc/ChangeLog:

	* libgcov-driver.c (merge_one_data): Skip timestamp and verify
	checksums.
	(write_one_data): Write also checksum.
	* libgcov-util.c (read_gcda_file): Read also checksum field.
	* libgcov.h (struct gcov_info): Add new field.
---
  gcc/coverage.c          | 50 ++++++++++++++++++++++++++++-------------
  gcc/gcov-dump.c         |  9 ++++----
  gcc/gcov.c              |  5 +++++
  libgcc/libgcov-driver.c |  8 +++++--
  libgcc/libgcov-util.c   |  3 +++
  libgcc/libgcov.h        |  1 +
  6 files changed, 54 insertions(+), 22 deletions(-)
  

Comments

Richard Biener Oct. 1, 2021, 10:17 a.m. UTC | #1
On Fri, Oct 1, 2021 at 11:55 AM Martin Liška <mliska@suse.cz> wrote:
>
> Support merging of profiles that are built from a different .o files
> but belong to the same source file. Moreover, a checksum is verified
> during profile merging and so we can safely combine such profile.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> I'm going to install the patch if there are no comments about it.

Is the ->stamp field now unused?

I wonder whether crc32 is good enough or whether we need to enable
-fprofile-correction by default for example?  But -fprofile-correction
is about -fprofile-use, not profile merging, right?  What does the latter
do upon mismatch?

Alternatively would it be possible to keep multiple sets of data in the file,
one for each 'stamp'?  How does the profile-use step figure a mismatch
here, or does it not care whether it mixes input with 'different stamp'?

The current behavior is also somewhat odd:

> ./a.out
libgcov profiling error:/tmp/t.gcda:overwriting an existing profile
data with a different timestamp

but it should probably say 'warning' since it indeed simply overwrites data
instead of merging it.

I wonder if we can simply drop the stamp check alltogether and make
the checks that match up the data against the number of counters behave
as to warning and overwriting the data instead of failing fatally?  The
actual merging of data would need to be delayed, but at least until
the first actual merge was done we could simply proceed?

Richard.

> Thanks,
> Martin
>
>         PR gcov-profile/90364
>
> gcc/ChangeLog:
>
>         * coverage.c (build_info): Emit checksum to the global variable.
>         (build_info_type): Add new field for checksum.
>         (coverage_obj_finish): Pass object_checksum.
>         (coverage_init): Use 0 as checksum for .gcno files.
>         * gcov-dump.c (dump_gcov_file): Dump also new checksum field.
>         * gcov.c (read_graph_file): Read also checksum.
>
> libgcc/ChangeLog:
>
>         * libgcov-driver.c (merge_one_data): Skip timestamp and verify
>         checksums.
>         (write_one_data): Write also checksum.
>         * libgcov-util.c (read_gcda_file): Read also checksum field.
>         * libgcov.h (struct gcov_info): Add new field.
> ---
>   gcc/coverage.c          | 50 ++++++++++++++++++++++++++++-------------
>   gcc/gcov-dump.c         |  9 ++++----
>   gcc/gcov.c              |  5 +++++
>   libgcc/libgcov-driver.c |  8 +++++--
>   libgcc/libgcov-util.c   |  3 +++
>   libgcc/libgcov.h        |  1 +
>   6 files changed, 54 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/coverage.c b/gcc/coverage.c
> index 10d7f8366cb..4467f1eaa5c 100644
> --- a/gcc/coverage.c
> +++ b/gcc/coverage.c
> @@ -129,16 +129,7 @@ static const char *const ctr_names[GCOV_COUNTERS] = {
>   #undef DEF_GCOV_COUNTER
>
>   /* Forward declarations.  */
> -static void read_counts_file (void);
>   static tree build_var (tree, tree, int);
> -static void build_fn_info_type (tree, unsigned, tree);
> -static void build_info_type (tree, tree);
> -static tree build_fn_info (const struct coverage_data *, tree, tree);
> -static tree build_info (tree, tree);
> -static bool coverage_obj_init (void);
> -static vec<constructor_elt, va_gc> *coverage_obj_fn
> -(vec<constructor_elt, va_gc> *, tree, struct coverage_data const *);
> -static void coverage_obj_finish (vec<constructor_elt, va_gc> *);
>
>   /* Return the type node for gcov_type.  */
>
> @@ -218,6 +209,9 @@ read_counts_file (void)
>     tag = gcov_read_unsigned ();
>     bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag);
>
> +  /* Read checksum.  */
> +  gcov_read_unsigned ();
> +
>     counts_hash = new hash_table<counts_entry> (10);
>     while ((tag = gcov_read_unsigned ()))
>       {
> @@ -935,6 +929,12 @@ build_info_type (tree type, tree fn_info_ptr_type)
>     DECL_CHAIN (field) = fields;
>     fields = field;
>
> +  /* Checksum.  */
> +  field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
> +                     get_gcov_unsigned_t ());
> +  DECL_CHAIN (field) = fields;
> +  fields = field;
> +
>     /* Filename */
>     field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
>                       build_pointer_type (build_qualified_type
> @@ -977,7 +977,7 @@ build_info_type (tree type, tree fn_info_ptr_type)
>      function info objects.  */
>
>   static tree
> -build_info (tree info_type, tree fn_ary)
> +build_info (tree info_type, tree fn_ary, unsigned object_checksum)
>   {
>     tree info_fields = TYPE_FIELDS (info_type);
>     tree merge_fn_type, n_funcs;
> @@ -996,13 +996,19 @@ build_info (tree info_type, tree fn_ary)
>     /* next -- NULL */
>     CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node);
>     info_fields = DECL_CHAIN (info_fields);
> -
> +
>     /* stamp */
>     CONSTRUCTOR_APPEND_ELT (v1, info_fields,
>                           build_int_cstu (TREE_TYPE (info_fields),
>                                           bbg_file_stamp));
>     info_fields = DECL_CHAIN (info_fields);
>
> +  /* Checksum.  */
> +  CONSTRUCTOR_APPEND_ELT (v1, info_fields,
> +                         build_int_cstu (TREE_TYPE (info_fields),
> +                                         object_checksum));
> +  info_fields = DECL_CHAIN (info_fields);
> +
>     /* Filename */
>     da_file_name_len = strlen (da_file_name);
>     filename_string = build_string (da_file_name_len + 1, da_file_name);
> @@ -1214,7 +1220,8 @@ coverage_obj_fn (vec<constructor_elt, va_gc> *ctor, tree fn,
>      function objects from CTOR.  Generate the gcov_info initializer.  */
>
>   static void
> -coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
> +coverage_obj_finish (vec<constructor_elt, va_gc> *ctor,
> +                    unsigned object_checksum)
>   {
>     unsigned n_functions = vec_safe_length (ctor);
>     tree fn_info_ary_type = build_array_type
> @@ -1231,7 +1238,7 @@ coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
>     varpool_node::finalize_decl (fn_info_ary);
>
>     DECL_INITIAL (gcov_info_var)
> -    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary);
> +    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum);
>     varpool_node::finalize_decl (gcov_info_var);
>   }
>
> @@ -1300,7 +1307,6 @@ coverage_init (const char *filename)
>     strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
>
>     bbg_file_stamp = local_tick;
> -
>     if (flag_auto_profile)
>       read_autofdo_file ();
>     else if (flag_branch_probabilities)
> @@ -1328,6 +1334,8 @@ coverage_init (const char *filename)
>           gcov_write_unsigned (GCOV_NOTE_MAGIC);
>           gcov_write_unsigned (GCOV_VERSION);
>           gcov_write_unsigned (bbg_file_stamp);
> +         /* Use an arbitrary checksum */
> +         gcov_write_unsigned (0);
>           gcov_write_string (getpwd ());
>
>           /* Do not support has_unexecuted_blocks for Ada.  */
> @@ -1353,14 +1361,24 @@ coverage_finish (void)
>          cannot uniquely stamp it.  If we can stamp it, libgcov will DTRT.  */
>       unlink (da_file_name);
>
> +  /* Global GCDA checksum that aggregates all functions.  */
> +  unsigned object_checksum = 0;
> +
>     if (coverage_obj_init ())
>       {
>         vec<constructor_elt, va_gc> *fn_ctor = NULL;
>         struct coverage_data *fn;
>
>         for (fn = functions_head; fn; fn = fn->next)
> -       fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
> -      coverage_obj_finish (fn_ctor);
> +       {
> +         fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
> +
> +         object_checksum = crc32_unsigned (object_checksum, fn->ident);
> +         object_checksum = crc32_unsigned (object_checksum,
> +                                           fn->lineno_checksum);
> +         object_checksum = crc32_unsigned (object_checksum, fn->cfg_checksum);
> +       }
> +      coverage_obj_finish (fn_ctor, object_checksum);
>       }
>
>     XDELETEVEC (da_file_name);
> diff --git a/gcc/gcov-dump.c b/gcc/gcov-dump.c
> index f1489fe0214..bfaf735d2ff 100644
> --- a/gcc/gcov-dump.c
> +++ b/gcc/gcov-dump.c
> @@ -206,11 +206,12 @@ dump_gcov_file (const char *filename)
>     }
>
>     /* stamp */
> -  {
> -    unsigned stamp = gcov_read_unsigned ();
> +  unsigned stamp = gcov_read_unsigned ();
> +  printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
>
> -    printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
> -  }
> +  /* Checksum */
> +  unsigned checksum = gcov_read_unsigned ();
> +  printf ("%s:checksum %lu\n", filename, (unsigned long)checksum);
>
>     if (!is_data_type)
>       {
> diff --git a/gcc/gcov.c b/gcc/gcov.c
> index cf0a49d8c30..829e955a63b 100644
> --- a/gcc/gcov.c
> +++ b/gcc/gcov.c
> @@ -1814,6 +1814,8 @@ read_graph_file (void)
>                bbg_file_name, v, e);
>       }
>     bbg_stamp = gcov_read_unsigned ();
> +  /* Read checksum.  */
> +  gcov_read_unsigned ();
>     bbg_cwd = xstrdup (gcov_read_string ());
>     bbg_supports_has_unexecuted_blocks = gcov_read_unsigned ();
>
> @@ -2031,6 +2033,9 @@ read_count_file (void)
>         goto cleanup;
>       }
>
> +  /* Read checksum.  */
> +  gcov_read_unsigned ();
> +
>     while ((tag = gcov_read_unsigned ()))
>       {
>         unsigned length = gcov_read_unsigned ();
> diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
> index 087f71e0107..7aa97bbb06a 100644
> --- a/libgcc/libgcov-driver.c
> +++ b/libgcc/libgcov-driver.c
> @@ -260,12 +260,15 @@ merge_one_data (const char *filename,
>     if (!gcov_version (gi_ptr, length, filename))
>       return -1;
>
> +  /* Skip timestamp.  */
> +  gcov_read_unsigned ();
> +
>     length = gcov_read_unsigned ();
> -  if (length != gi_ptr->stamp)
> +  if (length != gi_ptr->checksum)
>       {
>         /* Read from a different compilation.  Overwrite the file.  */
>         gcov_error (GCOV_PROF_PREFIX "overwriting an existing profile data "
> -                 "with a different timestamp\n", filename);
> +                 "with a different checksum\n", filename);
>         return 0;
>       }
>
> @@ -495,6 +498,7 @@ write_one_data (const struct gcov_info *gi_ptr,
>     dump_unsigned (GCOV_DATA_MAGIC, dump_fn, arg);
>     dump_unsigned (GCOV_VERSION, dump_fn, arg);
>     dump_unsigned (gi_ptr->stamp, dump_fn, arg);
> +  dump_unsigned (gi_ptr->checksum, dump_fn, arg);
>
>   #ifdef NEED_L_GCOV
>     /* Generate whole program statistics.  */
> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
> index bc7416d99bf..766ca3559c4 100644
> --- a/libgcc/libgcov-util.c
> +++ b/libgcc/libgcov-util.c
> @@ -310,6 +310,9 @@ read_gcda_file (const char *filename)
>     /* Read stamp.  */
>     obj_info->stamp = gcov_read_unsigned ();
>
> +  /* Read checksum.  */
> +  obj_info->checksum = gcov_read_unsigned ();
> +
>     while (1)
>       {
>         gcov_position_t base;
> diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
> index f6354a7a070..2a365c95759 100644
> --- a/libgcc/libgcov.h
> +++ b/libgcc/libgcov.h
> @@ -230,6 +230,7 @@ struct gcov_info
>     struct gcov_info *next;     /* link to next, used by libgcov */
>
>     gcov_unsigned_t stamp;      /* uniquifying time stamp */
> +  gcov_unsigned_t checksum;    /* unique object checksum */
>     const char *filename;               /* output file name */
>
>     gcov_merge_fn merge[GCOV_COUNTERS];  /* merge functions (null for
> --
> 2.33.0
>
  
Martin Liška Oct. 1, 2021, 10:53 a.m. UTC | #2
On 10/1/21 12:17, Richard Biener wrote:
> On Fri, Oct 1, 2021 at 11:55 AM Martin Liška <mliska@suse.cz> wrote:
>>
>> Support merging of profiles that are built from a different .o files
>> but belong to the same source file. Moreover, a checksum is verified
>> during profile merging and so we can safely combine such profile.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>> I'm going to install the patch if there are no comments about it.
> 
> Is the ->stamp field now unused?

Yes, it's still used for pairing of .gcda and .gcno files when --coverage is used.

> 
> I wonder whether crc32 is good enough or whether we need to enable

Dunno. We can alternatively use a stronger hashing function, maybe inchash?

> -fprofile-correction by default for example?

Probably not.

> But -fprofile-correction
> is about -fprofile-use, not profile merging, right?  What does the latter
> do upon mismatch?

It prints the 'libgcov profiling error'.

> 
> Alternatively would it be possible to keep multiple sets of data in the file,
> one for each 'stamp'?

Yes, we can theoretically do it, but I'm not planning working on that now.

> How does the profile-use step figure a mismatch
> here, or does it not care whether it mixes input with 'different stamp'?

Based on function id, it does verification for cfg_checksum and lineno_checksum.

> 
> The current behavior is also somewhat odd:
> 
>> ./a.out
> libgcov profiling error:/tmp/t.gcda:overwriting an existing profile
> data with a different timestamp
> 
> but it should probably say 'warning' since it indeed simply overwrites data
> instead of merging it.

Yes, I can prepare an incremental patch for it.

> 
> I wonder if we can simply drop the stamp check alltogether and make
> the checks that match up the data against the number of counters behave
> as to warning and overwriting the data instead of failing fatally?  The
> actual merging of data would need to be delayed, but at least until
> the first actual merge was done we could simply proceed?

Do you mean doing only merging of functions that have correct checksum, and bailing
out for the functions that don't?

Thanks for the ideas.

Martin

> 
> Richard.
> 
>> Thanks,
>> Martin
>>
>>          PR gcov-profile/90364
>>
>> gcc/ChangeLog:
>>
>>          * coverage.c (build_info): Emit checksum to the global variable.
>>          (build_info_type): Add new field for checksum.
>>          (coverage_obj_finish): Pass object_checksum.
>>          (coverage_init): Use 0 as checksum for .gcno files.
>>          * gcov-dump.c (dump_gcov_file): Dump also new checksum field.
>>          * gcov.c (read_graph_file): Read also checksum.
>>
>> libgcc/ChangeLog:
>>
>>          * libgcov-driver.c (merge_one_data): Skip timestamp and verify
>>          checksums.
>>          (write_one_data): Write also checksum.
>>          * libgcov-util.c (read_gcda_file): Read also checksum field.
>>          * libgcov.h (struct gcov_info): Add new field.
>> ---
>>    gcc/coverage.c          | 50 ++++++++++++++++++++++++++++-------------
>>    gcc/gcov-dump.c         |  9 ++++----
>>    gcc/gcov.c              |  5 +++++
>>    libgcc/libgcov-driver.c |  8 +++++--
>>    libgcc/libgcov-util.c   |  3 +++
>>    libgcc/libgcov.h        |  1 +
>>    6 files changed, 54 insertions(+), 22 deletions(-)
>>
>> diff --git a/gcc/coverage.c b/gcc/coverage.c
>> index 10d7f8366cb..4467f1eaa5c 100644
>> --- a/gcc/coverage.c
>> +++ b/gcc/coverage.c
>> @@ -129,16 +129,7 @@ static const char *const ctr_names[GCOV_COUNTERS] = {
>>    #undef DEF_GCOV_COUNTER
>>
>>    /* Forward declarations.  */
>> -static void read_counts_file (void);
>>    static tree build_var (tree, tree, int);
>> -static void build_fn_info_type (tree, unsigned, tree);
>> -static void build_info_type (tree, tree);
>> -static tree build_fn_info (const struct coverage_data *, tree, tree);
>> -static tree build_info (tree, tree);
>> -static bool coverage_obj_init (void);
>> -static vec<constructor_elt, va_gc> *coverage_obj_fn
>> -(vec<constructor_elt, va_gc> *, tree, struct coverage_data const *);
>> -static void coverage_obj_finish (vec<constructor_elt, va_gc> *);
>>
>>    /* Return the type node for gcov_type.  */
>>
>> @@ -218,6 +209,9 @@ read_counts_file (void)
>>      tag = gcov_read_unsigned ();
>>      bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag);
>>
>> +  /* Read checksum.  */
>> +  gcov_read_unsigned ();
>> +
>>      counts_hash = new hash_table<counts_entry> (10);
>>      while ((tag = gcov_read_unsigned ()))
>>        {
>> @@ -935,6 +929,12 @@ build_info_type (tree type, tree fn_info_ptr_type)
>>      DECL_CHAIN (field) = fields;
>>      fields = field;
>>
>> +  /* Checksum.  */
>> +  field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
>> +                     get_gcov_unsigned_t ());
>> +  DECL_CHAIN (field) = fields;
>> +  fields = field;
>> +
>>      /* Filename */
>>      field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
>>                        build_pointer_type (build_qualified_type
>> @@ -977,7 +977,7 @@ build_info_type (tree type, tree fn_info_ptr_type)
>>       function info objects.  */
>>
>>    static tree
>> -build_info (tree info_type, tree fn_ary)
>> +build_info (tree info_type, tree fn_ary, unsigned object_checksum)
>>    {
>>      tree info_fields = TYPE_FIELDS (info_type);
>>      tree merge_fn_type, n_funcs;
>> @@ -996,13 +996,19 @@ build_info (tree info_type, tree fn_ary)
>>      /* next -- NULL */
>>      CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node);
>>      info_fields = DECL_CHAIN (info_fields);
>> -
>> +
>>      /* stamp */
>>      CONSTRUCTOR_APPEND_ELT (v1, info_fields,
>>                            build_int_cstu (TREE_TYPE (info_fields),
>>                                            bbg_file_stamp));
>>      info_fields = DECL_CHAIN (info_fields);
>>
>> +  /* Checksum.  */
>> +  CONSTRUCTOR_APPEND_ELT (v1, info_fields,
>> +                         build_int_cstu (TREE_TYPE (info_fields),
>> +                                         object_checksum));
>> +  info_fields = DECL_CHAIN (info_fields);
>> +
>>      /* Filename */
>>      da_file_name_len = strlen (da_file_name);
>>      filename_string = build_string (da_file_name_len + 1, da_file_name);
>> @@ -1214,7 +1220,8 @@ coverage_obj_fn (vec<constructor_elt, va_gc> *ctor, tree fn,
>>       function objects from CTOR.  Generate the gcov_info initializer.  */
>>
>>    static void
>> -coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
>> +coverage_obj_finish (vec<constructor_elt, va_gc> *ctor,
>> +                    unsigned object_checksum)
>>    {
>>      unsigned n_functions = vec_safe_length (ctor);
>>      tree fn_info_ary_type = build_array_type
>> @@ -1231,7 +1238,7 @@ coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
>>      varpool_node::finalize_decl (fn_info_ary);
>>
>>      DECL_INITIAL (gcov_info_var)
>> -    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary);
>> +    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum);
>>      varpool_node::finalize_decl (gcov_info_var);
>>    }
>>
>> @@ -1300,7 +1307,6 @@ coverage_init (const char *filename)
>>      strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
>>
>>      bbg_file_stamp = local_tick;
>> -
>>      if (flag_auto_profile)
>>        read_autofdo_file ();
>>      else if (flag_branch_probabilities)
>> @@ -1328,6 +1334,8 @@ coverage_init (const char *filename)
>>            gcov_write_unsigned (GCOV_NOTE_MAGIC);
>>            gcov_write_unsigned (GCOV_VERSION);
>>            gcov_write_unsigned (bbg_file_stamp);
>> +         /* Use an arbitrary checksum */
>> +         gcov_write_unsigned (0);
>>            gcov_write_string (getpwd ());
>>
>>            /* Do not support has_unexecuted_blocks for Ada.  */
>> @@ -1353,14 +1361,24 @@ coverage_finish (void)
>>           cannot uniquely stamp it.  If we can stamp it, libgcov will DTRT.  */
>>        unlink (da_file_name);
>>
>> +  /* Global GCDA checksum that aggregates all functions.  */
>> +  unsigned object_checksum = 0;
>> +
>>      if (coverage_obj_init ())
>>        {
>>          vec<constructor_elt, va_gc> *fn_ctor = NULL;
>>          struct coverage_data *fn;
>>
>>          for (fn = functions_head; fn; fn = fn->next)
>> -       fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
>> -      coverage_obj_finish (fn_ctor);
>> +       {
>> +         fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
>> +
>> +         object_checksum = crc32_unsigned (object_checksum, fn->ident);
>> +         object_checksum = crc32_unsigned (object_checksum,
>> +                                           fn->lineno_checksum);
>> +         object_checksum = crc32_unsigned (object_checksum, fn->cfg_checksum);
>> +       }
>> +      coverage_obj_finish (fn_ctor, object_checksum);
>>        }
>>
>>      XDELETEVEC (da_file_name);
>> diff --git a/gcc/gcov-dump.c b/gcc/gcov-dump.c
>> index f1489fe0214..bfaf735d2ff 100644
>> --- a/gcc/gcov-dump.c
>> +++ b/gcc/gcov-dump.c
>> @@ -206,11 +206,12 @@ dump_gcov_file (const char *filename)
>>      }
>>
>>      /* stamp */
>> -  {
>> -    unsigned stamp = gcov_read_unsigned ();
>> +  unsigned stamp = gcov_read_unsigned ();
>> +  printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
>>
>> -    printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
>> -  }
>> +  /* Checksum */
>> +  unsigned checksum = gcov_read_unsigned ();
>> +  printf ("%s:checksum %lu\n", filename, (unsigned long)checksum);
>>
>>      if (!is_data_type)
>>        {
>> diff --git a/gcc/gcov.c b/gcc/gcov.c
>> index cf0a49d8c30..829e955a63b 100644
>> --- a/gcc/gcov.c
>> +++ b/gcc/gcov.c
>> @@ -1814,6 +1814,8 @@ read_graph_file (void)
>>                 bbg_file_name, v, e);
>>        }
>>      bbg_stamp = gcov_read_unsigned ();
>> +  /* Read checksum.  */
>> +  gcov_read_unsigned ();
>>      bbg_cwd = xstrdup (gcov_read_string ());
>>      bbg_supports_has_unexecuted_blocks = gcov_read_unsigned ();
>>
>> @@ -2031,6 +2033,9 @@ read_count_file (void)
>>          goto cleanup;
>>        }
>>
>> +  /* Read checksum.  */
>> +  gcov_read_unsigned ();
>> +
>>      while ((tag = gcov_read_unsigned ()))
>>        {
>>          unsigned length = gcov_read_unsigned ();
>> diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
>> index 087f71e0107..7aa97bbb06a 100644
>> --- a/libgcc/libgcov-driver.c
>> +++ b/libgcc/libgcov-driver.c
>> @@ -260,12 +260,15 @@ merge_one_data (const char *filename,
>>      if (!gcov_version (gi_ptr, length, filename))
>>        return -1;
>>
>> +  /* Skip timestamp.  */
>> +  gcov_read_unsigned ();
>> +
>>      length = gcov_read_unsigned ();
>> -  if (length != gi_ptr->stamp)
>> +  if (length != gi_ptr->checksum)
>>        {
>>          /* Read from a different compilation.  Overwrite the file.  */
>>          gcov_error (GCOV_PROF_PREFIX "overwriting an existing profile data "
>> -                 "with a different timestamp\n", filename);
>> +                 "with a different checksum\n", filename);
>>          return 0;
>>        }
>>
>> @@ -495,6 +498,7 @@ write_one_data (const struct gcov_info *gi_ptr,
>>      dump_unsigned (GCOV_DATA_MAGIC, dump_fn, arg);
>>      dump_unsigned (GCOV_VERSION, dump_fn, arg);
>>      dump_unsigned (gi_ptr->stamp, dump_fn, arg);
>> +  dump_unsigned (gi_ptr->checksum, dump_fn, arg);
>>
>>    #ifdef NEED_L_GCOV
>>      /* Generate whole program statistics.  */
>> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
>> index bc7416d99bf..766ca3559c4 100644
>> --- a/libgcc/libgcov-util.c
>> +++ b/libgcc/libgcov-util.c
>> @@ -310,6 +310,9 @@ read_gcda_file (const char *filename)
>>      /* Read stamp.  */
>>      obj_info->stamp = gcov_read_unsigned ();
>>
>> +  /* Read checksum.  */
>> +  obj_info->checksum = gcov_read_unsigned ();
>> +
>>      while (1)
>>        {
>>          gcov_position_t base;
>> diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
>> index f6354a7a070..2a365c95759 100644
>> --- a/libgcc/libgcov.h
>> +++ b/libgcc/libgcov.h
>> @@ -230,6 +230,7 @@ struct gcov_info
>>      struct gcov_info *next;     /* link to next, used by libgcov */
>>
>>      gcov_unsigned_t stamp;      /* uniquifying time stamp */
>> +  gcov_unsigned_t checksum;    /* unique object checksum */
>>      const char *filename;               /* output file name */
>>
>>      gcov_merge_fn merge[GCOV_COUNTERS];  /* merge functions (null for
>> --
>> 2.33.0
>>
  
Richard Biener Oct. 4, 2021, 11:16 a.m. UTC | #3
On Fri, Oct 1, 2021 at 12:53 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 10/1/21 12:17, Richard Biener wrote:
> > On Fri, Oct 1, 2021 at 11:55 AM Martin Liška <mliska@suse.cz> wrote:
> >>
> >> Support merging of profiles that are built from a different .o files
> >> but belong to the same source file. Moreover, a checksum is verified
> >> during profile merging and so we can safely combine such profile.
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >> I'm going to install the patch if there are no comments about it.
> >
> > Is the ->stamp field now unused?
>
> Yes, it's still used for pairing of .gcda and .gcno files when --coverage is used.
>
> >
> > I wonder whether crc32 is good enough or whether we need to enable
>
> Dunno. We can alternatively use a stronger hashing function, maybe inchash?
>
> > -fprofile-correction by default for example?
>
> Probably not.
>
> > But -fprofile-correction
> > is about -fprofile-use, not profile merging, right?  What does the latter
> > do upon mismatch?
>
> It prints the 'libgcov profiling error'.
>
> >
> > Alternatively would it be possible to keep multiple sets of data in the file,
> > one for each 'stamp'?
>
> Yes, we can theoretically do it, but I'm not planning working on that now.
>
> > How does the profile-use step figure a mismatch
> > here, or does it not care whether it mixes input with 'different stamp'?
>
> Based on function id, it does verification for cfg_checksum and lineno_checksum.
>
> >
> > The current behavior is also somewhat odd:
> >
> >> ./a.out
> > libgcov profiling error:/tmp/t.gcda:overwriting an existing profile
> > data with a different timestamp
> >
> > but it should probably say 'warning' since it indeed simply overwrites data
> > instead of merging it.
>
> Yes, I can prepare an incremental patch for it.
>
> >
> > I wonder if we can simply drop the stamp check alltogether and make
> > the checks that match up the data against the number of counters behave
> > as to warning and overwriting the data instead of failing fatally?  The
> > actual merging of data would need to be delayed, but at least until
> > the first actual merge was done we could simply proceed?
>
> Do you mean doing only merging of functions that have correct checksum, and bailing
> out for the functions that don't?

I meant in merge_one_data do not check ->stamp or ->checksum but instead rely
on the counter merging code to detect mismatches (there's read_mismatch and
read_error).  There's multiple things we can do when we run into those:

 - when we did not actually merged any counter yet we could issue the
   warning as before and drop the old data on the floor
 - when we _did_ merge some counters already we could hard-error
   (I suppose we can't roll-back merging that took place already)
 - we could do the merging two-stage, first see whether the data matches
   and only if it did perform the merging

Note that all of the changes (including yours) have user-visible effects and
the behavior is somewhat unobvious.  Not merging when the object was
re-built is indeed the most obvious behavior so I'm not sure it's a good
idea.  A new env variable to say whether to simply keep the _old_ data
when merging in new data isn't possible would be another "fix" I guess?

Richard.

>
> Thanks for the ideas.
>
> Martin
>
> >
> > Richard.
> >
> >> Thanks,
> >> Martin
> >>
> >>          PR gcov-profile/90364
> >>
> >> gcc/ChangeLog:
> >>
> >>          * coverage.c (build_info): Emit checksum to the global variable.
> >>          (build_info_type): Add new field for checksum.
> >>          (coverage_obj_finish): Pass object_checksum.
> >>          (coverage_init): Use 0 as checksum for .gcno files.
> >>          * gcov-dump.c (dump_gcov_file): Dump also new checksum field.
> >>          * gcov.c (read_graph_file): Read also checksum.
> >>
> >> libgcc/ChangeLog:
> >>
> >>          * libgcov-driver.c (merge_one_data): Skip timestamp and verify
> >>          checksums.
> >>          (write_one_data): Write also checksum.
> >>          * libgcov-util.c (read_gcda_file): Read also checksum field.
> >>          * libgcov.h (struct gcov_info): Add new field.
> >> ---
> >>    gcc/coverage.c          | 50 ++++++++++++++++++++++++++++-------------
> >>    gcc/gcov-dump.c         |  9 ++++----
> >>    gcc/gcov.c              |  5 +++++
> >>    libgcc/libgcov-driver.c |  8 +++++--
> >>    libgcc/libgcov-util.c   |  3 +++
> >>    libgcc/libgcov.h        |  1 +
> >>    6 files changed, 54 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/gcc/coverage.c b/gcc/coverage.c
> >> index 10d7f8366cb..4467f1eaa5c 100644
> >> --- a/gcc/coverage.c
> >> +++ b/gcc/coverage.c
> >> @@ -129,16 +129,7 @@ static const char *const ctr_names[GCOV_COUNTERS] = {
> >>    #undef DEF_GCOV_COUNTER
> >>
> >>    /* Forward declarations.  */
> >> -static void read_counts_file (void);
> >>    static tree build_var (tree, tree, int);
> >> -static void build_fn_info_type (tree, unsigned, tree);
> >> -static void build_info_type (tree, tree);
> >> -static tree build_fn_info (const struct coverage_data *, tree, tree);
> >> -static tree build_info (tree, tree);
> >> -static bool coverage_obj_init (void);
> >> -static vec<constructor_elt, va_gc> *coverage_obj_fn
> >> -(vec<constructor_elt, va_gc> *, tree, struct coverage_data const *);
> >> -static void coverage_obj_finish (vec<constructor_elt, va_gc> *);
> >>
> >>    /* Return the type node for gcov_type.  */
> >>
> >> @@ -218,6 +209,9 @@ read_counts_file (void)
> >>      tag = gcov_read_unsigned ();
> >>      bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag);
> >>
> >> +  /* Read checksum.  */
> >> +  gcov_read_unsigned ();
> >> +
> >>      counts_hash = new hash_table<counts_entry> (10);
> >>      while ((tag = gcov_read_unsigned ()))
> >>        {
> >> @@ -935,6 +929,12 @@ build_info_type (tree type, tree fn_info_ptr_type)
> >>      DECL_CHAIN (field) = fields;
> >>      fields = field;
> >>
> >> +  /* Checksum.  */
> >> +  field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
> >> +                     get_gcov_unsigned_t ());
> >> +  DECL_CHAIN (field) = fields;
> >> +  fields = field;
> >> +
> >>      /* Filename */
> >>      field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
> >>                        build_pointer_type (build_qualified_type
> >> @@ -977,7 +977,7 @@ build_info_type (tree type, tree fn_info_ptr_type)
> >>       function info objects.  */
> >>
> >>    static tree
> >> -build_info (tree info_type, tree fn_ary)
> >> +build_info (tree info_type, tree fn_ary, unsigned object_checksum)
> >>    {
> >>      tree info_fields = TYPE_FIELDS (info_type);
> >>      tree merge_fn_type, n_funcs;
> >> @@ -996,13 +996,19 @@ build_info (tree info_type, tree fn_ary)
> >>      /* next -- NULL */
> >>      CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node);
> >>      info_fields = DECL_CHAIN (info_fields);
> >> -
> >> +
> >>      /* stamp */
> >>      CONSTRUCTOR_APPEND_ELT (v1, info_fields,
> >>                            build_int_cstu (TREE_TYPE (info_fields),
> >>                                            bbg_file_stamp));
> >>      info_fields = DECL_CHAIN (info_fields);
> >>
> >> +  /* Checksum.  */
> >> +  CONSTRUCTOR_APPEND_ELT (v1, info_fields,
> >> +                         build_int_cstu (TREE_TYPE (info_fields),
> >> +                                         object_checksum));
> >> +  info_fields = DECL_CHAIN (info_fields);
> >> +
> >>      /* Filename */
> >>      da_file_name_len = strlen (da_file_name);
> >>      filename_string = build_string (da_file_name_len + 1, da_file_name);
> >> @@ -1214,7 +1220,8 @@ coverage_obj_fn (vec<constructor_elt, va_gc> *ctor, tree fn,
> >>       function objects from CTOR.  Generate the gcov_info initializer.  */
> >>
> >>    static void
> >> -coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
> >> +coverage_obj_finish (vec<constructor_elt, va_gc> *ctor,
> >> +                    unsigned object_checksum)
> >>    {
> >>      unsigned n_functions = vec_safe_length (ctor);
> >>      tree fn_info_ary_type = build_array_type
> >> @@ -1231,7 +1238,7 @@ coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
> >>      varpool_node::finalize_decl (fn_info_ary);
> >>
> >>      DECL_INITIAL (gcov_info_var)
> >> -    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary);
> >> +    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum);
> >>      varpool_node::finalize_decl (gcov_info_var);
> >>    }
> >>
> >> @@ -1300,7 +1307,6 @@ coverage_init (const char *filename)
> >>      strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
> >>
> >>      bbg_file_stamp = local_tick;
> >> -
> >>      if (flag_auto_profile)
> >>        read_autofdo_file ();
> >>      else if (flag_branch_probabilities)
> >> @@ -1328,6 +1334,8 @@ coverage_init (const char *filename)
> >>            gcov_write_unsigned (GCOV_NOTE_MAGIC);
> >>            gcov_write_unsigned (GCOV_VERSION);
> >>            gcov_write_unsigned (bbg_file_stamp);
> >> +         /* Use an arbitrary checksum */
> >> +         gcov_write_unsigned (0);
> >>            gcov_write_string (getpwd ());
> >>
> >>            /* Do not support has_unexecuted_blocks for Ada.  */
> >> @@ -1353,14 +1361,24 @@ coverage_finish (void)
> >>           cannot uniquely stamp it.  If we can stamp it, libgcov will DTRT.  */
> >>        unlink (da_file_name);
> >>
> >> +  /* Global GCDA checksum that aggregates all functions.  */
> >> +  unsigned object_checksum = 0;
> >> +
> >>      if (coverage_obj_init ())
> >>        {
> >>          vec<constructor_elt, va_gc> *fn_ctor = NULL;
> >>          struct coverage_data *fn;
> >>
> >>          for (fn = functions_head; fn; fn = fn->next)
> >> -       fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
> >> -      coverage_obj_finish (fn_ctor);
> >> +       {
> >> +         fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
> >> +
> >> +         object_checksum = crc32_unsigned (object_checksum, fn->ident);
> >> +         object_checksum = crc32_unsigned (object_checksum,
> >> +                                           fn->lineno_checksum);
> >> +         object_checksum = crc32_unsigned (object_checksum, fn->cfg_checksum);
> >> +       }
> >> +      coverage_obj_finish (fn_ctor, object_checksum);
> >>        }
> >>
> >>      XDELETEVEC (da_file_name);
> >> diff --git a/gcc/gcov-dump.c b/gcc/gcov-dump.c
> >> index f1489fe0214..bfaf735d2ff 100644
> >> --- a/gcc/gcov-dump.c
> >> +++ b/gcc/gcov-dump.c
> >> @@ -206,11 +206,12 @@ dump_gcov_file (const char *filename)
> >>      }
> >>
> >>      /* stamp */
> >> -  {
> >> -    unsigned stamp = gcov_read_unsigned ();
> >> +  unsigned stamp = gcov_read_unsigned ();
> >> +  printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
> >>
> >> -    printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
> >> -  }
> >> +  /* Checksum */
> >> +  unsigned checksum = gcov_read_unsigned ();
> >> +  printf ("%s:checksum %lu\n", filename, (unsigned long)checksum);
> >>
> >>      if (!is_data_type)
> >>        {
> >> diff --git a/gcc/gcov.c b/gcc/gcov.c
> >> index cf0a49d8c30..829e955a63b 100644
> >> --- a/gcc/gcov.c
> >> +++ b/gcc/gcov.c
> >> @@ -1814,6 +1814,8 @@ read_graph_file (void)
> >>                 bbg_file_name, v, e);
> >>        }
> >>      bbg_stamp = gcov_read_unsigned ();
> >> +  /* Read checksum.  */
> >> +  gcov_read_unsigned ();
> >>      bbg_cwd = xstrdup (gcov_read_string ());
> >>      bbg_supports_has_unexecuted_blocks = gcov_read_unsigned ();
> >>
> >> @@ -2031,6 +2033,9 @@ read_count_file (void)
> >>          goto cleanup;
> >>        }
> >>
> >> +  /* Read checksum.  */
> >> +  gcov_read_unsigned ();
> >> +
> >>      while ((tag = gcov_read_unsigned ()))
> >>        {
> >>          unsigned length = gcov_read_unsigned ();
> >> diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
> >> index 087f71e0107..7aa97bbb06a 100644
> >> --- a/libgcc/libgcov-driver.c
> >> +++ b/libgcc/libgcov-driver.c
> >> @@ -260,12 +260,15 @@ merge_one_data (const char *filename,
> >>      if (!gcov_version (gi_ptr, length, filename))
> >>        return -1;
> >>
> >> +  /* Skip timestamp.  */
> >> +  gcov_read_unsigned ();
> >> +
> >>      length = gcov_read_unsigned ();
> >> -  if (length != gi_ptr->stamp)
> >> +  if (length != gi_ptr->checksum)
> >>        {
> >>          /* Read from a different compilation.  Overwrite the file.  */
> >>          gcov_error (GCOV_PROF_PREFIX "overwriting an existing profile data "
> >> -                 "with a different timestamp\n", filename);
> >> +                 "with a different checksum\n", filename);
> >>          return 0;
> >>        }
> >>
> >> @@ -495,6 +498,7 @@ write_one_data (const struct gcov_info *gi_ptr,
> >>      dump_unsigned (GCOV_DATA_MAGIC, dump_fn, arg);
> >>      dump_unsigned (GCOV_VERSION, dump_fn, arg);
> >>      dump_unsigned (gi_ptr->stamp, dump_fn, arg);
> >> +  dump_unsigned (gi_ptr->checksum, dump_fn, arg);
> >>
> >>    #ifdef NEED_L_GCOV
> >>      /* Generate whole program statistics.  */
> >> diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
> >> index bc7416d99bf..766ca3559c4 100644
> >> --- a/libgcc/libgcov-util.c
> >> +++ b/libgcc/libgcov-util.c
> >> @@ -310,6 +310,9 @@ read_gcda_file (const char *filename)
> >>      /* Read stamp.  */
> >>      obj_info->stamp = gcov_read_unsigned ();
> >>
> >> +  /* Read checksum.  */
> >> +  obj_info->checksum = gcov_read_unsigned ();
> >> +
> >>      while (1)
> >>        {
> >>          gcov_position_t base;
> >> diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
> >> index f6354a7a070..2a365c95759 100644
> >> --- a/libgcc/libgcov.h
> >> +++ b/libgcc/libgcov.h
> >> @@ -230,6 +230,7 @@ struct gcov_info
> >>      struct gcov_info *next;     /* link to next, used by libgcov */
> >>
> >>      gcov_unsigned_t stamp;      /* uniquifying time stamp */
> >> +  gcov_unsigned_t checksum;    /* unique object checksum */
> >>      const char *filename;               /* output file name */
> >>
> >>      gcov_merge_fn merge[GCOV_COUNTERS];  /* merge functions (null for
> >> --
> >> 2.33.0
> >>
>
  
Martin Liška Oct. 4, 2021, 11:32 a.m. UTC | #4
On 10/4/21 13:16, Richard Biener wrote:
> I meant in merge_one_data do not check ->stamp or ->checksum but instead rely
> on the counter merging code to detect mismatches (there's read_mismatch and
> read_error).  There's multiple things we can do when we run into those:
> 
>   - when we did not actually merged any counter yet we could issue the
>     warning as before and drop the old data on the floor
>   - when we_did_  merge some counters already we could hard-error
>     (I suppose we can't roll-back merging that took place already)
>   - we could do the merging two-stage, first see whether the data matches
>     and only if it did perform the merging

I've got your point, you are basically suggesting a fine grained merging
(function based). Huh, I don't like it much as it's typically a mistake
in the build setup that 2 objects (with a different checksum) want to emit
profile to the same .gcda file.

My patch handles the obvious situation where an object file is built exactly
the same way (so no e.g. -O0 and -O2).

> 
> Note that all of the changes (including yours) have user-visible effects and
> the behavior is somewhat unobvious.  Not merging when the object was
> re-built is indeed the most obvious behavior so I'm not sure it's a good
> idea.  A new env variable to say whether to simply keep the_old_  data
> when merging in new data isn't possible would be another "fix" I guess?

Even for a situation when checksum matches, but the timestamp is different?
Sure, we can provide env. variables that can tweak the behavior.

Cheers,
Martin
  
Richard Biener Oct. 5, 2021, 10:04 a.m. UTC | #5
On Mon, Oct 4, 2021 at 1:32 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 10/4/21 13:16, Richard Biener wrote:
> > I meant in merge_one_data do not check ->stamp or ->checksum but instead rely
> > on the counter merging code to detect mismatches (there's read_mismatch and
> > read_error).  There's multiple things we can do when we run into those:
> >
> >   - when we did not actually merged any counter yet we could issue the
> >     warning as before and drop the old data on the floor
> >   - when we_did_  merge some counters already we could hard-error
> >     (I suppose we can't roll-back merging that took place already)
> >   - we could do the merging two-stage, first see whether the data matches
> >     and only if it did perform the merging
>
> I've got your point, you are basically suggesting a fine grained merging
> (function based). Huh, I don't like it much as it's typically a mistake
> in the build setup that 2 objects (with a different checksum) want to emit
> profile to the same .gcda file.

I agree, it's usually a mistake.

> My patch handles the obvious situation where an object file is built exactly
> the same way (so no e.g. -O0 and -O2).

Yeah, but then the two profiles may not be related at all ...

> >
> > Note that all of the changes (including yours) have user-visible effects and
> > the behavior is somewhat unobvious.  Not merging when the object was
> > re-built is indeed the most obvious behavior so I'm not sure it's a good
> > idea.  A new env variable to say whether to simply keep the_old_  data
> > when merging in new data isn't possible would be another "fix" I guess?
>
> Even for a situation when checksum matches, but the timestamp is different?
> Sure, we can provide env. variables that can tweak the behavior.

I suppose another distinguishing factor might be the name of the executable.

But yeah, in the end it's a fishy area ...

So I guess your originally posted patch might be the best way to go - can you
try to amend the documentation as for the behavior with respect to
re-compiling and profile merging?  I suppose that if you re-compile just
a single .o you currently merge into all the other .o file counters but _not_
into the newly compiled old counters.  That would make coverage off
as well for incremental re-compiling?

I only can find

@item
Run the program on a representative workload to generate the arc profile
information.  This may be repeated any number of times.  You can run
concurrent instances of your program, and provided that the file system
supports locking, the data files will be correctly updated.  Unless
a strict ISO C dialect option is in effect, @code{fork} calls are
detected and correctly handled without double counting.

but that's under -coverage, not sure if there's a better place to amend.

Note I see there's -fprofile-dir which eventually can be used to "fix"
the SPEC issue as well?

Richard.

> Cheers,
> Martin
>
  
Martin Liška Oct. 11, 2021, 1:49 p.m. UTC | #6
On 10/5/21 12:04, Richard Biener wrote:
> On Mon, Oct 4, 2021 at 1:32 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> On 10/4/21 13:16, Richard Biener wrote:
>>> I meant in merge_one_data do not check ->stamp or ->checksum but instead rely
>>> on the counter merging code to detect mismatches (there's read_mismatch and
>>> read_error).  There's multiple things we can do when we run into those:
>>>
>>>    - when we did not actually merged any counter yet we could issue the
>>>      warning as before and drop the old data on the floor
>>>    - when we_did_  merge some counters already we could hard-error
>>>      (I suppose we can't roll-back merging that took place already)
>>>    - we could do the merging two-stage, first see whether the data matches
>>>      and only if it did perform the merging
>>
>> I've got your point, you are basically suggesting a fine grained merging
>> (function based). Huh, I don't like it much as it's typically a mistake
>> in the build setup that 2 objects (with a different checksum) want to emit
>> profile to the same .gcda file.
> 
> I agree, it's usually a mistake.
> 
>> My patch handles the obvious situation where an object file is built exactly
>> the same way (so no e.g. -O0 and -O2).
> 
> Yeah, but then the two profiles may not be related at all ...

Well, it's quite common case that one object file is then linked into multiple
binaries (e.g. util.o in a project). We collect also sum_max:
Sum of individual run max values.
which helps handling such a situation.

> 
>>>
>>> Note that all of the changes (including yours) have user-visible effects and
>>> the behavior is somewhat unobvious.  Not merging when the object was
>>> re-built is indeed the most obvious behavior so I'm not sure it's a good
>>> idea.  A new env variable to say whether to simply keep the_old_  data
>>> when merging in new data isn't possible would be another "fix" I guess?
>>
>> Even for a situation when checksum matches, but the timestamp is different?
>> Sure, we can provide env. variables that can tweak the behavior.
> 
> I suppose another distinguishing factor might be the name of the executable.

Well, at compile time, we don't know name of a final executable.

> 
> But yeah, in the end it's a fishy area ...
> 
> So I guess your originally posted patch might be the best way to go - can you
> try to amend the documentation as for the behavior with respect to
> re-compiling and profile merging?  I suppose that if you re-compile just
> a single .o you currently merge into all the other .o file counters but _not_
> into the newly compiled old counters.

Yes, I can update the documentation.

> That would make coverage off
> as well for incremental re-compiling?

Yes.

> 
> I only can find
> 
> @item
> Run the program on a representative workload to generate the arc profile
> information.  This may be repeated any number of times.  You can run
> concurrent instances of your program, and provided that the file system
> supports locking, the data files will be correctly updated.  Unless
> a strict ISO C dialect option is in effect, @code{fork} calls are
> detected and correctly handled without double counting.
> 
> but that's under -coverage, not sure if there's a better place to amend.
> 
> Note I see there's -fprofile-dir which eventually can be used to "fix"
> the SPEC issue as well?

We would have to provide a different option value of -fprofile-dir for both
binaries. That's something we can't easily do in a SPEC config file.

Let me update the documentation bits.

Martin

> 
> Richard.
> 
>> Cheers,
>> Martin
>>
  
Martin Liška Oct. 11, 2021, 2:05 p.m. UTC | #7
On 10/11/21 15:49, Martin Liška wrote:
> Let me update the documentation bits.

There's the updated patch.

May I install the patch now?

Thanks,
Martin
  
Martin Liška Oct. 13, 2021, 1:27 p.m. UTC | #8
On 10/11/21 16:05, Martin Liška wrote:
> May I install the patch now?

Pushed to master, I guess we can tweak documentation in the future
if needed.

Martin
  

Patch

diff --git a/gcc/coverage.c b/gcc/coverage.c
index 10d7f8366cb..4467f1eaa5c 100644
--- a/gcc/coverage.c
+++ b/gcc/coverage.c
@@ -129,16 +129,7 @@  static const char *const ctr_names[GCOV_COUNTERS] = {
  #undef DEF_GCOV_COUNTER
  
  /* Forward declarations.  */
-static void read_counts_file (void);
  static tree build_var (tree, tree, int);
-static void build_fn_info_type (tree, unsigned, tree);
-static void build_info_type (tree, tree);
-static tree build_fn_info (const struct coverage_data *, tree, tree);
-static tree build_info (tree, tree);
-static bool coverage_obj_init (void);
-static vec<constructor_elt, va_gc> *coverage_obj_fn
-(vec<constructor_elt, va_gc> *, tree, struct coverage_data const *);
-static void coverage_obj_finish (vec<constructor_elt, va_gc> *);
  
  /* Return the type node for gcov_type.  */
  
@@ -218,6 +209,9 @@  read_counts_file (void)
    tag = gcov_read_unsigned ();
    bbg_file_stamp = crc32_unsigned (bbg_file_stamp, tag);
  
+  /* Read checksum.  */
+  gcov_read_unsigned ();
+
    counts_hash = new hash_table<counts_entry> (10);
    while ((tag = gcov_read_unsigned ()))
      {
@@ -935,6 +929,12 @@  build_info_type (tree type, tree fn_info_ptr_type)
    DECL_CHAIN (field) = fields;
    fields = field;
  
+  /* Checksum.  */
+  field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
+		      get_gcov_unsigned_t ());
+  DECL_CHAIN (field) = fields;
+  fields = field;
+
    /* Filename */
    field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
  		      build_pointer_type (build_qualified_type
@@ -977,7 +977,7 @@  build_info_type (tree type, tree fn_info_ptr_type)
     function info objects.  */
  
  static tree
-build_info (tree info_type, tree fn_ary)
+build_info (tree info_type, tree fn_ary, unsigned object_checksum)
  {
    tree info_fields = TYPE_FIELDS (info_type);
    tree merge_fn_type, n_funcs;
@@ -996,13 +996,19 @@  build_info (tree info_type, tree fn_ary)
    /* next -- NULL */
    CONSTRUCTOR_APPEND_ELT (v1, info_fields, null_pointer_node);
    info_fields = DECL_CHAIN (info_fields);
-
+
    /* stamp */
    CONSTRUCTOR_APPEND_ELT (v1, info_fields,
  			  build_int_cstu (TREE_TYPE (info_fields),
  					  bbg_file_stamp));
    info_fields = DECL_CHAIN (info_fields);
  
+  /* Checksum.  */
+  CONSTRUCTOR_APPEND_ELT (v1, info_fields,
+			  build_int_cstu (TREE_TYPE (info_fields),
+					  object_checksum));
+  info_fields = DECL_CHAIN (info_fields);
+
    /* Filename */
    da_file_name_len = strlen (da_file_name);
    filename_string = build_string (da_file_name_len + 1, da_file_name);
@@ -1214,7 +1220,8 @@  coverage_obj_fn (vec<constructor_elt, va_gc> *ctor, tree fn,
     function objects from CTOR.  Generate the gcov_info initializer.  */
  
  static void
-coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
+coverage_obj_finish (vec<constructor_elt, va_gc> *ctor,
+		     unsigned object_checksum)
  {
    unsigned n_functions = vec_safe_length (ctor);
    tree fn_info_ary_type = build_array_type
@@ -1231,7 +1238,7 @@  coverage_obj_finish (vec<constructor_elt, va_gc> *ctor)
    varpool_node::finalize_decl (fn_info_ary);
    
    DECL_INITIAL (gcov_info_var)
-    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary);
+    = build_info (TREE_TYPE (gcov_info_var), fn_info_ary, object_checksum);
    varpool_node::finalize_decl (gcov_info_var);
  }
  
@@ -1300,7 +1307,6 @@  coverage_init (const char *filename)
    strcpy (da_file_name + prefix_len + len, GCOV_DATA_SUFFIX);
  
    bbg_file_stamp = local_tick;
-
    if (flag_auto_profile)
      read_autofdo_file ();
    else if (flag_branch_probabilities)
@@ -1328,6 +1334,8 @@  coverage_init (const char *filename)
  	  gcov_write_unsigned (GCOV_NOTE_MAGIC);
  	  gcov_write_unsigned (GCOV_VERSION);
  	  gcov_write_unsigned (bbg_file_stamp);
+	  /* Use an arbitrary checksum */
+	  gcov_write_unsigned (0);
  	  gcov_write_string (getpwd ());
  
  	  /* Do not support has_unexecuted_blocks for Ada.  */
@@ -1353,14 +1361,24 @@  coverage_finish (void)
         cannot uniquely stamp it.  If we can stamp it, libgcov will DTRT.  */
      unlink (da_file_name);
  
+  /* Global GCDA checksum that aggregates all functions.  */
+  unsigned object_checksum = 0;
+
    if (coverage_obj_init ())
      {
        vec<constructor_elt, va_gc> *fn_ctor = NULL;
        struct coverage_data *fn;
        
        for (fn = functions_head; fn; fn = fn->next)
-	fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
-      coverage_obj_finish (fn_ctor);
+	{
+	  fn_ctor = coverage_obj_fn (fn_ctor, fn->fn_decl, fn);
+
+	  object_checksum = crc32_unsigned (object_checksum, fn->ident);
+	  object_checksum = crc32_unsigned (object_checksum,
+					    fn->lineno_checksum);
+	  object_checksum = crc32_unsigned (object_checksum, fn->cfg_checksum);
+	}
+      coverage_obj_finish (fn_ctor, object_checksum);
      }
  
    XDELETEVEC (da_file_name);
diff --git a/gcc/gcov-dump.c b/gcc/gcov-dump.c
index f1489fe0214..bfaf735d2ff 100644
--- a/gcc/gcov-dump.c
+++ b/gcc/gcov-dump.c
@@ -206,11 +206,12 @@  dump_gcov_file (const char *filename)
    }
  
    /* stamp */
-  {
-    unsigned stamp = gcov_read_unsigned ();
+  unsigned stamp = gcov_read_unsigned ();
+  printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
  
-    printf ("%s:stamp %lu\n", filename, (unsigned long)stamp);
-  }
+  /* Checksum */
+  unsigned checksum = gcov_read_unsigned ();
+  printf ("%s:checksum %lu\n", filename, (unsigned long)checksum);
  
    if (!is_data_type)
      {
diff --git a/gcc/gcov.c b/gcc/gcov.c
index cf0a49d8c30..829e955a63b 100644
--- a/gcc/gcov.c
+++ b/gcc/gcov.c
@@ -1814,6 +1814,8 @@  read_graph_file (void)
  	       bbg_file_name, v, e);
      }
    bbg_stamp = gcov_read_unsigned ();
+  /* Read checksum.  */
+  gcov_read_unsigned ();
    bbg_cwd = xstrdup (gcov_read_string ());
    bbg_supports_has_unexecuted_blocks = gcov_read_unsigned ();
  
@@ -2031,6 +2033,9 @@  read_count_file (void)
        goto cleanup;
      }
  
+  /* Read checksum.  */
+  gcov_read_unsigned ();
+
    while ((tag = gcov_read_unsigned ()))
      {
        unsigned length = gcov_read_unsigned ();
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index 087f71e0107..7aa97bbb06a 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -260,12 +260,15 @@  merge_one_data (const char *filename,
    if (!gcov_version (gi_ptr, length, filename))
      return -1;
  
+  /* Skip timestamp.  */
+  gcov_read_unsigned ();
+
    length = gcov_read_unsigned ();
-  if (length != gi_ptr->stamp)
+  if (length != gi_ptr->checksum)
      {
        /* Read from a different compilation.  Overwrite the file.  */
        gcov_error (GCOV_PROF_PREFIX "overwriting an existing profile data "
-		  "with a different timestamp\n", filename);
+		  "with a different checksum\n", filename);
        return 0;
      }
  
@@ -495,6 +498,7 @@  write_one_data (const struct gcov_info *gi_ptr,
    dump_unsigned (GCOV_DATA_MAGIC, dump_fn, arg);
    dump_unsigned (GCOV_VERSION, dump_fn, arg);
    dump_unsigned (gi_ptr->stamp, dump_fn, arg);
+  dump_unsigned (gi_ptr->checksum, dump_fn, arg);
  
  #ifdef NEED_L_GCOV
    /* Generate whole program statistics.  */
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
index bc7416d99bf..766ca3559c4 100644
--- a/libgcc/libgcov-util.c
+++ b/libgcc/libgcov-util.c
@@ -310,6 +310,9 @@  read_gcda_file (const char *filename)
    /* Read stamp.  */
    obj_info->stamp = gcov_read_unsigned ();
  
+  /* Read checksum.  */
+  obj_info->checksum = gcov_read_unsigned ();
+
    while (1)
      {
        gcov_position_t base;
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index f6354a7a070..2a365c95759 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -230,6 +230,7 @@  struct gcov_info
    struct gcov_info *next;	/* link to next, used by libgcov */
  
    gcov_unsigned_t stamp;	/* uniquifying time stamp */
+  gcov_unsigned_t checksum;	/* unique object checksum */
    const char *filename;		/* output file name */
  
    gcov_merge_fn merge[GCOV_COUNTERS];  /* merge functions (null for