tree-profile: Don't instrument an IFUNC resolver nor its callees

Message ID 20240226225312.22326-1-hjl.tools@gmail.com
State New
Headers
Series tree-profile: Don't instrument an IFUNC resolver nor its callees |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed

Commit Message

H.J. Lu Feb. 26, 2024, 10:53 p.m. UTC
  We can't instrument an IFUNC resolver nor its callees as it may require
TLS which hasn't been set up yet when the dynamic linker is resolving
IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
avoid recursive checking.

gcc/ChangeLog:

	PR tree-optimization/114115
	* cgraph.h (enum ifunc_caller): New.
	(symtab_node): Add has_ifunc_caller.
	* tree-profile.cc (check_ifunc_resolver): New.
	(is_caller_ifunc_resolver): Likewise.
	(tree_profiling): Don't instrument an IFUNC resolver nor its
	callees.

gcc/testsuite/ChangeLog:

	PR tree-optimization/114115
	* gcc.dg/pr114115.c: New test.
---
 gcc/cgraph.h                    | 18 +++++++
 gcc/testsuite/gcc.dg/pr114115.c | 24 +++++++++
 gcc/tree-profile.cc             | 92 +++++++++++++++++++++++++++++++++
 3 files changed, 134 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
  

Comments

Jan Hubicka Feb. 29, 2024, 1:39 p.m. UTC | #1
> We can't instrument an IFUNC resolver nor its callees as it may require
> TLS which hasn't been set up yet when the dynamic linker is resolving
> IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> avoid recursive checking.
> 
> gcc/ChangeLog:
> 
> 	PR tree-optimization/114115
> 	* cgraph.h (enum ifunc_caller): New.
> 	(symtab_node): Add has_ifunc_caller.
Unless we have users outside of tree-profile, I think it is better to
avoid adding extra data to cgraph_node.  One can use node->get_uid() indexed hash
set to save the two bits needed for propagation.
> 	* tree-profile.cc (check_ifunc_resolver): New.
> 	(is_caller_ifunc_resolver): Likewise.
> 	(tree_profiling): Don't instrument an IFUNC resolver nor its
> 	callees.
> 
> gcc/testsuite/ChangeLog:
> 
> 	PR tree-optimization/114115
> 	* gcc.dg/pr114115.c: New test.

The problem with this approach is that tracking callees of ifunc
resolvers will stop on the translation unit boundary and also with
indirect call.  Also while ifunc resolver itself is called only once,
its callees may also be used from performance critical code.

So it is not really reliable fix (though I guess it will work a lot of
common code).  I wonder what would be alternatives.  In GCC generated
profling code we use TLS only for undirect call profiling (so there is
no need to turn off rest of profiling).  I wonder if there is any chance
to not make it seffault when it is done before TLS is set up?

Honza
> ---
>  gcc/cgraph.h                    | 18 +++++++
>  gcc/testsuite/gcc.dg/pr114115.c | 24 +++++++++
>  gcc/tree-profile.cc             | 92 +++++++++++++++++++++++++++++++++
>  3 files changed, 134 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
> 
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 47f35e8078d..ce99f4a5114 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -100,6 +100,21 @@ enum symbol_partitioning_class
>     SYMBOL_DUPLICATE
>  };
>  
> +/* Classification whether a function has any IFUNC resolver caller.  */
> +enum ifunc_caller
> +{
> +  /* It is unknown if this function has any IFUNC resolver caller.  */
> +  IFUNC_CALLER_UNKNOWN,
> +  /* Work in progress to check if this function has any IFUNC resolver
> +     caller.  */
> +  IFUNC_CALLER_WIP,
> +  /* This function has at least an IFUNC resolver caller, including
> +     itself.  */
> +  IFUNC_CALLER_TRUE,
> +  /* This function doesn't have any IFUNC resolver caller.  */
> +  IFUNC_CALLER_FALSE
> +};
> +
>  /* Base of all entries in the symbol table.
>     The symtab_node is inherited by cgraph and varpol nodes.  */
>  struct GTY((desc ("%h.type"), tag ("SYMTAB_SYMBOL"),
> @@ -121,6 +136,7 @@ public:
>        used_from_other_partition (false), in_other_partition (false),
>        address_taken (false), in_init_priority_hash (false),
>        need_lto_streaming (false), offloadable (false), ifunc_resolver (false),
> +      has_ifunc_caller (IFUNC_CALLER_UNKNOWN),
>        order (false), next_sharing_asm_name (NULL),
>        previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list (),
>        alias_target (NULL), lto_file_data (NULL), aux (NULL),
> @@ -595,6 +611,8 @@ public:
>    /* Set when symbol is an IFUNC resolver.  */
>    unsigned ifunc_resolver : 1;
>  
> +  /* Classification whether a function has any IFUNC resolver caller.  */
> +  ENUM_BITFIELD (ifunc_caller) has_ifunc_caller : 2;
>  
>    /* Ordering of all symtab entries.  */
>    int order;
> diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
> new file mode 100644
> index 00000000000..2629f591877
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr114115.c
> @@ -0,0 +1,24 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
> +/* { dg-require-profiling "-fprofile-generate" } */
> +/* { dg-require-ifunc "" } */
> +
> +void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
> +
> +void bar(void)
> +{
> +}
> +
> +static int f3()
> +{
> +  bar ();
> +  return 5;
> +}
> +
> +void (*foo_resolver(void))(void)
> +{
> +  f3();
> +  return bar;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" "optimized" } } */
> diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> index aed13e2b1bc..46478648b32 100644
> --- a/gcc/tree-profile.cc
> +++ b/gcc/tree-profile.cc
> @@ -738,6 +738,72 @@ include_source_file_for_profile (const char *filename)
>    return false;
>  }
>  
> +/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
> +
> +static bool
> +check_ifunc_resolver (cgraph_node *node, void *data)
> +{
> +  if (node->ifunc_resolver)
> +    {
> +      bool *is_ifunc_resolver = (bool *) data;
> +      *is_ifunc_resolver = true;
> +      return true;
> +    }
> +  return false;
> +}
> +
> +/* Return true if any caller of NODE is an ifunc resolver.  */
> +
> +static bool
> +is_caller_ifunc_resolver (cgraph_node *node)
> +{
> +  if (node->has_ifunc_caller == IFUNC_CALLER_WIP)
> +    gcc_unreachable ();
> +
> +  node->has_ifunc_caller = IFUNC_CALLER_WIP;
> +  bool is_ifunc_resolver = false;
> +
> +  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
> +    {
> +      /* Check for recursive call.  */
> +      if (e->caller == node)
> +	continue;
> +
> +      switch (e->caller->has_ifunc_caller)
> +	{
> +	case IFUNC_CALLER_UNKNOWN:
> +	  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
> +						  &is_ifunc_resolver,
> +						  true);
> +	  if (is_ifunc_resolver)
> +	    {
> +	      e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
> +	      return true;
> +	    }
> +	  break;
> +	case IFUNC_CALLER_TRUE:
> +	  return true;
> +	case IFUNC_CALLER_FALSE:
> +	  /* This caller doesn't have any IFUNC resolver call.  Check
> +	     the next caller.  */
> +	  continue;
> +
> +	case IFUNC_CALLER_WIP:
> +	  continue;
> +	}
> +
> +      if (is_caller_ifunc_resolver (e->caller))
> +	{
> +	  e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
> +	  return true;
> +	}
> +      else
> +	e->caller->has_ifunc_caller = IFUNC_CALLER_FALSE;
> +    }
> +
> +  return false;
> +}
> +
>  #ifndef HAVE_sync_compare_and_swapsi
>  #define HAVE_sync_compare_and_swapsi 0
>  #endif
> @@ -848,6 +914,32 @@ tree_profiling (void)
>  	    }
>  	}
>  
> +      /* Do not instrument an IFUNC resolver nor its callees.  */
> +      bool is_ifunc_resolver = false;
> +      switch (node->has_ifunc_caller)
> +	{
> +	case IFUNC_CALLER_UNKNOWN:
> +	  node->call_for_symbol_and_aliases (check_ifunc_resolver,
> +					     &is_ifunc_resolver, true);
> +	  if (is_ifunc_resolver || is_caller_ifunc_resolver (node))
> +	    {
> +	      node->has_ifunc_caller = IFUNC_CALLER_TRUE;
> +	      continue;
> +	    }
> +	  else
> +	    node->has_ifunc_caller = IFUNC_CALLER_FALSE;
> +	  break;
> +
> +	case IFUNC_CALLER_TRUE:
> +	  continue;
> +
> +	case IFUNC_CALLER_FALSE:
> +	  break;
> +
> +	case IFUNC_CALLER_WIP:
> +	  gcc_unreachable ();
> +	}
> +
>        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
>  
>        if (dump_file)
> -- 
> 2.43.2
>
  
H.J. Lu Feb. 29, 2024, 2:13 p.m. UTC | #2
On Thu, Feb 29, 2024 at 5:39 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > We can't instrument an IFUNC resolver nor its callees as it may require
> > TLS which hasn't been set up yet when the dynamic linker is resolving
> > IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> > avoid recursive checking.
> >
> > gcc/ChangeLog:
> >
> >       PR tree-optimization/114115
> >       * cgraph.h (enum ifunc_caller): New.
> >       (symtab_node): Add has_ifunc_caller.
> Unless we have users outside of tree-profile, I think it is better to
> avoid adding extra data to cgraph_node.  One can use node->get_uid() indexed hash
> set to save the two bits needed for propagation.
> >       * tree-profile.cc (check_ifunc_resolver): New.
> >       (is_caller_ifunc_resolver): Likewise.
> >       (tree_profiling): Don't instrument an IFUNC resolver nor its
> >       callees.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       PR tree-optimization/114115
> >       * gcc.dg/pr114115.c: New test.
>
> The problem with this approach is that tracking callees of ifunc
> resolvers will stop on the translation unit boundary and also with
> indirect call.  Also while ifunc resolver itself is called only once,
> its callees may also be used from performance critical code.

IFUNC selector shouldn't have any external dependencies which
can cause issues at run-time.

> So it is not really reliable fix (though I guess it will work a lot of
> common code).  I wonder what would be alternatives.  In GCC generated
> profling code we use TLS only for undirect call profiling (so there is
> no need to turn off rest of profiling).  I wonder if there is any chance
> to not make it seffault when it is done before TLS is set up?

IFUNC selector should make minimum external calls, none is preferred.
Any external calls may lead to issues at run-time.  It is a very bad idea
to profile IFUNC selector via external function call.

> Honza
> > ---
> >  gcc/cgraph.h                    | 18 +++++++
> >  gcc/testsuite/gcc.dg/pr114115.c | 24 +++++++++
> >  gcc/tree-profile.cc             | 92 +++++++++++++++++++++++++++++++++
> >  3 files changed, 134 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr114115.c
> >
> > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > index 47f35e8078d..ce99f4a5114 100644
> > --- a/gcc/cgraph.h
> > +++ b/gcc/cgraph.h
> > @@ -100,6 +100,21 @@ enum symbol_partitioning_class
> >     SYMBOL_DUPLICATE
> >  };
> >
> > +/* Classification whether a function has any IFUNC resolver caller.  */
> > +enum ifunc_caller
> > +{
> > +  /* It is unknown if this function has any IFUNC resolver caller.  */
> > +  IFUNC_CALLER_UNKNOWN,
> > +  /* Work in progress to check if this function has any IFUNC resolver
> > +     caller.  */
> > +  IFUNC_CALLER_WIP,
> > +  /* This function has at least an IFUNC resolver caller, including
> > +     itself.  */
> > +  IFUNC_CALLER_TRUE,
> > +  /* This function doesn't have any IFUNC resolver caller.  */
> > +  IFUNC_CALLER_FALSE
> > +};
> > +
> >  /* Base of all entries in the symbol table.
> >     The symtab_node is inherited by cgraph and varpol nodes.  */
> >  struct GTY((desc ("%h.type"), tag ("SYMTAB_SYMBOL"),
> > @@ -121,6 +136,7 @@ public:
> >        used_from_other_partition (false), in_other_partition (false),
> >        address_taken (false), in_init_priority_hash (false),
> >        need_lto_streaming (false), offloadable (false), ifunc_resolver (false),
> > +      has_ifunc_caller (IFUNC_CALLER_UNKNOWN),
> >        order (false), next_sharing_asm_name (NULL),
> >        previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list (),
> >        alias_target (NULL), lto_file_data (NULL), aux (NULL),
> > @@ -595,6 +611,8 @@ public:
> >    /* Set when symbol is an IFUNC resolver.  */
> >    unsigned ifunc_resolver : 1;
> >
> > +  /* Classification whether a function has any IFUNC resolver caller.  */
> > +  ENUM_BITFIELD (ifunc_caller) has_ifunc_caller : 2;
> >
> >    /* Ordering of all symtab entries.  */
> >    int order;
> > diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
> > new file mode 100644
> > index 00000000000..2629f591877
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.dg/pr114115.c
> > @@ -0,0 +1,24 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
> > +/* { dg-require-profiling "-fprofile-generate" } */
> > +/* { dg-require-ifunc "" } */
> > +
> > +void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
> > +
> > +void bar(void)
> > +{
> > +}
> > +
> > +static int f3()
> > +{
> > +  bar ();
> > +  return 5;
> > +}
> > +
> > +void (*foo_resolver(void))(void)
> > +{
> > +  f3();
> > +  return bar;
> > +}
> > +
> > +/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" "optimized" } } */
> > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> > index aed13e2b1bc..46478648b32 100644
> > --- a/gcc/tree-profile.cc
> > +++ b/gcc/tree-profile.cc
> > @@ -738,6 +738,72 @@ include_source_file_for_profile (const char *filename)
> >    return false;
> >  }
> >
> > +/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
> > +
> > +static bool
> > +check_ifunc_resolver (cgraph_node *node, void *data)
> > +{
> > +  if (node->ifunc_resolver)
> > +    {
> > +      bool *is_ifunc_resolver = (bool *) data;
> > +      *is_ifunc_resolver = true;
> > +      return true;
> > +    }
> > +  return false;
> > +}
> > +
> > +/* Return true if any caller of NODE is an ifunc resolver.  */
> > +
> > +static bool
> > +is_caller_ifunc_resolver (cgraph_node *node)
> > +{
> > +  if (node->has_ifunc_caller == IFUNC_CALLER_WIP)
> > +    gcc_unreachable ();
> > +
> > +  node->has_ifunc_caller = IFUNC_CALLER_WIP;
> > +  bool is_ifunc_resolver = false;
> > +
> > +  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
> > +    {
> > +      /* Check for recursive call.  */
> > +      if (e->caller == node)
> > +     continue;
> > +
> > +      switch (e->caller->has_ifunc_caller)
> > +     {
> > +     case IFUNC_CALLER_UNKNOWN:
> > +       e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
> > +                                               &is_ifunc_resolver,
> > +                                               true);
> > +       if (is_ifunc_resolver)
> > +         {
> > +           e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
> > +           return true;
> > +         }
> > +       break;
> > +     case IFUNC_CALLER_TRUE:
> > +       return true;
> > +     case IFUNC_CALLER_FALSE:
> > +       /* This caller doesn't have any IFUNC resolver call.  Check
> > +          the next caller.  */
> > +       continue;
> > +
> > +     case IFUNC_CALLER_WIP:
> > +       continue;
> > +     }
> > +
> > +      if (is_caller_ifunc_resolver (e->caller))
> > +     {
> > +       e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
> > +       return true;
> > +     }
> > +      else
> > +     e->caller->has_ifunc_caller = IFUNC_CALLER_FALSE;
> > +    }
> > +
> > +  return false;
> > +}
> > +
> >  #ifndef HAVE_sync_compare_and_swapsi
> >  #define HAVE_sync_compare_and_swapsi 0
> >  #endif
> > @@ -848,6 +914,32 @@ tree_profiling (void)
> >           }
> >       }
> >
> > +      /* Do not instrument an IFUNC resolver nor its callees.  */
> > +      bool is_ifunc_resolver = false;
> > +      switch (node->has_ifunc_caller)
> > +     {
> > +     case IFUNC_CALLER_UNKNOWN:
> > +       node->call_for_symbol_and_aliases (check_ifunc_resolver,
> > +                                          &is_ifunc_resolver, true);
> > +       if (is_ifunc_resolver || is_caller_ifunc_resolver (node))
> > +         {
> > +           node->has_ifunc_caller = IFUNC_CALLER_TRUE;
> > +           continue;
> > +         }
> > +       else
> > +         node->has_ifunc_caller = IFUNC_CALLER_FALSE;
> > +       break;
> > +
> > +     case IFUNC_CALLER_TRUE:
> > +       continue;
> > +
> > +     case IFUNC_CALLER_FALSE:
> > +       break;
> > +
> > +     case IFUNC_CALLER_WIP:
> > +       gcc_unreachable ();
> > +     }
> > +
> >        push_cfun (DECL_STRUCT_FUNCTION (node->decl));
> >
> >        if (dump_file)
> > --
> > 2.43.2
> >
  
Jan Hubicka Feb. 29, 2024, 2:34 p.m. UTC | #3
> On Thu, Feb 29, 2024 at 5:39 AM Jan Hubicka <hubicka@ucw.cz> wrote:
> >
> > > We can't instrument an IFUNC resolver nor its callees as it may require
> > > TLS which hasn't been set up yet when the dynamic linker is resolving
> > > IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> > > avoid recursive checking.
> > >
> > > gcc/ChangeLog:
> > >
> > >       PR tree-optimization/114115
> > >       * cgraph.h (enum ifunc_caller): New.
> > >       (symtab_node): Add has_ifunc_caller.
> > Unless we have users outside of tree-profile, I think it is better to
> > avoid adding extra data to cgraph_node.  One can use node->get_uid() indexed hash
> > set to save the two bits needed for propagation.
> > >       * tree-profile.cc (check_ifunc_resolver): New.
> > >       (is_caller_ifunc_resolver): Likewise.
> > >       (tree_profiling): Don't instrument an IFUNC resolver nor its
> > >       callees.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >       PR tree-optimization/114115
> > >       * gcc.dg/pr114115.c: New test.
> >
> > The problem with this approach is that tracking callees of ifunc
> > resolvers will stop on the translation unit boundary and also with
> > indirect call.  Also while ifunc resolver itself is called only once,
> > its callees may also be used from performance critical code.
> 
> IFUNC selector shouldn't have any external dependencies which
> can cause issues at run-time.

I am worried about scenario where ifunc selector calls function foo
defined locally and foo is also used from other places possibly in hot
loops.
> 
> > So it is not really reliable fix (though I guess it will work a lot of
> > common code).  I wonder what would be alternatives.  In GCC generated
> > profling code we use TLS only for undirect call profiling (so there is
> > no need to turn off rest of profiling).  I wonder if there is any chance
> > to not make it seffault when it is done before TLS is set up?
> 
> IFUNC selector should make minimum external calls, none is preferred.

Edge porfiling only inserts (atomic) 64bit increments of counters.
If target supports these operations inline, no external calls will be
done.

Indirect call profiling inserts the problematic TLS variable (to track
caller-callee pairs). Value profiling also inserts various additional
external calls to counters.

I am perfectly fine with disabling instrumentation for ifunc selectors
and functions only reachable from them, but I am worried about calles
used also from non-ifunc path.

For example selector implemented in C++ may do some string handling to
match CPU name and propagation will disable profiling for std::string
member functions (which may not be effective if comdat section is
prevailed from other translation unit).

> Any external calls may lead to issues at run-time.  It is a very bad idea
> to profile IFUNC selector via external function call.

Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
there are other limitations on ifunc except for profiling, such as
-fstack-protector-all.  So perhaps your propagation can be used to
disable those features as well.

"Unfortunately there are actually a lot of restrictions placed on IFUNC
usage which aren't entirely clear and the documentation needs to be
updated." makes me wonder what other transformations are potentially
dangerous.

Honza
  
H.J. Lu Feb. 29, 2024, 2:40 p.m. UTC | #4
On Thu, Feb 29, 2024 at 6:34 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > On Thu, Feb 29, 2024 at 5:39 AM Jan Hubicka <hubicka@ucw.cz> wrote:
> > >
> > > > We can't instrument an IFUNC resolver nor its callees as it may require
> > > > TLS which hasn't been set up yet when the dynamic linker is resolving
> > > > IFUNC symbols.  Add an IFUNC resolver caller marker to symtab_node to
> > > > avoid recursive checking.
> > > >
> > > > gcc/ChangeLog:
> > > >
> > > >       PR tree-optimization/114115
> > > >       * cgraph.h (enum ifunc_caller): New.
> > > >       (symtab_node): Add has_ifunc_caller.
> > > Unless we have users outside of tree-profile, I think it is better to
> > > avoid adding extra data to cgraph_node.  One can use node->get_uid() indexed hash
> > > set to save the two bits needed for propagation.
> > > >       * tree-profile.cc (check_ifunc_resolver): New.
> > > >       (is_caller_ifunc_resolver): Likewise.
> > > >       (tree_profiling): Don't instrument an IFUNC resolver nor its
> > > >       callees.
> > > >
> > > > gcc/testsuite/ChangeLog:
> > > >
> > > >       PR tree-optimization/114115
> > > >       * gcc.dg/pr114115.c: New test.
> > >
> > > The problem with this approach is that tracking callees of ifunc
> > > resolvers will stop on the translation unit boundary and also with
> > > indirect call.  Also while ifunc resolver itself is called only once,
> > > its callees may also be used from performance critical code.
> >
> > IFUNC selector shouldn't have any external dependencies which
> > can cause issues at run-time.
>
> I am worried about scenario where ifunc selector calls function foo
> defined locally and foo is also used from other places possibly in hot
> loops.
> >
> > > So it is not really reliable fix (though I guess it will work a lot of
> > > common code).  I wonder what would be alternatives.  In GCC generated
> > > profling code we use TLS only for undirect call profiling (so there is
> > > no need to turn off rest of profiling).  I wonder if there is any chance
> > > to not make it seffault when it is done before TLS is set up?
> >
> > IFUNC selector should make minimum external calls, none is preferred.
>
> Edge porfiling only inserts (atomic) 64bit increments of counters.
> If target supports these operations inline, no external calls will be
> done.
>
> Indirect call profiling inserts the problematic TLS variable (to track
> caller-callee pairs). Value profiling also inserts various additional
> external calls to counters.
>
> I am perfectly fine with disabling instrumentation for ifunc selectors
> and functions only reachable from them, but I am worried about calles
> used also from non-ifunc path.

Programmers need to understand not to do it.

> For example selector implemented in C++ may do some string handling to
> match CPU name and propagation will disable profiling for std::string

On x86, they should use CPUID, not string functions.

> member functions (which may not be effective if comdat section is
> prevailed from other translation unit).

String functions may lead to external function calls which is dangerous.

> > Any external calls may lead to issues at run-time.  It is a very bad idea
> > to profile IFUNC selector via external function call.
>
> Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> there are other limitations on ifunc except for profiling, such as
> -fstack-protector-all.  So perhaps your propagation can be used to
> disable those features as well.

So, it may not be tree-profile specific.  Where should these 2 bits
be added?

> "Unfortunately there are actually a lot of restrictions placed on IFUNC
> usage which aren't entirely clear and the documentation needs to be
> updated." makes me wonder what other transformations are potentially
> dangerous.
>
> Honza
  
Jan Hubicka Feb. 29, 2024, 3:06 p.m. UTC | #5
> > I am worried about scenario where ifunc selector calls function foo
> > defined locally and foo is also used from other places possibly in hot
> > loops.
> > >
> > > > So it is not really reliable fix (though I guess it will work a lot of
> > > > common code).  I wonder what would be alternatives.  In GCC generated
> > > > profling code we use TLS only for undirect call profiling (so there is
> > > > no need to turn off rest of profiling).  I wonder if there is any chance
> > > > to not make it seffault when it is done before TLS is set up?
> > >
> > > IFUNC selector should make minimum external calls, none is preferred.
> >
> > Edge porfiling only inserts (atomic) 64bit increments of counters.
> > If target supports these operations inline, no external calls will be
> > done.
> >
> > Indirect call profiling inserts the problematic TLS variable (to track
> > caller-callee pairs). Value profiling also inserts various additional
> > external calls to counters.
> >
> > I am perfectly fine with disabling instrumentation for ifunc selectors
> > and functions only reachable from them, but I am worried about calles
> > used also from non-ifunc path.
> 
> Programmers need to understand not to do it.

It would help to have this documented. Should we warn when ifunc
resolver calls external function, comdat of function reachable from
non-ifunc code?
> 
> > For example selector implemented in C++ may do some string handling to
> > match CPU name and propagation will disable profiling for std::string
> 
> On x86, they should use CPUID, not string functions.
> 
> > member functions (which may not be effective if comdat section is
> > prevailed from other translation unit).
> 
> String functions may lead to external function calls which is dangerous.
> 
> > > Any external calls may lead to issues at run-time.  It is a very bad idea
> > > to profile IFUNC selector via external function call.
> >
> > Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> > there are other limitations on ifunc except for profiling, such as
> > -fstack-protector-all.  So perhaps your propagation can be used to
> > disable those features as well.
> 
> So, it may not be tree-profile specific.  Where should these 2 bits
> be added?

If we want to disable other transforms too, then I think having a bit in
cgraph_node for reachability from ifunc resolver makes sense.
I would still do the cycle detection using on-side hash_map to avoid
polution of the global datastructure.

Thanks,
Honza
> 
> > "Unfortunately there are actually a lot of restrictions placed on IFUNC
> > usage which aren't entirely clear and the documentation needs to be
> > updated." makes me wonder what other transformations are potentially
> > dangerous.
> >
> > Honza
> 
> 
> -- 
> H.J.
  
H.J. Lu Feb. 29, 2024, 3:11 p.m. UTC | #6
On Thu, Feb 29, 2024 at 7:06 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > > I am worried about scenario where ifunc selector calls function foo
> > > defined locally and foo is also used from other places possibly in hot
> > > loops.
> > > >
> > > > > So it is not really reliable fix (though I guess it will work a lot of
> > > > > common code).  I wonder what would be alternatives.  In GCC generated
> > > > > profling code we use TLS only for undirect call profiling (so there is
> > > > > no need to turn off rest of profiling).  I wonder if there is any chance
> > > > > to not make it seffault when it is done before TLS is set up?
> > > >
> > > > IFUNC selector should make minimum external calls, none is preferred.
> > >
> > > Edge porfiling only inserts (atomic) 64bit increments of counters.
> > > If target supports these operations inline, no external calls will be
> > > done.
> > >
> > > Indirect call profiling inserts the problematic TLS variable (to track
> > > caller-callee pairs). Value profiling also inserts various additional
> > > external calls to counters.
> > >
> > > I am perfectly fine with disabling instrumentation for ifunc selectors
> > > and functions only reachable from them, but I am worried about calles
> > > used also from non-ifunc path.
> >
> > Programmers need to understand not to do it.
>
> It would help to have this documented. Should we warn when ifunc
> resolver calls external function, comdat of function reachable from
> non-ifunc code?

That will be nice.

> >
> > > For example selector implemented in C++ may do some string handling to
> > > match CPU name and propagation will disable profiling for std::string
> >
> > On x86, they should use CPUID, not string functions.
> >
> > > member functions (which may not be effective if comdat section is
> > > prevailed from other translation unit).
> >
> > String functions may lead to external function calls which is dangerous.
> >
> > > > Any external calls may lead to issues at run-time.  It is a very bad idea
> > > > to profile IFUNC selector via external function call.
> > >
> > > Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> > > there are other limitations on ifunc except for profiling, such as
> > > -fstack-protector-all.  So perhaps your propagation can be used to
> > > disable those features as well.
> >
> > So, it may not be tree-profile specific.  Where should these 2 bits
> > be added?
>
> If we want to disable other transforms too, then I think having a bit in
> cgraph_node for reachability from ifunc resolver makes sense.
> I would still do the cycle detection using on-side hash_map to avoid
> polution of the global datastructure.
>

I will see what I can do.

Thanks.

> Thanks,
> Honza
> >
> > > "Unfortunately there are actually a lot of restrictions placed on IFUNC
> > > usage which aren't entirely clear and the documentation needs to be
> > > updated." makes me wonder what other transformations are potentially
> > > dangerous.
> > >
> > > Honza
> >
> >
> > --
> > H.J.
  
H.J. Lu March 5, 2024, 9:46 p.m. UTC | #7
On Thu, Feb 29, 2024 at 7:11 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Feb 29, 2024 at 7:06 AM Jan Hubicka <hubicka@ucw.cz> wrote:
> >
> > > > I am worried about scenario where ifunc selector calls function foo
> > > > defined locally and foo is also used from other places possibly in hot
> > > > loops.
> > > > >
> > > > > > So it is not really reliable fix (though I guess it will work a lot of
> > > > > > common code).  I wonder what would be alternatives.  In GCC generated
> > > > > > profling code we use TLS only for undirect call profiling (so there is
> > > > > > no need to turn off rest of profiling).  I wonder if there is any chance
> > > > > > to not make it seffault when it is done before TLS is set up?
> > > > >
> > > > > IFUNC selector should make minimum external calls, none is preferred.
> > > >
> > > > Edge porfiling only inserts (atomic) 64bit increments of counters.
> > > > If target supports these operations inline, no external calls will be
> > > > done.
> > > >
> > > > Indirect call profiling inserts the problematic TLS variable (to track
> > > > caller-callee pairs). Value profiling also inserts various additional
> > > > external calls to counters.
> > > >
> > > > I am perfectly fine with disabling instrumentation for ifunc selectors
> > > > and functions only reachable from them, but I am worried about calles
> > > > used also from non-ifunc path.
> > >
> > > Programmers need to understand not to do it.
> >
> > It would help to have this documented. Should we warn when ifunc
> > resolver calls external function, comdat of function reachable from
> > non-ifunc code?
>
> That will be nice.
>
> > >
> > > > For example selector implemented in C++ may do some string handling to
> > > > match CPU name and propagation will disable profiling for std::string
> > >
> > > On x86, they should use CPUID, not string functions.
> > >
> > > > member functions (which may not be effective if comdat section is
> > > > prevailed from other translation unit).
> > >
> > > String functions may lead to external function calls which is dangerous.
> > >
> > > > > Any external calls may lead to issues at run-time.  It is a very bad idea
> > > > > to profile IFUNC selector via external function call.
> > > >
> > > > Looking at https://sourceware.org/glibc/wiki/GNU_IFUNC
> > > > there are other limitations on ifunc except for profiling, such as
> > > > -fstack-protector-all.  So perhaps your propagation can be used to
> > > > disable those features as well.
> > >
> > > So, it may not be tree-profile specific.  Where should these 2 bits
> > > be added?
> >
> > If we want to disable other transforms too, then I think having a bit in
> > cgraph_node for reachability from ifunc resolver makes sense.
> > I would still do the cycle detection using on-side hash_map to avoid
> > polution of the global datastructure.
> >
>
> I will see what I can do.
>
>

The v2 patch is at

https://patchwork.sourceware.org/project/gcc/list/?series=31627
  

Patch

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 47f35e8078d..ce99f4a5114 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -100,6 +100,21 @@  enum symbol_partitioning_class
    SYMBOL_DUPLICATE
 };
 
+/* Classification whether a function has any IFUNC resolver caller.  */
+enum ifunc_caller
+{
+  /* It is unknown if this function has any IFUNC resolver caller.  */
+  IFUNC_CALLER_UNKNOWN,
+  /* Work in progress to check if this function has any IFUNC resolver
+     caller.  */
+  IFUNC_CALLER_WIP,
+  /* This function has at least an IFUNC resolver caller, including
+     itself.  */
+  IFUNC_CALLER_TRUE,
+  /* This function doesn't have any IFUNC resolver caller.  */
+  IFUNC_CALLER_FALSE
+};
+
 /* Base of all entries in the symbol table.
    The symtab_node is inherited by cgraph and varpol nodes.  */
 struct GTY((desc ("%h.type"), tag ("SYMTAB_SYMBOL"),
@@ -121,6 +136,7 @@  public:
       used_from_other_partition (false), in_other_partition (false),
       address_taken (false), in_init_priority_hash (false),
       need_lto_streaming (false), offloadable (false), ifunc_resolver (false),
+      has_ifunc_caller (IFUNC_CALLER_UNKNOWN),
       order (false), next_sharing_asm_name (NULL),
       previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list (),
       alias_target (NULL), lto_file_data (NULL), aux (NULL),
@@ -595,6 +611,8 @@  public:
   /* Set when symbol is an IFUNC resolver.  */
   unsigned ifunc_resolver : 1;
 
+  /* Classification whether a function has any IFUNC resolver caller.  */
+  ENUM_BITFIELD (ifunc_caller) has_ifunc_caller : 2;
 
   /* Ordering of all symtab entries.  */
   int order;
diff --git a/gcc/testsuite/gcc.dg/pr114115.c b/gcc/testsuite/gcc.dg/pr114115.c
new file mode 100644
index 00000000000..2629f591877
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr114115.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O0 -fprofile-generate -fdump-tree-optimized" } */
+/* { dg-require-profiling "-fprofile-generate" } */
+/* { dg-require-ifunc "" } */
+
+void *foo_ifunc2() __attribute__((ifunc("foo_resolver")));
+
+void bar(void)
+{
+}
+
+static int f3()
+{
+  bar ();
+  return 5;
+}
+
+void (*foo_resolver(void))(void)
+{
+  f3();
+  return bar;
+}
+
+/* { dg-final { scan-tree-dump-not "__gcov_indirect_call_profiler_v" "optimized" } } */
diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
index aed13e2b1bc..46478648b32 100644
--- a/gcc/tree-profile.cc
+++ b/gcc/tree-profile.cc
@@ -738,6 +738,72 @@  include_source_file_for_profile (const char *filename)
   return false;
 }
 
+/* Return true and set *DATA to true if NODE is an ifunc resolver.  */
+
+static bool
+check_ifunc_resolver (cgraph_node *node, void *data)
+{
+  if (node->ifunc_resolver)
+    {
+      bool *is_ifunc_resolver = (bool *) data;
+      *is_ifunc_resolver = true;
+      return true;
+    }
+  return false;
+}
+
+/* Return true if any caller of NODE is an ifunc resolver.  */
+
+static bool
+is_caller_ifunc_resolver (cgraph_node *node)
+{
+  if (node->has_ifunc_caller == IFUNC_CALLER_WIP)
+    gcc_unreachable ();
+
+  node->has_ifunc_caller = IFUNC_CALLER_WIP;
+  bool is_ifunc_resolver = false;
+
+  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
+    {
+      /* Check for recursive call.  */
+      if (e->caller == node)
+	continue;
+
+      switch (e->caller->has_ifunc_caller)
+	{
+	case IFUNC_CALLER_UNKNOWN:
+	  e->caller->call_for_symbol_and_aliases (check_ifunc_resolver,
+						  &is_ifunc_resolver,
+						  true);
+	  if (is_ifunc_resolver)
+	    {
+	      e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
+	      return true;
+	    }
+	  break;
+	case IFUNC_CALLER_TRUE:
+	  return true;
+	case IFUNC_CALLER_FALSE:
+	  /* This caller doesn't have any IFUNC resolver call.  Check
+	     the next caller.  */
+	  continue;
+
+	case IFUNC_CALLER_WIP:
+	  continue;
+	}
+
+      if (is_caller_ifunc_resolver (e->caller))
+	{
+	  e->caller->has_ifunc_caller = IFUNC_CALLER_TRUE;
+	  return true;
+	}
+      else
+	e->caller->has_ifunc_caller = IFUNC_CALLER_FALSE;
+    }
+
+  return false;
+}
+
 #ifndef HAVE_sync_compare_and_swapsi
 #define HAVE_sync_compare_and_swapsi 0
 #endif
@@ -848,6 +914,32 @@  tree_profiling (void)
 	    }
 	}
 
+      /* Do not instrument an IFUNC resolver nor its callees.  */
+      bool is_ifunc_resolver = false;
+      switch (node->has_ifunc_caller)
+	{
+	case IFUNC_CALLER_UNKNOWN:
+	  node->call_for_symbol_and_aliases (check_ifunc_resolver,
+					     &is_ifunc_resolver, true);
+	  if (is_ifunc_resolver || is_caller_ifunc_resolver (node))
+	    {
+	      node->has_ifunc_caller = IFUNC_CALLER_TRUE;
+	      continue;
+	    }
+	  else
+	    node->has_ifunc_caller = IFUNC_CALLER_FALSE;
+	  break;
+
+	case IFUNC_CALLER_TRUE:
+	  continue;
+
+	case IFUNC_CALLER_FALSE:
+	  break;
+
+	case IFUNC_CALLER_WIP:
+	  gcc_unreachable ();
+	}
+
       push_cfun (DECL_STRUCT_FUNCTION (node->decl));
 
       if (dump_file)