ipa-cp: Only create all-context nodes for local cgraph nodes (PR125207)

Message ID ri6tsrjn3ov.fsf@virgil.suse.cz
State New
Headers
Series ipa-cp: Only create all-context nodes for local cgraph nodes (PR125207) |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm fail Test failed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-aarch64-bootstrap success Build passed
linaro-tcwg-bot/tcwg_simplebootstrap_build--master-arm-bootstrap success Build passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 fail Test failed

Commit Message

Martin Jambor June 3, 2026, 1:39 p.m. UTC
  Hi,

IPA-CP contains special logic to create specially marked clones with
constants that come from all callers (that are constant in all
contexts).  In the past that was the only case in which the heuristics
tried to assess the effect of multiple constants at once (that is no
longer true since GCC 16).  They are also useful when we realize we
can refine them still further because recursive calls (such as in
testsuite/gcc.dg/ipa/pr93707.c) turn out to carry more constants after
the initial "all-context" clone is created.

Unfortunately, the code creating the clones simply redirects all
incoming call graph edges.  These may include edges which were indirect
in the phase of the algorithm which gathered IPA-CP lattices, and the
arguments in these calls might contain values different from all
initially known ones, leading to miscompilations like the one in
PR125207.

Since GCC 16 already feeds the heuristics with all known constants
that are passed along with the constant the evaluation was initiated
for, the all-context nodes are really only necessary for the recursive
refinement cases described above.  It seems appropriate to only create
them when they are what they are called, when there can be no indirect
or unknown calls to them, i.e. when they are local.  This has the nice
effect of simplifying decide_whether_version_node somewhat.

There is some testsuite fallout.  First, IPA-CP will now no longer
clone a non-static function to only remove a (non-constant) parameter.
This likely only makes sense with tiny values of param
ipa-cp-eval-threshold or very small functions which are likely going
to be inlined anyway.  But gcc.dg/vla-1.c expected that to happen so I
made the function to be cloned static.

The testcase libgomp.c/ipcp-cb-spec1.c stopped working for the same
reason (the outlined task body has its address taken).  However, looking
into the dumps, the test does not seem to test what it is supposed to:
the test function was completely eliminated before IPA.  I have
therefore changed it to really propagate constants.

Bootstrapped and tested on x86_64-linux.  LTO-O3 bootstrap underway.  OK
for master and the gcc-16 branch if it passes?

Thanks,

Martin


gcc/ChangeLog:

2026-06-02  Martin Jambor  <mjambor@suse.cz>

	PR ipa/125207
	* ipa-cp.cc (decide_about_value): Add detailed dumping about skipping
	a value when there are no edges for it.  Make dumping about skipping
	because of growth limit violation consistent with other cases.
	(decide_whether_version_node): Only create all-context nodes for local
	nodes, remove the logic doing it in other cases, allow cloning for
	single constant lattices for non-local nodes.  Adjust dumping.

gcc/testsuite/ChangeLog:

2026-06-02  Martin Jambor  <mjambor@suse.cz>

	PR ipa/125207
	* gcc.dg/ipa/pr125207.c: New test.
	* gcc.dg/vla-1.c (f1): Make function f1 static.

libgomp/ChangeLog:

2026-06-03  Martin Jambor  <mjambor@suse.cz>

	PR ipa/125207
	* testsuite/libgomp.c/ipcp-cb-spec1.c (test): Rework.
---
 gcc/ipa-cp.cc                               | 102 ++++++++------------
 gcc/testsuite/gcc.dg/ipa/pr125207.c         |  43 +++++++++
 gcc/testsuite/gcc.dg/vla-1.c                |   2 +-
 libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c |  14 ++-
 4 files changed, 93 insertions(+), 68 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr125207.c
  

Patch

diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
index 9e0b698b8cb..532bc007a2d 100644
--- a/gcc/ipa-cp.cc
+++ b/gcc/ipa-cp.cc
@@ -5957,7 +5957,7 @@  decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
   else if (val->local_size_cost + overall_size > get_max_overall_size (node))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
-	fprintf (dump_file, "   Ignoring candidate value because "
+	fprintf (dump_file, " - ignoring candidate value because "
 		 "maximum unit size would be reached with %li.\n",
 		 val->local_size_cost + overall_size);
       return false;
@@ -5965,7 +5965,19 @@  decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
   else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
 					    &rec_count_sum, &count_sum,
 					    &called_without_ipa_profile))
-    return false;
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	{
+	  fprintf (dump_file, " - skipping candidate value ");
+	  print_ipcp_constant_value (dump_file, val->value);
+	  fprintf (dump_file, " for ");
+	  ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
+	  if (offset != -1)
+	    fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
+	  fprintf (dump_file, ": no relevant callers\n");
+	}
+      return false;
+    }
 
   if (!dbg_cnt (ipa_cp_values))
     return false;
@@ -6167,22 +6179,33 @@  decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
   if (info->node_dead || count == 0)
     return false;
 
+  bool clone_for_all_contexts = node->local;
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
-	     node->dump_name ());
+    {
+      fprintf (dump_file, "\nEvaluating opportunities for %s.",
+	       node->dump_name ());
+      if (clone_for_all_contexts)
+	fprintf (dump_file, "  Will try to create a special all-context "
+		 "clone.\n");
+      fprintf (dump_file, "\n");
+    }
 
   auto_vec <cloning_opportunity_ranking, 32> opp_ranking;
   for (int i = 0; i < count;i++)
     {
       if (!ipa_is_param_used (info, i))
-	continue;
+	{
+	  if (dump_file && (dump_flags & TDF_DETAILS))
+	    fprintf (dump_file, " - ignoring unused parameter %i.\n", i);
+	  continue;
+	}
 
       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
       ipcp_lattice<tree> *lat = &plats->itself;
       ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
 
       if (!lat->bottom
-	  && !lat->is_single_const ())
+	  && (!clone_for_all_contexts || !lat->is_single_const ()))
 	{
 	  ipcp_value<tree> *val;
 	  for (val = lat->values; val; val = val->next)
@@ -6224,9 +6247,10 @@  decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
 	  ipcp_value<tree> *val;
 	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
 	    if (!aglat->bottom && aglat->values
-		/* If the following is false, the one value has been considered
+		/* If the following is false, the one value will be considered
 		   for cloning for all contexts.  */
-		&& (plats->aggs_contain_variable
+		&& (!clone_for_all_contexts
+		    || plats->aggs_contain_variable
 		    || !aglat->is_single_const ()))
 	      for (val = aglat->values; val; val = val->next)
 		{
@@ -6240,7 +6264,7 @@  decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
 	}
 
       if (!ctxlat->bottom
-	  && !ctxlat->is_single_const ())
+	  && (!clone_for_all_contexts || !ctxlat->is_single_const ()))
 	{
 	  ipcp_value<ipa_polymorphic_call_context> *val;
 	  for (val = ctxlat->values; val; val = val->next)
@@ -6283,19 +6307,21 @@  decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
 	}
     }
 
+  if (!clone_for_all_contexts)
+    return ret;
+
   struct caller_statistics stats;
   init_caller_stats (&stats);
   node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
 						false);
   if (!stats.n_calls)
     {
-      if (dump_file)
+      if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "   Not cloning for all contexts because "
 		 "there are no callers of the original node (any more).\n");
       return ret;
     }
 
-  bool do_clone_for_all_contexts = false;
   ipa_auto_call_arg_values avals;
   int removable_params_cost;
   bool ctx_independent_const
@@ -6304,60 +6330,10 @@  decide_whether_version_node (struct cgraph_node *node, int cur_sweep)
   if (ctx_independent_const || devirt_bonus > 0
       || (removable_params_cost && clone_for_param_removal_p (node)))
     {
-       ipa_call_estimates estimates;
-
-      estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
-      sreal time = estimates.nonspecialized_time - estimates.time;
-      time += devirt_bonus;
-      time += hint_time_bonus (node, estimates);
-      time += removable_params_cost;
-      int size = estimates.size - stats.n_calls * removable_params_cost;
-
-      if (dump_file && (dump_flags & TDF_DETAILS))
-	fprintf (dump_file, " - context independent values, size: %i, "
-		 "time_benefit: %f\n", size, (time).to_double ());
-
-      if (size <= 0 || node->local)
-	{
-	  if (!dbg_cnt (ipa_cp_values))
-	    return ret;
-
-	  do_clone_for_all_contexts = true;
-	  if (dump_file)
-	    fprintf (dump_file, "   Decided to specialize for all "
-		     "known contexts, code not going to grow.\n");
-	}
-      else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
-					   stats.count_sum, size,
-					   stats.called_without_ipa_profile,
-					   cur_sweep))
-	{
-	  if (size + overall_size <= get_max_overall_size (node))
-	    {
-	      if (!dbg_cnt (ipa_cp_values))
-		return ret;
-
-	      do_clone_for_all_contexts = true;
-	      overall_size += size;
-	      if (dump_file)
-		fprintf (dump_file, "   Decided to specialize for all "
-			 "known contexts, growth (to %li) deemed "
-			 "beneficial.\n", overall_size);
-	    }
-	  else if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "   Not cloning for all contexts because "
-		     "maximum unit size would be reached with %li.\n",
-		     size + overall_size);
-	}
-      else if (dump_file && (dump_flags & TDF_DETAILS))
-	fprintf (dump_file, "   Not cloning for all contexts because "
-		 "!good_cloning_opportunity_p.\n");
-    }
+      if (!dbg_cnt (ipa_cp_values))
+	return ret;
 
-  if (do_clone_for_all_contexts)
-    {
       auto_vec<cgraph_edge *> callers = node->collect_callers ();
-
       for (int i = callers.length () - 1; i >= 0; i--)
 	{
 	  cgraph_edge *cs = callers[i];
diff --git a/gcc/testsuite/gcc.dg/ipa/pr125207.c b/gcc/testsuite/gcc.dg/ipa/pr125207.c
new file mode 100644
index 00000000000..9c48add4810
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ipa/pr125207.c
@@ -0,0 +1,43 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3 --param ipa-cp-eval-threshold=1"  } */
+
+typedef int(*fp_t_5)(int, int, int);
+int g2, g30, g21, g17, g16, g14, g9, g3, g12, g31;
+_Bool g22, g26;
+static
+int f11(int, fp_t_5 a1, int, int, int)
+{
+    do {
+      g12 = a1(1 ^ g14, 0, g9);
+      if (g22)
+        if (g2 != g17)
+          if (g26)
+            return g30;
+    } while(true);
+}
+static __attribute__((noinline))
+int f2(int a0, int, int a2)
+{
+    static int t = 0;
+    t++;
+    if (t > 2)
+      __builtin_abort();
+    g26 = true;
+    g22 = true;
+    if (g3 != a0) {
+      g31 = 2;
+      a0 = g31;
+    }
+    if (a0 == 2)
+      if (a2 != 80)
+        return 0;
+    g17 = 6;
+    g21 += f11(0, f2, 0, 0, 0);
+    return 0;
+}
+void f1() { f2(0, 0, 0); }
+int main()
+{
+    f2(0, 0, 0);
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vla-1.c b/gcc/testsuite/gcc.dg/vla-1.c
index 52a1d9772d9..47f27b24961 100644
--- a/gcc/testsuite/gcc.dg/vla-1.c
+++ b/gcc/testsuite/gcc.dg/vla-1.c
@@ -1,7 +1,7 @@ 
 /* { dg-do compile } */
 /* { dg-options "-g -O3 -fdump-tree-optimized -fvar-tracking-assignments -fno-selective-scheduling -fno-selective-scheduling2 -fno-ipa-vrp --param ipa-cp-eval-threshold=1" } */
 
-int __attribute__((noinline))
+static int __attribute__((noinline))
 f1 (int i)
 {
   char a[i + 1];
diff --git a/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
index a2ab03f692a..fa0d27b0f92 100644
--- a/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
+++ b/libgomp/testsuite/libgomp.c/ipcp-cb-spec1.c
@@ -4,15 +4,21 @@ 
 /* { dg-options "-O3 -fopenmp -std=gnu99 -fdump-ipa-cp-details --param ipa-cp-eval-threshold=1" } */
 /* { dg-require-effective-target fopenmp } */
 
-void test(int c) {
+static inline void __attribute__((always_inline)) test(int c) {
   for (int i = 0; i < c; i++)
     if (!__builtin_constant_p(c))
       __builtin_abort();
 }
-int main() {
+
+int foo(int c) {
 #pragma omp task
-  test(7);
+  test(c);
+  return 0;
+}
+
+int main() {
+  foo(7);
   return 0;
 }
 
-/* { dg-final { scan-ipa-dump "Creating a specialized node of main._omp_fn" "cp" } } */
+/* { dg-final { scan-ipa-dump "Creating a specialized node of foo._omp_fn" "cp" } } */