tree-inline: Add __builtin_stack_{save,restore} pair around inline calls with calls to alloca [PR113596]

Message ID ZjSM3MSZxkBOzn9v@tucnak
State New
Headers
Series tree-inline: Add __builtin_stack_{save,restore} pair around inline calls with calls to alloca [PR113596] |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm fail Patch failed to apply
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 fail Patch failed to apply

Commit Message

Jakub Jelinek May 3, 2024, 7:06 a.m. UTC
  Hi!

The following patch adds a save_NNN = __builtin_stack_save (); ...
__builtin_stack_restore (save_NNN);
pair around inlined calls to functions which call alloca (alloca calls
emitted for VLA variables are ignored in that decision).
The patch doesn't change whether we try to inline such calls or not; it
just fixes the behavior when we inline them despite those checks.
The stack save/restore pair restores the behavior that alloca-acquired
regions are freed at the end of the containing call.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-05-03  Jakub Jelinek  <jakub@redhat.com>

	PR middle-end/113596
	* tree-inline.cc (expand_call_inline): Emit __builtin_stack_save
	and __builtin_stack_restore calls around inlined functions which
	call alloca.

	* gcc.dg/pr113596.c: New test.
	* gcc.dg/tree-ssa/pr113596.c: New test.


	Jakub
  

Comments

Richard Biener May 3, 2024, 7:36 a.m. UTC | #1
On Fri, 3 May 2024, Jakub Jelinek wrote:

> Hi!
> 
> The following patch adds save_NNN = __builtin_stack_save (); ...
> __builtin_stack_restore (save_NNN);
> pair around inline calls which call alloca (alloca calls because of
> VLA vars are ignored in that decision).
> The patch doesn't change anything on whether we try to inline such calls or
> not, it just fixes the behavior when we inline them despite those checks.
> The stack save/restore restores the behavior that alloca acquired regions
> are freed at the end of the containing call.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2024-05-03  Jakub Jelinek  <jakub@redhat.com>
> 
> 	PR middle-end/113596
> 	* tree-inline.cc (expand_call_inline): Emit __builtin_stack_save
> 	and __builtin_stack_restore calls around inlined functions which
> 	call alloca.
> 
> 	* gcc.dg/pr113596.c: New test.
> 	* gcc.dg/tree-ssa/pr113596.c: New test.
> 
> --- gcc/tree-inline.cc.jj	2024-04-11 11:09:07.274670922 +0200
> +++ gcc/tree-inline.cc	2024-05-02 19:05:06.963750322 +0200
> @@ -4794,6 +4794,7 @@ expand_call_inline (basic_block bb, gimp
>    use_operand_p use;
>    gimple *simtenter_stmt = NULL;
>    vec<tree> *simtvars_save;
> +  tree save_stack = NULL_TREE;
>  
>    /* The gimplifier uses input_location in too many places, such as
>       internal_get_tmp_var ().  */
> @@ -5042,6 +5043,28 @@ expand_call_inline (basic_block bb, gimp
>  			GSI_NEW_STMT);
>      }
>  
> +  /* If function to be inlined calls alloca, wrap the inlined function
> +     in between save_stack = __builtin_stack_save (); and
> +     __builtin_stack_restore (save_stack); calls.  */
> +  if (id->src_cfun->calls_alloca && !gimple_call_noreturn_p (stmt))
> +    /* Don't do this for VLA allocations though, just for user alloca
> +       calls.  */
> +    for (struct cgraph_edge *e = id->src_node->callees; e; e = e->next_callee)
> +      if (gimple_maybe_alloca_call_p (e->call_stmt)
> +	  && !gimple_call_alloca_for_var_p (e->call_stmt))
> +	{
> +	  tree fn = builtin_decl_implicit (BUILT_IN_STACK_SAVE);
> +	  gcall *call = gimple_build_call (fn, 0);
> +	  save_stack = make_ssa_name (ptr_type_node);
> +	  gimple_call_set_lhs (call, save_stack);
> +	  gimple_stmt_iterator si = gsi_last_bb (bb);
> +	  gsi_insert_after (&si, call, GSI_NEW_STMT);
> +	  struct cgraph_node *dest = cgraph_node::get_create (fn);
> +	  id->dst_node->create_edge (dest, call, bb->count)->inline_failed
> +	    = CIF_BODY_NOT_AVAILABLE;
> +	  break;
> +	}
> +
>    if (DECL_INITIAL (fn))
>      {
>        if (gimple_block (stmt))
> @@ -5165,6 +5188,17 @@ expand_call_inline (basic_block bb, gimp
>  	    }
>  	}
>  
> +  if (save_stack)
> +    {
> +      tree fn = builtin_decl_implicit (BUILT_IN_STACK_RESTORE);
> +      gcall *call = gimple_build_call (fn, 1, save_stack);
> +      gsi_insert_before (&stmt_gsi, call, GSI_SAME_STMT);
> +      struct cgraph_node *dest = cgraph_node::get_create (fn);
> +      id->dst_node->create_edge (dest, call,
> +				 return_block->count)->inline_failed
> +	= CIF_BODY_NOT_AVAILABLE;
> +    }
> +
>    /* Reset the escaped solution.  */
>    if (cfun->gimple_df)
>      {
> --- gcc/testsuite/gcc.dg/pr113596.c.jj	2024-05-02 15:05:25.048642302 +0200
> +++ gcc/testsuite/gcc.dg/pr113596.c	2024-05-02 15:05:25.048642302 +0200
> @@ -0,0 +1,24 @@
> +/* PR middle-end/113596 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +__attribute__((noipa)) void
> +bar (char *p, int n)
> +{
> +  p[0] = 1;
> +  p[n - 1] = 2;
> +}
> +
> +static inline __attribute__((always_inline)) void
> +foo (int n)
> +{
> +  char *p = __builtin_alloca (n);
> +  bar (p, n);
> +}
> +
> +int
> +main ()
> +{
> +  for (int i = 2; i < 8192; ++i)
> +    foo (i);
> +}
> --- gcc/testsuite/gcc.dg/tree-ssa/pr113596.c.jj	2024-05-02 19:10:29.218455257 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr113596.c	2024-05-02 19:11:11.211895559 +0200
> @@ -0,0 +1,37 @@
> +/* PR middle-end/113596 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-einline" } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_save \\\(" 3 "einline" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_restore \\\(" 3 "einline" } } */
> +
> +void baz (char *p, int n);
> +volatile int v;
> +
> +static inline __attribute__((always_inline)) void
> +foo (int n)
> +{
> +  ++v;
> +  {
> +    char *p = __builtin_alloca (n);
> +    baz (p, n);
> +  }
> +  ++v;
> +}
> +
> +static inline __attribute__((always_inline)) void
> +bar (int n)
> +{
> +  ++v;
> +  {
> +    char p[n];
> +    baz (p, n);
> +  }
> +  ++v;
> +}
> +
> +void
> +qux (int n)
> +{
> +  foo (n);
> +  bar (n);
> +}
> 
> 	Jakub
> 
>
  

Patch

--- gcc/tree-inline.cc.jj	2024-04-11 11:09:07.274670922 +0200
+++ gcc/tree-inline.cc	2024-05-02 19:05:06.963750322 +0200
@@ -4794,6 +4794,7 @@  expand_call_inline (basic_block bb, gimp
   use_operand_p use;
   gimple *simtenter_stmt = NULL;
   vec<tree> *simtvars_save;
+  tree save_stack = NULL_TREE;
 
   /* The gimplifier uses input_location in too many places, such as
      internal_get_tmp_var ().  */
@@ -5042,6 +5043,28 @@  expand_call_inline (basic_block bb, gimp
 			GSI_NEW_STMT);
     }
 
+  /* If function to be inlined calls alloca, wrap the inlined function
+     in between save_stack = __builtin_stack_save (); and
+     __builtin_stack_restore (save_stack); calls.  */
+  if (id->src_cfun->calls_alloca && !gimple_call_noreturn_p (stmt))
+    /* Don't do this for VLA allocations though, just for user alloca
+       calls.  */
+    for (struct cgraph_edge *e = id->src_node->callees; e; e = e->next_callee)
+      if (gimple_maybe_alloca_call_p (e->call_stmt)
+	  && !gimple_call_alloca_for_var_p (e->call_stmt))
+	{
+	  tree fn = builtin_decl_implicit (BUILT_IN_STACK_SAVE);
+	  gcall *call = gimple_build_call (fn, 0);
+	  save_stack = make_ssa_name (ptr_type_node);
+	  gimple_call_set_lhs (call, save_stack);
+	  gimple_stmt_iterator si = gsi_last_bb (bb);
+	  gsi_insert_after (&si, call, GSI_NEW_STMT);
+	  struct cgraph_node *dest = cgraph_node::get_create (fn);
+	  id->dst_node->create_edge (dest, call, bb->count)->inline_failed
+	    = CIF_BODY_NOT_AVAILABLE;
+	  break;
+	}
+
   if (DECL_INITIAL (fn))
     {
       if (gimple_block (stmt))
@@ -5165,6 +5188,17 @@  expand_call_inline (basic_block bb, gimp
 	    }
 	}
 
+  if (save_stack)
+    {
+      tree fn = builtin_decl_implicit (BUILT_IN_STACK_RESTORE);
+      gcall *call = gimple_build_call (fn, 1, save_stack);
+      gsi_insert_before (&stmt_gsi, call, GSI_SAME_STMT);
+      struct cgraph_node *dest = cgraph_node::get_create (fn);
+      id->dst_node->create_edge (dest, call,
+				 return_block->count)->inline_failed
+	= CIF_BODY_NOT_AVAILABLE;
+    }
+
   /* Reset the escaped solution.  */
   if (cfun->gimple_df)
     {
--- gcc/testsuite/gcc.dg/pr113596.c.jj	2024-05-02 15:05:25.048642302 +0200
+++ gcc/testsuite/gcc.dg/pr113596.c	2024-05-02 15:05:25.048642302 +0200
@@ -0,0 +1,24 @@ 
+/* PR middle-end/113596 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) void
+bar (char *p, int n)
+{
+  p[0] = 1;
+  p[n - 1] = 2;
+}
+
+static inline __attribute__((always_inline)) void
+foo (int n)
+{
+  char *p = __builtin_alloca (n);
+  bar (p, n);
+}
+
+int
+main ()
+{
+  for (int i = 2; i < 8192; ++i)
+    foo (i);
+}
--- gcc/testsuite/gcc.dg/tree-ssa/pr113596.c.jj	2024-05-02 19:10:29.218455257 +0200
+++ gcc/testsuite/gcc.dg/tree-ssa/pr113596.c	2024-05-02 19:11:11.211895559 +0200
@@ -0,0 +1,37 @@ 
+/* PR middle-end/113596 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-einline" } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save \\\(" 3 "einline" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore \\\(" 3 "einline" } } */
+
+void baz (char *p, int n);
+volatile int v;
+
+static inline __attribute__((always_inline)) void
+foo (int n)
+{
+  ++v;
+  {
+    char *p = __builtin_alloca (n);
+    baz (p, n);
+  }
+  ++v;
+}
+
+static inline __attribute__((always_inline)) void
+bar (int n)
+{
+  ++v;
+  {
+    char p[n];
+    baz (p, n);
+  }
+  ++v;
+}
+
+void
+qux (int n)
+{
+  foo (n);
+  bar (n);
+}