tree-inline: Add __builtin_stack_{save,restore} pair around inline calls with calls to alloca [PR113596]
Checks
Commit Message
Hi!
The following patch adds a save_NNN = __builtin_stack_save (); ...
__builtin_stack_restore (save_NNN);
pair around inlined calls to functions which call alloca (alloca calls emitted
for VLA variables are ignored in that decision).
The patch doesn't change whether we try to inline such calls or not; it
only fixes the behavior when we do inline them despite the existing checks.
The stack save/restore pair restores the behavior that alloca-acquired regions
are freed at the end of the containing call.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
2024-05-03 Jakub Jelinek <jakub@redhat.com>
PR middle-end/113596
* tree-inline.cc (expand_call_inline): Emit __builtin_stack_save
and __builtin_stack_restore calls around inlined functions which
call alloca.
* gcc.dg/pr113596.c: New test.
* gcc.dg/tree-ssa/pr113596.c: New test.
Jakub
Comments
On Fri, 3 May 2024, Jakub Jelinek wrote:
> Hi!
>
> The following patch adds save_NNN = __builtin_stack_save (); ...
> __builtin_stack_restore (save_NNN);
> pair around inline calls which call alloca (alloca calls because of
> VLA vars are ignored in that decision).
> The patch doesn't change anything on whether we try to inline such calls or
> not, it just fixes the behavior when we inline them despite those checks.
> The stack save/restore restores the behavior that alloca acquired regions
> are freed at the end of the containing call.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK.
Thanks,
Richard.
> 2024-05-03 Jakub Jelinek <jakub@redhat.com>
>
> PR middle-end/113596
> * tree-inline.cc (expand_call_inline): Emit __builtin_stack_save
> and __builtin_stack_restore calls around inlined functions which
> call alloca.
>
> * gcc.dg/pr113596.c: New test.
> * gcc.dg/tree-ssa/pr113596.c: New test.
>
> --- gcc/tree-inline.cc.jj 2024-04-11 11:09:07.274670922 +0200
> +++ gcc/tree-inline.cc 2024-05-02 19:05:06.963750322 +0200
> @@ -4794,6 +4794,7 @@ expand_call_inline (basic_block bb, gimp
> use_operand_p use;
> gimple *simtenter_stmt = NULL;
> vec<tree> *simtvars_save;
> + tree save_stack = NULL_TREE;
>
> /* The gimplifier uses input_location in too many places, such as
> internal_get_tmp_var (). */
> @@ -5042,6 +5043,28 @@ expand_call_inline (basic_block bb, gimp
> GSI_NEW_STMT);
> }
>
> + /* If function to be inlined calls alloca, wrap the inlined function
> + in between save_stack = __builtin_stack_save (); and
> + __builtin_stack_restore (save_stack); calls. */
> + if (id->src_cfun->calls_alloca && !gimple_call_noreturn_p (stmt))
> + /* Don't do this for VLA allocations though, just for user alloca
> + calls. */
> + for (struct cgraph_edge *e = id->src_node->callees; e; e = e->next_callee)
> + if (gimple_maybe_alloca_call_p (e->call_stmt)
> + && !gimple_call_alloca_for_var_p (e->call_stmt))
> + {
> + tree fn = builtin_decl_implicit (BUILT_IN_STACK_SAVE);
> + gcall *call = gimple_build_call (fn, 0);
> + save_stack = make_ssa_name (ptr_type_node);
> + gimple_call_set_lhs (call, save_stack);
> + gimple_stmt_iterator si = gsi_last_bb (bb);
> + gsi_insert_after (&si, call, GSI_NEW_STMT);
> + struct cgraph_node *dest = cgraph_node::get_create (fn);
> + id->dst_node->create_edge (dest, call, bb->count)->inline_failed
> + = CIF_BODY_NOT_AVAILABLE;
> + break;
> + }
> +
> if (DECL_INITIAL (fn))
> {
> if (gimple_block (stmt))
> @@ -5165,6 +5188,17 @@ expand_call_inline (basic_block bb, gimp
> }
> }
>
> + if (save_stack)
> + {
> + tree fn = builtin_decl_implicit (BUILT_IN_STACK_RESTORE);
> + gcall *call = gimple_build_call (fn, 1, save_stack);
> + gsi_insert_before (&stmt_gsi, call, GSI_SAME_STMT);
> + struct cgraph_node *dest = cgraph_node::get_create (fn);
> + id->dst_node->create_edge (dest, call,
> + return_block->count)->inline_failed
> + = CIF_BODY_NOT_AVAILABLE;
> + }
> +
> /* Reset the escaped solution. */
> if (cfun->gimple_df)
> {
> --- gcc/testsuite/gcc.dg/pr113596.c.jj 2024-05-02 15:05:25.048642302 +0200
> +++ gcc/testsuite/gcc.dg/pr113596.c 2024-05-02 15:05:25.048642302 +0200
> @@ -0,0 +1,24 @@
> +/* PR middle-end/113596 */
> +/* { dg-do run } */
> +/* { dg-options "-O2" } */
> +
> +__attribute__((noipa)) void
> +bar (char *p, int n)
> +{
> + p[0] = 1;
> + p[n - 1] = 2;
> +}
> +
> +static inline __attribute__((always_inline)) void
> +foo (int n)
> +{
> + char *p = __builtin_alloca (n);
> + bar (p, n);
> +}
> +
> +int
> +main ()
> +{
> + for (int i = 2; i < 8192; ++i)
> + foo (i);
> +}
> --- gcc/testsuite/gcc.dg/tree-ssa/pr113596.c.jj 2024-05-02 19:10:29.218455257 +0200
> +++ gcc/testsuite/gcc.dg/tree-ssa/pr113596.c 2024-05-02 19:11:11.211895559 +0200
> @@ -0,0 +1,37 @@
> +/* PR middle-end/113596 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-einline" } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_save \\\(" 3 "einline" } } */
> +/* { dg-final { scan-tree-dump-times "__builtin_stack_restore \\\(" 3 "einline" } } */
> +
> +void baz (char *p, int n);
> +volatile int v;
> +
> +static inline __attribute__((always_inline)) void
> +foo (int n)
> +{
> + ++v;
> + {
> + char *p = __builtin_alloca (n);
> + baz (p, n);
> + }
> + ++v;
> +}
> +
> +static inline __attribute__((always_inline)) void
> +bar (int n)
> +{
> + ++v;
> + {
> + char p[n];
> + baz (p, n);
> + }
> + ++v;
> +}
> +
> +void
> +qux (int n)
> +{
> + foo (n);
> + bar (n);
> +}
>
> Jakub
>
>
@@ -4794,6 +4794,7 @@ expand_call_inline (basic_block bb, gimp
use_operand_p use;
gimple *simtenter_stmt = NULL;
vec<tree> *simtvars_save;
+ tree save_stack = NULL_TREE;
/* The gimplifier uses input_location in too many places, such as
internal_get_tmp_var (). */
@@ -5042,6 +5043,28 @@ expand_call_inline (basic_block bb, gimp
GSI_NEW_STMT);
}
+ /* If function to be inlined calls alloca, wrap the inlined function
+ in between save_stack = __builtin_stack_save (); and
+ __builtin_stack_restore (save_stack); calls. */
+ if (id->src_cfun->calls_alloca && !gimple_call_noreturn_p (stmt))
+ /* Don't do this for VLA allocations though, just for user alloca
+ calls. */
+ for (struct cgraph_edge *e = id->src_node->callees; e; e = e->next_callee)
+ if (gimple_maybe_alloca_call_p (e->call_stmt)
+ && !gimple_call_alloca_for_var_p (e->call_stmt))
+ {
+ tree fn = builtin_decl_implicit (BUILT_IN_STACK_SAVE);
+ gcall *call = gimple_build_call (fn, 0);
+ save_stack = make_ssa_name (ptr_type_node);
+ gimple_call_set_lhs (call, save_stack);
+ gimple_stmt_iterator si = gsi_last_bb (bb);
+ gsi_insert_after (&si, call, GSI_NEW_STMT);
+ struct cgraph_node *dest = cgraph_node::get_create (fn);
+ id->dst_node->create_edge (dest, call, bb->count)->inline_failed
+ = CIF_BODY_NOT_AVAILABLE;
+ break;
+ }
+
if (DECL_INITIAL (fn))
{
if (gimple_block (stmt))
@@ -5165,6 +5188,17 @@ expand_call_inline (basic_block bb, gimp
}
}
+ if (save_stack)
+ {
+ tree fn = builtin_decl_implicit (BUILT_IN_STACK_RESTORE);
+ gcall *call = gimple_build_call (fn, 1, save_stack);
+ gsi_insert_before (&stmt_gsi, call, GSI_SAME_STMT);
+ struct cgraph_node *dest = cgraph_node::get_create (fn);
+ id->dst_node->create_edge (dest, call,
+ return_block->count)->inline_failed
+ = CIF_BODY_NOT_AVAILABLE;
+ }
+
/* Reset the escaped solution. */
if (cfun->gimple_df)
{
@@ -0,0 +1,24 @@
+/* PR middle-end/113596 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+__attribute__((noipa)) void
+bar (char *p, int n)
+{
+ p[0] = 1;
+ p[n - 1] = 2;
+}
+
+static inline __attribute__((always_inline)) void
+foo (int n)
+{
+ char *p = __builtin_alloca (n);
+ bar (p, n);
+}
+
+int
+main ()
+{
+ for (int i = 2; i < 8192; ++i)
+ foo (i);
+}
@@ -0,0 +1,37 @@
+/* PR middle-end/113596 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-einline" } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_save \\\(" 3 "einline" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_stack_restore \\\(" 3 "einline" } } */
+
+void baz (char *p, int n);
+volatile int v;
+
+static inline __attribute__((always_inline)) void
+foo (int n)
+{
+ ++v;
+ {
+ char *p = __builtin_alloca (n);
+ baz (p, n);
+ }
+ ++v;
+}
+
+static inline __attribute__((always_inline)) void
+bar (int n)
+{
+ ++v;
+ {
+ char p[n];
+ baz (p, n);
+ }
+ ++v;
+}
+
+void
+qux (int n)
+{
+ foo (n);
+ bar (n);
+}