Use modref kills in tree-ssa-dse
Commit Message
Hi,
this patch extends tree-ssa-dse to use modref kill summary to clear
live_bytes. This makes it possible to remove calls that are killed
in parts.
I noticed that DSE duplicates the logic of tree-ssa-alias that is mathing bases
of memory accesses. Here operands_equal_p (base1, base, OEP_ADDRESS_OF) is used.
So it won't work with mismatching memref offsets. We probably want to commonize
this and add common function that matches bases and returns offset adjustments.
I wonder however if it can catch any cases that the tree-ssa-alias code doesn't?
Other check that stmt_kills_ref_p has and tree-ssa-dse is for non-call-exceptions.
Bootstrapped/regtested x86_64-linux, OK?
gcc/ChangeLog:
* ipa-modref.c (get_modref_function_summary): New function.
* ipa-modref.h (get_modref_function_summary): Declare.
* tree-ssa-dse.c (clear_live_bytes_for_ref): Break out from ...
(clear_bytes_written_by): ... here; add handling of modref summary.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/modref-dse-4.c: New test.
Comments
On Mon, 15 Nov 2021, Jan Hubicka wrote:
> Hi,
> this patch extends tree-ssa-dse to use modref kill summary to clear
> live_bytes. This makes it possible to remove calls that are killed
> in parts.
>
> I noticed that DSE duplicates the logic of tree-ssa-alias that is
> mathing bases of memory accesses. Here operands_equal_p (base1, base,
> OEP_ADDRESS_OF) is used. So it won't work with mismatching memref
> offsets. We probably want to commonize this and add common function
> that matches bases and returns offset adjustments. I wonder however if
> it can catch any cases that the tree-ssa-alias code doesn't?
Not sure, tree-ssa-dse.c doesn't seem to handle MEM_REF with offset?
VN has adjust_offsets_for_equal_base_address for this purpose. I
agree that some common functionality like
bool
get_relative_extent_of (const ao_ref *base, const ao_ref *ref,
poly_int64 *offset);
that computes [offset, offset + ref->[max_]size] of REF adjusted as to
make ao_ref_base have the same address (or return false if not
possible). Then [ base->offset, base->offset + base->max_size ]
can be compared against that.
> Other check that stmt_kills_ref_p has and tree-ssa-dse is for
> non-call-exceptions.
>
> Bootstrapped/regtested x86_64-linux, OK?
See below.
> gcc/ChangeLog:
>
> * ipa-modref.c (get_modref_function_summary): New function.
> * ipa-modref.h (get_modref_function_summary): Declare.
> * tree-ssa-dse.c (clear_live_bytes_for_ref): Break out from ...
> (clear_bytes_written_by): ... here; add handling of modref summary.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/tree-ssa/modref-dse-4.c: New test.
>
> diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
> index df4612bbff9..8966f9fd2a4 100644
> --- a/gcc/ipa-modref.c
> +++ b/gcc/ipa-modref.c
> @@ -724,6 +724,22 @@ get_modref_function_summary (cgraph_node *func)
> return r;
> }
>
> +/* Get function summary for CALL if it exists, return NULL otherwise.
> + If INTERPOSED is non-NULL set it to true if call may be interposed. */
> +
> +modref_summary *
> +get_modref_function_summary (gcall *call, bool *interposed)
> +{
> + tree callee = gimple_call_fndecl (call);
> + if (!callee)
> + return NULL;
> + struct cgraph_node *node = cgraph_node::get (callee);
> + if (!node)
> + return NULL;
> + if (interposed)
> + *interposed = !node->binds_to_current_def_p ();
> + return get_modref_function_summary (node);
> +}
> +
> namespace {
>
> /* Construct modref_access_node from REF. */
> diff --git a/gcc/ipa-modref.h b/gcc/ipa-modref.h
> index 9e8a30fd80a..72e608864ce 100644
> --- a/gcc/ipa-modref.h
> +++ b/gcc/ipa-modref.h
> @@ -50,6 +50,7 @@ struct GTY(()) modref_summary
> };
>
> modref_summary *get_modref_function_summary (cgraph_node *func);
> +modref_summary *get_modref_function_summary (gcall *call, bool *interposed);
> void ipa_modref_c_finalize ();
> void ipa_merge_modref_summary_after_inlining (cgraph_edge *e);
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
> new file mode 100644
> index 00000000000..81aa7dc587c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/modref-dse-4.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-dse2-details" } */
> +struct a {int a,b,c;};
> +__attribute__ ((noinline))
> +void
> +kill_me (struct a *a)
> +{
> + a->a=0;
> + a->b=0;
> + a->c=0;
> +}
> +__attribute__ ((noinline))
> +void
> +my_pleasure (struct a *a)
> +{
> + a->a=1;
> + a->c=2;
> +}
> +void
> +set (struct a *a)
> +{
> + kill_me (a);
> + my_pleasure (a);
> + a->b=1;
> +}
> +/* { dg-final { scan-tree-dump "Deleted dead store: kill_me" "dse2" } } */
> diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
> index ce0083a6dab..d2f54b0faad 100644
> --- a/gcc/tree-ssa-dse.c
> +++ b/gcc/tree-ssa-dse.c
> @@ -209,6 +209,24 @@ normalize_ref (ao_ref *copy, ao_ref *ref)
> return true;
> }
>
> +/* Update LIVE_BYTES tracking REF for write to WRITE:
> + Verify we have the same base memory address, the write
> + has a known size and overlaps with REF. */
> +static void
> +clear_live_bytes_for_ref (sbitmap live_bytes, ao_ref *ref, ao_ref *write)
> +{
> + HOST_WIDE_INT start, size;
> +
> + if (valid_ao_ref_for_dse (write)
> + && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
> + && known_eq (write->size, write->max_size)
> + && normalize_ref (write, ref)
normalize_ref alters 'write', I think we should work on a local
copy here. See live_bytes_read which takes a copy of 'use_ref'.
Otherwise looks good to me.
Thanks,
Richard.
> + && (write->offset - ref->offset).is_constant (&start)
> + && write->size.is_constant (&size))
> + bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
> + size / BITS_PER_UNIT);
> +}
> +
> /* Clear any bytes written by STMT from the bitmap LIVE_BYTES. The base
> address written by STMT must match the one found in REF, which must
> have its base address previously initialized.
> @@ -220,20 +238,21 @@ static void
> clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref)
> {
> ao_ref write;
> +
> + if (gcall *call = dyn_cast <gcall *> (stmt))
> + {
> + bool interposed;
> + modref_summary *summary = get_modref_function_summary (call, &interposed);
> +
> + if (summary && !interposed)
> + for (auto kill : summary->kills)
> + if (kill.get_ao_ref (as_a <gcall *> (stmt), &write))
> + clear_live_bytes_for_ref (live_bytes, ref, &write);
> + }
> if (!initialize_ao_ref_for_dse (stmt, &write))
> return;
>
> - /* Verify we have the same base memory address, the write
> - has a known size and overlaps with REF. */
> - HOST_WIDE_INT start, size;
> - if (valid_ao_ref_for_dse (&write)
> - && operand_equal_p (write.base, ref->base, OEP_ADDRESS_OF)
> - && known_eq (write.size, write.max_size)
> - && normalize_ref (&write, ref)
> - && (write.offset - ref->offset).is_constant (&start)
> - && write.size.is_constant (&size))
> - bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
> - size / BITS_PER_UNIT);
> + clear_live_bytes_for_ref (live_bytes, ref, &write);
> }
>
> /* REF is a memory write. Extract relevant information from it and
>
>
> Not sure, tree-ssa-dse.c doesn't seem to handle MEM_REF with offset?
>
> VN has adjust_offsets_for_equal_base_address for this purpose. I
> agree that some common functionality like
>
> bool
> get_relative_extent_of (const ao_ref *base, const ao_ref *ref,
> poly_int64 *offset);
>
> that computes [offset, offset + ref->[max_]size] of REF adjusted as to
> make ao_ref_base have the same address (or return false if not
> possible). Then [ base->offset, base->offset + base->max_size ]
> can be compared against that.
OK, I will look into that.
> > + if (valid_ao_ref_for_dse (write)
> > + && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
> > + && known_eq (write->size, write->max_size)
> > + && normalize_ref (write, ref)
>
> normalize_ref alters 'write', I think we should work on a local
> copy here. See live_bytes_read which takes a copy of 'use_ref'.
We never proces same write twice (get_ao_ref is always constructing
fresh copy), so this should be safe. Or shall I turn the write
parameter to "ao_ref write" instead of "ao_ref *write" just to be sure
we do not break infuture?
Thank you,
Honza
On Tue, 16 Nov 2021, Jan Hubicka wrote:
> >
> > Not sure, tree-ssa-dse.c doesn't seem to handle MEM_REF with offset?
> >
> > VN has adjust_offsets_for_equal_base_address for this purpose. I
> > agree that some common functionality like
> >
> > bool
> > get_relative_extent_of (const ao_ref *base, const ao_ref *ref,
> > poly_int64 *offset);
> >
> > that computes [offset, offset + ref->[max_]size] of REF adjusted as to
> > make ao_ref_base have the same address (or return false if not
> > possible). Then [ base->offset, base->offset + base->max_size ]
> > can be compared against that.
>
> OK, I will look into that.
> > > + if (valid_ao_ref_for_dse (write)
> > > + && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
> > > + && known_eq (write->size, write->max_size)
> > > + && normalize_ref (write, ref)
> >
> > normalize_ref alters 'write', I think we should work on a local
> > copy here. See live_bytes_read which takes a copy of 'use_ref'.
>
> We never proces same write twice (get_ao_ref is always constructing
> fresh copy), so this should be safe. Or shall I turn the write
> parameter to "ao_ref write" instead of "ao_ref *write" just to be sure
> we do not break infuture?
Yes.
Thanks,
Richard.
@@ -724,6 +724,22 @@ get_modref_function_summary (cgraph_node *func)
return r;
}
+/* Get function summary for CALL if it exists, return NULL otherwise.
+ If INTERPOSED is non-NULL set it to true if call may be interposed. */
+
+modref_summary *
+get_modref_function_summary (gcall *call, bool *interposed)
+{
+ tree callee = gimple_call_fndecl (call);
+ if (!callee)
+ return NULL;
+ struct cgraph_node *node = cgraph_node::get (callee);
+ if (!node)
+ return NULL;
+ if (interposed)
+ *interposed = !node->binds_to_current_def_p ();
+ return get_modref_function_summary (node);
+}
+
namespace {
/* Construct modref_access_node from REF. */
@@ -50,6 +50,7 @@ struct GTY(()) modref_summary
};
modref_summary *get_modref_function_summary (cgraph_node *func);
+modref_summary *get_modref_function_summary (gcall *call, bool *interposed);
void ipa_modref_c_finalize ();
void ipa_merge_modref_summary_after_inlining (cgraph_edge *e);
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-dse2-details" } */
+struct a {int a,b,c;};
+__attribute__ ((noinline))
+void
+kill_me (struct a *a)
+{
+ a->a=0;
+ a->b=0;
+ a->c=0;
+}
+__attribute__ ((noinline))
+void
+my_pleasure (struct a *a)
+{
+ a->a=1;
+ a->c=2;
+}
+void
+set (struct a *a)
+{
+ kill_me (a);
+ my_pleasure (a);
+ a->b=1;
+}
+/* { dg-final { scan-tree-dump "Deleted dead store: kill_me" "dse2" } } */
@@ -209,6 +209,24 @@ normalize_ref (ao_ref *copy, ao_ref *ref)
return true;
}
+/* Update LIVE_BYTES tracking REF for write to WRITE:
+ Verify we have the same base memory address, the write
+ has a known size and overlaps with REF. */
+static void
+clear_live_bytes_for_ref (sbitmap live_bytes, ao_ref *ref, ao_ref *write)
+{
+ HOST_WIDE_INT start, size;
+
+ if (valid_ao_ref_for_dse (write)
+ && operand_equal_p (write->base, ref->base, OEP_ADDRESS_OF)
+ && known_eq (write->size, write->max_size)
+ && normalize_ref (write, ref)
+ && (write->offset - ref->offset).is_constant (&start)
+ && write->size.is_constant (&size))
+ bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
+ size / BITS_PER_UNIT);
+}
+
/* Clear any bytes written by STMT from the bitmap LIVE_BYTES. The base
address written by STMT must match the one found in REF, which must
have its base address previously initialized.
@@ -220,20 +238,21 @@ static void
clear_bytes_written_by (sbitmap live_bytes, gimple *stmt, ao_ref *ref)
{
ao_ref write;
+
+ if (gcall *call = dyn_cast <gcall *> (stmt))
+ {
+ bool interposed;
+ modref_summary *summary = get_modref_function_summary (call, &interposed);
+
+ if (summary && !interposed)
+ for (auto kill : summary->kills)
+ if (kill.get_ao_ref (as_a <gcall *> (stmt), &write))
+ clear_live_bytes_for_ref (live_bytes, ref, &write);
+ }
if (!initialize_ao_ref_for_dse (stmt, &write))
return;
- /* Verify we have the same base memory address, the write
- has a known size and overlaps with REF. */
- HOST_WIDE_INT start, size;
- if (valid_ao_ref_for_dse (&write)
- && operand_equal_p (write.base, ref->base, OEP_ADDRESS_OF)
- && known_eq (write.size, write.max_size)
- && normalize_ref (&write, ref)
- && (write.offset - ref->offset).is_constant (&start)
- && write.size.is_constant (&size))
- bitmap_clear_range (live_bytes, start / BITS_PER_UNIT,
- size / BITS_PER_UNIT);
+ clear_live_bytes_for_ref (live_bytes, ref, &write);
}
/* REF is a memory write. Extract relevant information from it and