From 2c0d243b0c092585561c732bac490700f41001fb Mon Sep 17 00:00:00 2001
From: Feng Xue <fxue@os.amperecomputing.com>
Date: Mon, 6 Sep 2021 20:34:50 +0800
Subject: [PATCH 2/2] WPD: Enable whole program devirtualization at LTRANS
The whole-program assumption does not hold when WPA splits the compilation
into more than one LTRANS partition. To avoid information loss for WPD
at LTRANS, we record all vtable nodes and related member function
references in each partition.
2021-09-07 Feng Xue <fxue@os.amperecomputing.com>
gcc/
* tree.h (TYPE_CXX_LOCAL): New macro for type using
base.nothrow_flag.
* tree-core.h (tree_base): Update comment on using
base.nothrow_flag to represent TYPE_CXX_LOCAL.
* ipa-devirt.c (odr_type_d::whole_program_local): Removed.
(odr_type_d::whole_program_local_p): Check TYPE_CXX_LOCAL flag
on type, and enable WPD at LTRANS when flag_devirtualize_fully
is true.
(get_odr_type): Remove setting whole_program_local flag on type.
(identify_whole_program_local_types): Replace whole_program_local
in odr_type_d by TYPE_CXX_LOCAL on type.
(maybe_record_node): Enable WPD at LTRANS when
flag_devirtualize_fully is true.
* ipa.c (can_remove_vtable_if_no_refs_p): Retain vtables at LTRANS
stage under full devirtualization.
* lto-cgraph.c (compute_ltrans_boundary): Add all defined vtables
to boundary of each LTRANS partition.
* lto-streamer-out.c (get_symbol_initial_value): Stream out
initial value of vtable even if its class is optimized away.
* lto-streamer-in.c (lto_input_tree): There might be more than
one decl in dref_queue; register debuginfo for all of them.
* lto-lang.c (lto_post_options): Disable full devirtualization
if flag_ltrans_devirtualize is false.
* tree-streamer-in.c (unpack_ts_base_value_fields): Unpack value
of TYPE_CXX_LOCAL for a type from streaming data.
* tree-streamer-out.c (pack_ts_base_value_fields): Pack value
of TYPE_CXX_LOCAL for a type into streaming data.
temp
---
gcc/ipa-devirt.c | 29 ++++++++++++++++++-----------
gcc/ipa.c | 7 ++++++-
gcc/lto-cgraph.c | 18 ++++++++++++++++++
gcc/lto-streamer-in.c | 3 +--
gcc/lto-streamer-out.c | 12 +++++++++++-
gcc/lto/lto-lang.c | 6 ++++++
gcc/tree-core.h | 3 +++
gcc/tree-streamer-in.c | 11 ++++++++---
gcc/tree-streamer-out.c | 11 ++++++++---
gcc/tree.h | 5 +++++
10 files changed, 84 insertions(+), 21 deletions(-)
@@ -216,8 +216,6 @@ struct GTY(()) odr_type_d
int id;
/* Is it in anonymous namespace? */
bool anonymous_namespace;
- /* Set when type is not used outside of program. */
- bool whole_program_local;
/* Did we report ODR violation here? */
bool odr_violated;
/* Set when virtual table without RTTI prevailed table with. */
@@ -290,10 +288,18 @@ get_type_vtable (tree type)
bool
odr_type_d::whole_program_local_p ()
{
- if (flag_ltrans)
+ if (flag_ltrans && !flag_devirtualize_fully)
return false;
- return whole_program_local;
+ if (in_lto_p)
+ return TYPE_CXX_LOCAL (type);
+
+ /* Although a local class is always considered as whole program local in
+ LGEN stage, it may not be in LTO stage if multiple duplicated primary
+ vtables are attached to the class due to C++ privatizing via -fno-weak.
+ Thus, we can not set TYPE_CXX_LOCAL flag for local class at LGEN stage
+ when building ODR type. */
+ return anonymous_namespace || decl_function_context (TYPE_NAME (type));
}
/* Return TRUE if ODR type may have any instance. */
@@ -2007,11 +2013,6 @@ get_odr_type (tree type, bool insert)
else
val->anonymous_namespace = 0;
- if (!in_lto_p
- && (val->anonymous_namespace
- || decl_function_context (TYPE_NAME (type))))
- val->whole_program_local = true;
-
build_bases = COMPLETE_TYPE_P (val->type);
insert_to_odr_array = true;
*slot = val;
@@ -2607,9 +2608,15 @@ identify_whole_program_local_types (void)
if (multi_vtable_p)
continue;
+
+ /* Mark all equivalent types in the ODR type as whole program local,
+ because representative type of the ODR type at LTRANS might not
+ be the one at WPA. */
+ FOR_EACH_VEC_ELT (*(type->types), i, equiv_type)
+ TYPE_CXX_LOCAL (equiv_type) = 1;
}
- type->whole_program_local = true;
+ TYPE_CXX_LOCAL (type->type) = 1;
}
delete no_rtti_files;
@@ -2726,7 +2733,7 @@ maybe_record_node (vec <cgraph_node *> &nodes,
Currently we ignore these functions in speculative devirtualization.
??? Maybe it would make sense to be more aggressive for LTO even
elsewhere. */
- if (!flag_ltrans
+ if ((!flag_ltrans || flag_devirtualize_fully)
&& !pure_virtual
&& type_in_anonymous_namespace_p (DECL_CONTEXT (target))
&& (!target_node
@@ -268,7 +268,7 @@ can_remove_vtable_if_no_refs_p (varpool_node *vnode)
if (!flag_devirtualize_fully)
return true;
- if (DECL_EXTERNAL (vnode->decl))
+ if (DECL_EXTERNAL (vnode->decl) && !vnode->in_other_partition)
return true;
/* We will force generating vtables in LGEN stage even they are "unused",
@@ -276,6 +276,11 @@ can_remove_vtable_if_no_refs_p (varpool_node *vnode)
if (!in_lto_p && flag_generate_lto)
return false;
+ /* All vtables seen at LTRANS stage are the result of dead class
+ elimination at WPA, so there is no need to prune them further. */
+ if (flag_ltrans)
+ return false;
+
return true;
}
@@ -859,6 +859,24 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
lto_set_symtab_encoder_encode_initializer (encoder, vnode);
create_references (encoder, vnode);
}
+
+ /* Add all defined vtables to streaming encoder, so that we can
+ reconstruct the whole type inheritance graph in LTRANS as it was in
+ WPA to enable late full devirtualization.
+
+ TODO: For a partition, only need to encode a subset of vtables, not
+ all of them. */
+ if (flag_devirtualize_fully && flag_ltrans_devirtualize && flag_wpa)
+ {
+ varpool_node *vnode;
+
+ FOR_EACH_DEFINED_VARIABLE (vnode)
+ if (DECL_VIRTUAL_P (vnode->decl) && !DECL_EXTERNAL (vnode->decl)
+ && TYPE_CXX_LOCAL (DECL_CONTEXT (vnode->decl))
+ && lto_symtab_encoder_lookup (encoder, vnode) == LCC_NOT_FOUND)
+ lto_symtab_encoder_encode (encoder, vnode);
+ }
+
/* Pickle in also the initializer of all referenced readonly variables
to help folding. Constant pool variables are not shared, so we must
pickle those too. */
@@ -1905,11 +1905,10 @@ lto_input_tree (class lto_input_block *ib, class data_in *data_in)
}
tree t = lto_input_tree_1 (ib, data_in, tag, 0);
- if (!dref_queue.is_empty ())
+ while (!dref_queue.is_empty ())
{
dref_entry e = dref_queue.pop ();
debug_hooks->register_external_die (e.decl, e.sym, e.off);
- gcc_checking_assert (dref_queue.is_empty ());
}
return t;
}
@@ -437,7 +437,17 @@ get_symbol_initial_value (lto_symtab_encoder_t encoder, tree expr)
scalar values. */
if (!(vnode = varpool_node::get (expr))
|| !lto_symtab_encoder_encode_initializer_p (encoder, vnode))
- initial = error_mark_node;
+ {
+ /* Even when a class is optimized away, devirtualization at LTRANS
+ still needs to extract addresses of member virtual functions from
+ initial value of vtable. */
+ if (flag_devirtualize_fully && flag_ltrans_devirtualize
+ && flag_wpa && DECL_VIRTUAL_P (expr) && !DECL_EXTERNAL (expr))
+ return initial;
+
+ initial = error_mark_node;
+ }
+
if (initial != error_mark_node)
{
long max_size = 30;
@@ -870,6 +870,12 @@ lto_post_options (const char **pfilename ATTRIBUTE_UNUSED)
/* During LTRANS, we are not looking at the whole program, only
a subset of the whole callgraph. */
flag_whole_program = 0;
+
+ /* Since full devirtualization at LTRANS requires streaming extra data
+ on vtables, it only takes effect when flag_ltrans_devirtualize is
+ also on. */
+ if (!flag_ltrans_devirtualize)
+ flag_devirtualize_fully = 0;
}
if (flag_wpa)
@@ -1328,6 +1328,9 @@ struct GTY(()) tree_base {
DECL_NONALIASED in
VAR_DECL
+ TYPE_CXX_LOCAL in
+ all types
+
deprecated_flag:
TREE_DEPRECATED in
@@ -129,10 +129,15 @@ unpack_ts_base_value_fields (struct bitpack_d *bp, tree expr)
bp_unpack_value (bp, 1);
TREE_ASM_WRITTEN (expr) = (unsigned) bp_unpack_value (bp, 1);
if (TYPE_P (expr))
- TYPE_ARTIFICIAL (expr) = (unsigned) bp_unpack_value (bp, 1);
+ {
+ TYPE_ARTIFICIAL (expr) = (unsigned) bp_unpack_value (bp, 1);
+ TYPE_CXX_LOCAL (expr) = (unsigned) bp_unpack_value (bp, 1);
+ }
else
- TREE_NO_WARNING (expr) = (unsigned) bp_unpack_value (bp, 1);
- TREE_NOTHROW (expr) = (unsigned) bp_unpack_value (bp, 1);
+ {
+ TREE_NO_WARNING (expr) = (unsigned) bp_unpack_value (bp, 1);
+ TREE_NOTHROW (expr) = (unsigned) bp_unpack_value (bp, 1);
+ }
TREE_STATIC (expr) = (unsigned) bp_unpack_value (bp, 1);
if (TREE_CODE (expr) != TREE_BINFO)
TREE_PRIVATE (expr) = (unsigned) bp_unpack_value (bp, 1);
@@ -102,10 +102,15 @@ pack_ts_base_value_fields (struct bitpack_d *bp, tree expr)
bp_pack_value (bp, (TREE_CODE (expr) != SSA_NAME
? 0 : TREE_ASM_WRITTEN (expr)), 1);
if (TYPE_P (expr))
- bp_pack_value (bp, TYPE_ARTIFICIAL (expr), 1);
+ {
+ bp_pack_value (bp, TYPE_ARTIFICIAL (expr), 1);
+ bp_pack_value (bp, TYPE_CXX_LOCAL (expr), 1);
+ }
else
- bp_pack_value (bp, TREE_NO_WARNING (expr), 1);
- bp_pack_value (bp, TREE_NOTHROW (expr), 1);
+ {
+ bp_pack_value (bp, TREE_NO_WARNING (expr), 1);
+ bp_pack_value (bp, TREE_NOTHROW (expr), 1);
+ }
bp_pack_value (bp, TREE_STATIC (expr), 1);
if (TREE_CODE (expr) != TREE_BINFO)
bp_pack_value (bp, TREE_PRIVATE (expr), 1);
@@ -2254,6 +2254,11 @@ extern tree vector_element_bits_tree (const_tree);
#define TYPE_FINAL_P(NODE) \
(RECORD_OR_UNION_CHECK (NODE)->base.default_def_flag)
+/* If nonzero, it indicates that a C++ type (mainly polymorphic class) is
+ not referenced outside the program being compiled. This is used in C++
+ devirtualization. */
+#define TYPE_CXX_LOCAL(NODE) (TYPE_CHECK (NODE)->base.nothrow_flag)
+
/* The debug output functions use the symtab union field to store
information specific to the debugging format. The different debug
output hooks store different types in the union field. These three
--
2.17.1