From patchwork Tue Apr 21 12:28:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Wielaard X-Patchwork-Id: 39116 From: mark@klomp.org (Mark J. Wielaard) Date: Tue, 21 Apr 2020 14:28:18 +0200 Subject: [PATCH 1/4] Add named-types-ids to use name ids after the type name instead of numbers. In-Reply-To: <20200421122821.13769-1-mark@klomp.org> References: <20200421122821.13769-1-mark@klomp.org> Message-ID: <20200421122821.13769-2-mark@klomp.org> From: Mark Wielaard To make the XML output more readable and a bit more stable generate type ids based on the underlying type name. When types are inserted, removed or rearranged the type ids will (mostly) be the same so that references can stay the same. This also makes it easier to read and compare the XML corpus representation. * doc/manuals/abidw.rst: Document --named-type-ids. * include/abg-ir.h (is_interned_string): New method. * include/abg-libxml-utils.h (replace_xml_type_string): Likewise. * include/abg-writer.h (set_named_type_ids): New function. (set_common_options): Call it. * src/abg-ir.cc (environment::is_interned_string): New method. * src/abg-libxml-utils.cc (replace_xml_type_string): New function. * src/abg-writer.cc (id_manager): Add get_id_for_type. (write_context): Add m_named_type_ids bool, get_named_type_ids and set_named_type_ids functions. (write_context::get_id_for_type): Check get_named_type_ids, use get_id_for_type. (set_named_type_ids): New function. * tools/abidw.cc (options): Add named_type_ids. (display_usage): Describe --named_type-ids. (parse_command_line): Parse --named-type-ids. Signed-off-by: Mark Wielaard --- doc/manuals/abidw.rst | 8 ++++ include/abg-ir.h | 3 ++ include/abg-libxml-utils.h | 2 + include/abg-writer.h | 4 ++ src/abg-ir.cc | 7 ++++ src/abg-libxml-utils.cc | 58 ++++++++++++++++++++++++++ src/abg-writer.cc | 85 ++++++++++++++++++++++++++++++++++++-- tools/abidw.cc | 7 +++- 8 files changed, 170 insertions(+), 4 deletions(-) diff --git a/doc/manuals/abidw.rst b/doc/manuals/abidw.rst index 6cc4693c..1e427d32 100644 --- a/doc/manuals/abidw.rst +++ b/doc/manuals/abidw.rst @@ -178,6 +178,14 @@ Options In the emitted ABI representation, do not show file, line or column where ABI artifacts are defined. + * ``--named-type-ids`` + + Without this option ids used to reference types in the XML file + use simple numbers. With this option the ids used are derived + from the type name to make it easier to see which type is + referenced and make the XML file more stable in case new types are + added (without this option that might mean all id numbers change). + * ``--check-alternate-debug-info-base-name`` <*elf-path*> diff --git a/include/abg-ir.h b/include/abg-ir.h index fda10de5..406a1719 100644 --- a/include/abg-ir.h +++ b/include/abg-ir.h @@ -209,6 +209,9 @@ public: interned_string intern(const string&) const; + bool + is_interned_string(const string&) const; + friend class class_or_union; friend class class_decl; friend class function_type; diff --git a/include/abg-libxml-utils.h b/include/abg-libxml-utils.h index 6331bde5..bd677027 100644 --- a/include/abg-libxml-utils.h +++ b/include/abg-libxml-utils.h @@ -120,6 +120,8 @@ unescape_xml_comment(const std::string& str, std::string unescape_xml_comment(const std::string& str); +std::string +replace_xml_type_string(const std::string& str); }//end namespace xml }//end namespace abigail #endif //__ABG_LIBXML_UTILS_H__ diff --git a/include/abg-writer.h b/include/abg-writer.h index ed739ef1..f1598a15 100644 --- a/include/abg-writer.h +++ b/include/abg-writer.h @@ -65,6 +65,9 @@ set_write_comp_dir(write_context& ctxt, bool flag); void set_short_locs(write_context& ctxt, bool flag); +void +set_named_type_ids(write_context& ctxt, bool flag); + /// A convenience generic function to set common options (usually used /// by Libabigail tools) from a generic options carrying-object, into /// a given @ref write_context. @@ -84,6 +87,7 @@ set_common_options(write_context& ctxt, const OPTS& opts) set_write_corpus_path(ctxt, opts.write_corpus_path); set_write_comp_dir(ctxt, opts.write_comp_dir); set_short_locs(ctxt, opts.short_locs); + set_named_type_ids(ctxt, opts.named_type_ids); } void diff --git a/src/abg-ir.cc b/src/abg-ir.cc index 27831352..f4fee60a 100644 --- a/src/abg-ir.cc +++ b/src/abg-ir.cc @@ -2884,6 +2884,13 @@ interned_string environment::intern(const string& s) const {return const_cast(this)->priv_->string_pool_.create_string(s);} +bool +environment::is_interned_string(const string& s) const +{ + const char *c = s.c_str(); + return const_cast(this)->priv_->string_pool_.has_string(c); +} + // // diff --git a/src/abg-libxml-utils.cc b/src/abg-libxml-utils.cc index 2c46aad8..e8c1c1b8 100644 --- a/src/abg-libxml-utils.cc +++ b/src/abg-libxml-utils.cc @@ -249,6 +249,64 @@ escape_xml_string(const std::string& str) return result; } +/// Replace the various special characters in a type string as used in +/// a type-id attribute. +/// +/// The characters '<', '>', ''', '"' and ' ' are all replaced by '_'. +/// The characters '&' and '*' at the end of a string are simply dropped, +/// otherwise they are also replaced by an '_'. +/// +/// The result is not reversible. +/// +//// @param str the input string to read to search for the characters +//// to replace. +//// +//// @param replaced the output string where to write the resulting +//// string that contains the replaced characters, +void +replace_xml_type_string(const std::string& str, std::string& replaced) +{ + for (std::string::const_iterator i = str.begin(); i != str.end(); ++i) + switch (*i) + { + case ' ': + case '<': + case '>': + case '"': + case '\'': + replaced += '_'; + break; + case '&': + case '*': + if (i + 1 != str.end()) + replaced += '_'; + break; + default: + replaced += *i; + } +} + +/// Replace the various special characters in a type string as used in +/// a type-id attribute. +/// +/// The characters '<', '>', ''', '"' are all replaced by '_'. +/// The character '&' is replaced by the string "-ref". +/// +/// The result is not reversible. +/// +//// @param str the input string to read to search for the characters +//// to replace. +//// +//// @return the resulting string that contains the string with the +//// replaced characters. +std::string +replace_xml_type_string(const std::string& str) +{ + std::string result; + replace_xml_type_string(str, result); + return result; +} + /// Escape the '-' character, to avoid having a '--' in a comment. /// /// The resulting entity for '-' is '-'. diff --git a/src/abg-writer.cc b/src/abg-writer.cc index 74fa1a8c..c240443c 100644 --- a/src/abg-writer.cc +++ b/src/abg-writer.cc @@ -127,6 +127,54 @@ public: ABG_ASSERT(env); return env->intern(o.str()); } + + /// Return a unique string representing a name, prefixed by a type + /// string. The returned string will be made unique by postfixing + /// underscores if necessary. + /// + /// @param type to create an unique id string for + interned_string + get_id_for_type(const type_base* type) const + { + ostringstream o; + /* Try to find an appropriate prefix. */ + if (is_type_decl(type)) + o << "type-"; + else if (is_class_type(type)) + o << "class-"; + else if (is_union_type(type)) + o << "union-"; + else if (is_enum_type(type)) + o << "enum-"; + else if (is_typedef(type)) + o << "typedef-"; + else if (is_qualified_type(type)) + o << "qual-"; + else if (is_pointer_type(type)) + o << "ptr-"; + else if (is_reference_type(type)) + o << "ref-"; + else if (is_array_type(type)) + o << "array-"; + else if (is_subrange_type(type)) + o << "subrng-"; + else if (is_function_type(type)) + o << "func-"; + else + ABG_ASSERT_NOT_REACHED; + + string name = xml::replace_xml_type_string(get_type_name(type)); + o << name; + + /* We want to make sure the id is unique. See if it is already + interned in this environment, if it is, it isn't unique and we + add some underscores to it till it is. */ + const environment* env = get_environment(); + ABG_ASSERT(env); + while (env->is_interned_string(o.str())) + o << "_"; + return env->intern(o.str()); + } }; /// A hashing functor that should be as fast as possible. @@ -174,6 +222,7 @@ class write_context bool m_write_corpus_path; bool m_write_comp_dir; bool m_short_locs; + bool m_named_type_ids; mutable type_ptr_map m_type_id_map; mutable type_ptr_set_type m_emitted_type_set; type_ptr_set_type m_emitted_decl_only_set; @@ -208,7 +257,8 @@ public: m_write_architecture(true), m_write_corpus_path(true), m_write_comp_dir(true), - m_short_locs(false) + m_short_locs(false), + m_named_type_ids(false) {} /// Getter of the environment we are operating from. @@ -306,6 +356,20 @@ public: set_short_locs(bool f) {m_short_locs = f;} + /// Getter of the named-type-ids option. + /// + /// @return true iff named type ids shall be emitted + bool + get_named_type_ids() const + {return m_named_type_ids;} + + /// Setter of the named-type-ids option + /// + /// @param f the new value of the flag. + void + set_named_type_ids(bool f) + {m_named_type_ids = f;} + /// Getter of the "show-locs" option. /// /// When this option is true then the XML writer emits location @@ -375,8 +439,11 @@ public: type_ptr_map::const_iterator it = m_type_id_map.find(c); if (it == m_type_id_map.end()) { - interned_string id = - get_id_manager().get_id_with_prefix("type-id-"); + interned_string id; + if (get_named_type_ids()) + id = get_id_manager().get_id_for_type(c); + else + id = get_id_manager().get_id_with_prefix("type-id-"); m_type_id_map[c] = id; return id; } @@ -2033,6 +2100,18 @@ void set_short_locs(write_context& ctxt, bool flag) {ctxt.set_short_locs(flag);} +/// Set the 'named-type-ids' flag. +/// +/// When this flag is set then the XML writer will emit type ids +/// based on the name of types, instead of numbered ids. +/// +/// @param ctxt the context to set this flag on to. +/// +/// @param flag the new value of the 'named-type_ids' flag. +void +set_named_type_ids(write_context& ctxt, bool flag) +{ctxt.set_named_type_ids(flag);} + /// Serialize the canonical types of a given scope. /// /// @param scope the scope to consider. diff --git a/tools/abidw.cc b/tools/abidw.cc index 72a8b0f1..7251c98d 100644 --- a/tools/abidw.cc +++ b/tools/abidw.cc @@ -111,6 +111,7 @@ struct options bool do_log; bool drop_private_types; bool drop_undefined_syms; + bool named_type_ids; options() : display_version(), @@ -130,7 +131,8 @@ struct options annotate(), do_log(), drop_private_types(false), - drop_undefined_syms(false) + drop_undefined_syms(false), + named_type_ids(false) {} ~options() @@ -164,6 +166,7 @@ display_usage(const string& prog_name, ostream& out) << " --short-locs only print filenames rather than paths\n" << " --drop-private-types drop private types from representation\n" << " --drop-undefined-syms drop undefined symbols from representation\n" + << " --named_type-ids use id attributes based on type names in XML file\n" << " --no-comp-dir-path do not show compilation path information\n" << " --check-alternate-debug-info check alternate debug info " "of \n" @@ -304,6 +307,8 @@ parse_command_line(int argc, char* argv[], options& opts) opts.drop_private_types = true; else if (!strcmp(argv[i], "--drop-undefined-syms")) opts.drop_undefined_syms = true; + else if (!strcmp(argv[i], "--named-type-ids")) + opts.named_type_ids = true; else if (!strcmp(argv[i], "--no-linux-kernel-mode")) opts.linux_kernel_mode = false; else if (!strcmp(argv[i], "--abidiff")) From patchwork Tue Apr 21 12:28:19 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Wielaard X-Patchwork-Id: 39117 From: mark@klomp.org (Mark J. Wielaard) Date: Tue, 21 Apr 2020 14:28:19 +0200 Subject: [PATCH 2/4] Add no-parameter-names to drop function parameter names. In-Reply-To: <20200421122821.13769-1-mark@klomp.org> References: <20200421122821.13769-1-mark@klomp.org> Message-ID: <20200421122821.13769-3-mark@klomp.org> From: Mark Wielaard The function parameter names are not relevant for the (exported) ABI. So provide an option to simply drop them from the corpus. * doc/manuals/abidw.rst: Add documentation for --no-parameter-names. * include/abg-writer.h (set_write_parameter_names): New function. (set_write_parameter_names): Call it. * src/abg-writer.cc (write_context): Add m_write_parameter_names bool, get_write_parameter_names and set_write_parameter_names functions. (write_context::write_function_decl): Check write_parameter_names. * tools/abidw.cc (options): Add write_parameter_names. (display_usage): Describe --no-parameter-names. (parse_command_line): Parse --no-parameter-names. Signed-off-by: Mark Wielaard --- doc/manuals/abidw.rst | 5 +++++ include/abg-writer.h | 4 ++++ src/abg-writer.cc | 30 +++++++++++++++++++++++++++++- tools/abidw.cc | 5 +++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/doc/manuals/abidw.rst b/doc/manuals/abidw.rst index 1e427d32..dd72d149 100644 --- a/doc/manuals/abidw.rst +++ b/doc/manuals/abidw.rst @@ -178,6 +178,11 @@ Options In the emitted ABI representation, do not show file, line or column where ABI artifacts are defined. + * ``--no-parameter-names`` + + In the emitted ABI representation, do not show names of function + parameters, just the types. + * ``--named-type-ids`` Without this option ids used to reference types in the XML file diff --git a/include/abg-writer.h b/include/abg-writer.h index f1598a15..71b7efe6 100644 --- a/include/abg-writer.h +++ b/include/abg-writer.h @@ -68,6 +68,9 @@ set_short_locs(write_context& ctxt, bool flag); void set_named_type_ids(write_context& ctxt, bool flag); +void +set_write_parameter_names(write_context& ctxt, bool flag); + /// A convenience generic function to set common options (usually used /// by Libabigail tools) from a generic options carrying-object, into /// a given @ref write_context. @@ -86,6 +89,7 @@ set_common_options(write_context& ctxt, const OPTS& opts) set_write_architecture(ctxt, opts.write_architecture); set_write_corpus_path(ctxt, opts.write_corpus_path); set_write_comp_dir(ctxt, opts.write_comp_dir); + set_write_parameter_names(ctxt, opts.write_parameter_names); set_short_locs(ctxt, opts.short_locs); set_named_type_ids(ctxt, opts.named_type_ids); } diff --git a/src/abg-writer.cc b/src/abg-writer.cc index c240443c..6c57166f 100644 --- a/src/abg-writer.cc +++ b/src/abg-writer.cc @@ -221,6 +221,7 @@ class write_context bool m_write_architecture; bool m_write_corpus_path; bool m_write_comp_dir; + bool m_write_parameter_names; bool m_short_locs; bool m_named_type_ids; mutable type_ptr_map m_type_id_map; @@ -257,6 +258,7 @@ public: m_write_architecture(true), m_write_corpus_path(true), m_write_comp_dir(true), + m_write_parameter_names(true), m_short_locs(false), m_named_type_ids(false) {} @@ -370,6 +372,20 @@ public: set_named_type_ids(bool f) {m_named_type_ids = f;} + /// Getter of the parameter-names option. + /// + /// @return true iff parameter names shall be emitted + bool + get_write_parameter_names() const + {return m_write_parameter_names;} + + /// Setter of the parameter-names option + /// + /// @param f the new value of the flag. + void + set_write_parameter_names(bool f) + {m_write_parameter_names = f;} + /// Getter of the "show-locs" option. /// /// When this option is true then the XML writer emits location @@ -2112,6 +2128,18 @@ void set_named_type_ids(write_context& ctxt, bool flag) {ctxt.set_named_type_ids(flag);} +/// Set the 'parameter-names' flag. +/// +/// When this flag is set then the XML writer will emit the names of +/// function parameters. +/// +/// @param ctxt the context to set this flag on to. +/// +/// @param flag the new value of the 'parameter-names' flag. +void +set_write_parameter_names(write_context& ctxt, bool flag) +{ctxt.set_write_parameter_names(flag);} + /// Serialize the canonical types of a given scope. /// /// @param scope the scope to consider. @@ -3225,7 +3253,7 @@ write_function_decl(const function_decl_sptr& decl, write_context& ctxt, << "'"; ctxt.record_type_as_referenced(parm_type); - if (!(*pi)->get_name().empty()) + if (ctxt.get_write_parameter_names() && !(*pi)->get_name().empty()) o << " name='" << (*pi)->get_name() << "'"; } write_is_artificial(*pi, o); diff --git a/tools/abidw.cc b/tools/abidw.cc index 7251c98d..510a0707 100644 --- a/tools/abidw.cc +++ b/tools/abidw.cc @@ -99,6 +99,7 @@ struct options bool write_architecture; bool write_corpus_path; bool write_comp_dir; + bool write_parameter_names; bool short_locs; bool load_all_types; bool linux_kernel_mode; @@ -120,6 +121,7 @@ struct options write_architecture(true), write_corpus_path(true), write_comp_dir(true), + write_parameter_names(true), short_locs(false), load_all_types(), linux_kernel_mode(true), @@ -168,6 +170,7 @@ display_usage(const string& prog_name, ostream& out) << " --drop-undefined-syms drop undefined symbols from representation\n" << " --named_type-ids use id attributes based on type names in XML file\n" << " --no-comp-dir-path do not show compilation path information\n" + << " --no-parameter-names do not show names of function parameters\n" << " --check-alternate-debug-info check alternate debug info " "of \n" << " --check-alternate-debug-info-base-name check alternate " @@ -288,6 +291,8 @@ parse_command_line(int argc, char* argv[], options& opts) opts.short_locs = true; else if (!strcmp(argv[i], "--no-comp-dir-path")) opts.write_comp_dir = false; + else if (!strcmp(argv[i], "--no-parameter-names")) + opts.write_parameter_names = false; else if (!strcmp(argv[i], "--check-alternate-debug-info") || !strcmp(argv[i], "--check-alternate-debug-info-base-name")) { From patchwork Tue Apr 21 12:28:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Wielaard X-Patchwork-Id: 39118 From: mark@klomp.org (Mark J. Wielaard) Date: Tue, 21 Apr 2020 14:28:20 +0200 Subject: [PATCH 3/4] Add --no-elf-needed option to drop DT_NEEDED list from corpus. In-Reply-To: <20200421122821.13769-1-mark@klomp.org> References: <20200421122821.13769-1-mark@klomp.org> Message-ID: <20200421122821.13769-4-mark@klomp.org> From: Mark Wielaard The elf-needed list is not relevant for the exported ABI of a library so provide an option to drop it. * doc/manuals/abidw.rst: Document --no-elf-needed. * include/abg-writer.h (set_write_elf_needed): New function. (set_common_options): Call it. * src/abg-writer.cc (write_context): Add m_write_elf_needed bool, get_write_elf_needed and set_write_elf_needed methods. (set_write_elf_needed): New function. (write_context::write_corpus): Check write_elf_needed. * tools/abidw.cc (options): Add write_elf_needed bool. (display_usage): Describe --no-elf-needed. (parse_command_line): Parse --no-elf-needed. Signed-off-by: Mark Wielaard --- doc/manuals/abidw.rst | 5 +++++ include/abg-writer.h | 4 ++++ src/abg-writer.cc | 30 +++++++++++++++++++++++++++++- tools/abidw.cc | 5 +++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/doc/manuals/abidw.rst b/doc/manuals/abidw.rst index dd72d149..7ae44737 100644 --- a/doc/manuals/abidw.rst +++ b/doc/manuals/abidw.rst @@ -146,6 +146,11 @@ Options representation build by Libabigail to represent the ABI and will not end up in the abi XML file. + * ``--no-elf-needed`` + + Do not include the list of DT_NEEDED dependency names in the + corpus. + * ``--drop-undefined-syms`` With this option functions or variables for which the (exported) diff --git a/include/abg-writer.h b/include/abg-writer.h index 71b7efe6..bece11e6 100644 --- a/include/abg-writer.h +++ b/include/abg-writer.h @@ -62,6 +62,9 @@ set_write_corpus_path(write_context& ctxt, bool flag); void set_write_comp_dir(write_context& ctxt, bool flag); +void +set_write_elf_needed(write_context& ctxt, bool flag); + void set_short_locs(write_context& ctxt, bool flag); @@ -89,6 +92,7 @@ set_common_options(write_context& ctxt, const OPTS& opts) set_write_architecture(ctxt, opts.write_architecture); set_write_corpus_path(ctxt, opts.write_corpus_path); set_write_comp_dir(ctxt, opts.write_comp_dir); + set_write_elf_needed(ctxt, opts.write_elf_needed); set_write_parameter_names(ctxt, opts.write_parameter_names); set_short_locs(ctxt, opts.short_locs); set_named_type_ids(ctxt, opts.named_type_ids); diff --git a/src/abg-writer.cc b/src/abg-writer.cc index 6c57166f..ccfb3f63 100644 --- a/src/abg-writer.cc +++ b/src/abg-writer.cc @@ -221,6 +221,7 @@ class write_context bool m_write_architecture; bool m_write_corpus_path; bool m_write_comp_dir; + bool m_write_elf_needed; bool m_write_parameter_names; bool m_short_locs; bool m_named_type_ids; @@ -258,6 +259,7 @@ public: m_write_architecture(true), m_write_corpus_path(true), m_write_comp_dir(true), + m_write_elf_needed(true), m_write_parameter_names(true), m_short_locs(false), m_named_type_ids(false) @@ -316,6 +318,20 @@ public: set_write_architecture(bool f) {m_write_architecture = f;} + /// Getter of the elf-needed option. + /// + /// @return true iff elf needed information shall be emitted + bool + get_write_elf_needed() + {return m_write_elf_needed;} + + /// Setter of the elf-needed option. + /// + /// @param f the new value of the flag. + void + set_write_elf_needed(bool f) + {m_write_elf_needed = f;} + /// Getter of the write-corpus-path option. /// /// @return true iff corpus-path information shall be emitted @@ -2140,6 +2156,18 @@ void set_write_parameter_names(write_context& ctxt, bool flag) {ctxt.set_write_parameter_names(flag);} +/// Set the 'elf-needed' flag. +/// +/// When this flag is set then the XML writer will emit corpus +/// get_needed() (DT_NEEDED) information. +/// +/// @param ctxt the context to set this flag on to. +/// +/// @param flag the new value of the 'elf-needed' flag. +void +set_write_elf_needed(write_context& ctxt, bool flag) +{ctxt.set_write_elf_needed(flag);} + /// Serialize the canonical types of a given scope. /// /// @param scope the scope to consider. @@ -4529,7 +4557,7 @@ write_corpus(write_context& ctxt, // Write the list of needed corpora. - if (!corpus->get_needed().empty()) + if (ctxt.get_write_elf_needed () && !corpus->get_needed().empty()) { do_indent_to_level(ctxt, indent, 1); out << "\n"; diff --git a/tools/abidw.cc b/tools/abidw.cc index 510a0707..3f4b3f42 100644 --- a/tools/abidw.cc +++ b/tools/abidw.cc @@ -99,6 +99,7 @@ struct options bool write_architecture; bool write_corpus_path; bool write_comp_dir; + bool write_elf_needed; bool write_parameter_names; bool short_locs; bool load_all_types; @@ -121,6 +122,7 @@ struct options write_architecture(true), write_corpus_path(true), write_comp_dir(true), + write_elf_needed(true), write_parameter_names(true), short_locs(false), load_all_types(), @@ -170,6 +172,7 @@ display_usage(const string& prog_name, ostream& out) << " --drop-undefined-syms drop undefined symbols from representation\n" << " --named_type-ids use id attributes based on type names in XML file\n" << " --no-comp-dir-path do not show compilation path information\n" + << " --no-elf-needed do not show the DT_NEEDED information\n" << " --no-parameter-names do not show names of function parameters\n" << " --check-alternate-debug-info check alternate debug info " "of \n" @@ -291,6 +294,8 @@ parse_command_line(int argc, char* argv[], options& opts) opts.short_locs = true; else if (!strcmp(argv[i], "--no-comp-dir-path")) opts.write_comp_dir = false; + else if (!strcmp(argv[i], "--no-elf-needed")) + opts.write_elf_needed = false; else if (!strcmp(argv[i], "--no-parameter-names")) opts.write_parameter_names = false; else if (!strcmp(argv[i], "--check-alternate-debug-info") From patchwork Tue Apr 21 12:28:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Wielaard X-Patchwork-Id: 39119 From: mark@klomp.org (Mark J. Wielaard) Date: Tue, 21 Apr 2020 14:28:21 +0200 Subject: [PATCH 4/4] Add --merge-translation-units option. In-Reply-To: <20200421122821.13769-1-mark@klomp.org> References: <20200421122821.13769-1-mark@klomp.org> Message-ID: <20200421122821.13769-5-mark@klomp.org> From: Mark Wielaard It is not always necessary to know in which translation unit a type, variable or function was defined first. Provide an option to simply merge all translation units for the same language (and address size). This also reduces the size of the XML representation of the corpus by ~10% on a simple C only library. Currently only implemented for the DWARF reader and abidw tool. * doc/manuals/abidw.rst: Document --merge-translation-units. * include/abg-dwarf-reader.h (set_merge_translation_units): New function. * src/abg-dwarf-reader.cc (read_context): Add merge_translation_units_ bool, merge_translation_units and merge_translation_units methods. (set_merge_translation_units): New function. (read_context::build_translation_unit_and_add_to_ir): Check merge_translation_units to see how to find an existing (compatible) translation_unit to use. Drop path and compilation_dir when merging translation_units. * src/abg-reader.cc (read_context): Add m_merge_translation_units bool, merge_translation_units and merge_translation_units methods. * tools/abidw.cc (options): Add merge_translation_units bool. (display_usage): Describe --merge-translation-units. (parse_command_line): Parse --merge-translation-units. (main): Call set_merge_translation_units. Signed-off-by: Mark Wielaard --- doc/manuals/abidw.rst | 7 ++++ include/abg-dwarf-reader.h | 4 ++ src/abg-dwarf-reader.cc | 85 +++++++++++++++++++++++++++++++------- src/abg-reader.cc | 20 ++++++++- tools/abidw.cc | 6 +++ 5 files changed, 105 insertions(+), 17 deletions(-) diff --git a/doc/manuals/abidw.rst b/doc/manuals/abidw.rst index 7ae44737..256c4ad6 100644 --- a/doc/manuals/abidw.rst +++ b/doc/manuals/abidw.rst @@ -158,6 +158,13 @@ Options representation build by Libabigail to represent the ABI and will not end up in the abi XML file. + * ``--merge-translation-units`` + + With this option translation units for the same language (and + address size) will be merged together as if the functions, + variables and types were all defined together. Note that this + also drops the compilation paths used. + * ``--no-linux-kernel-mode`` Without this option, if abipkgiff detects that the binaries it is diff --git a/include/abg-dwarf-reader.h b/include/abg-dwarf-reader.h index d0329aed..430cc222 100644 --- a/include/abg-dwarf-reader.h +++ b/include/abg-dwarf-reader.h @@ -192,6 +192,10 @@ void set_drop_undefined_syms(read_context& ctxt, bool f); +void +set_merge_translation_units(read_context& ctxt, + bool f); + void set_do_log(read_context& ctxt, bool f); diff --git a/src/abg-dwarf-reader.cc b/src/abg-dwarf-reader.cc index 1c0d6ea0..f7cbe6a5 100644 --- a/src/abg-dwarf-reader.cc +++ b/src/abg-dwarf-reader.cc @@ -3260,6 +3260,7 @@ public: corpus::exported_decls_builder* exported_decls_builder_; options_type options_; bool drop_undefined_syms_; + bool merge_translation_units_; read_context(); public: @@ -3429,6 +3430,7 @@ public: options_.load_in_linux_kernel_mode = linux_kernel_mode; options_.load_all_types = load_all_types; drop_undefined_syms_ = false; + merge_translation_units_ = false; load_in_linux_kernel_mode(linux_kernel_mode); } @@ -3537,6 +3539,22 @@ public: drop_undefined_syms(bool f) {drop_undefined_syms_ = f;} + /// Setter for the flag that tells us if we are merging translation + /// units. + /// + /// @param f the new value of the flag. + void + merge_translation_units(bool f) + {merge_translation_units_ = f;} + + /// Getter for the flag that tells us if we are merging translation + /// units. + /// + /// @return true iff we are merging translation units. + bool + merge_translation_units() const + {return merge_translation_units_;} + /// Getter of the suppression specifications to be used during /// ELF/DWARF parsing. /// @@ -9487,6 +9505,17 @@ void set_drop_undefined_syms(read_context& ctxt, bool f) {ctxt.drop_undefined_syms(f);} +/// Setter of the "merge_translation_units" flag. +/// +/// This flag tells if we should merge translation units. +/// +/// @param ctxt the read context to consider for this flag. +/// +/// @param f the value of the flag. +void +set_merge_translation_units(read_context& ctxt, bool f) +{ctxt.merge_translation_units(f);} + /// Setter of the "do_log" flag. /// /// This flag tells if we should emit verbose logs for various @@ -14267,28 +14296,52 @@ build_translation_unit_and_add_to_ir(read_context& ctxt, string path = die_string_attribute(die, DW_AT_name); string compilation_dir = die_string_attribute(die, DW_AT_comp_dir); - // See if the same translation unit exits already in the current - // corpus. Sometimes, the same translation unit can be present - // several times in the same debug info. The content of the - // different instances of the translation unit are different. So to - // represent that, we are going to re-use the same translation - // unit. That is, it's going to be the union of all the translation - // units of the same path. - { - string abs_path = compilation_dir + "/" + path; - result = ctxt.current_corpus()->find_translation_unit(abs_path); - } + uint64_t lang = 0; + die_unsigned_constant_attribute(die, DW_AT_language, lang); + translation_unit::language language = dwarf_language_to_tu_language(lang); + + corpus_sptr corp = ctxt.current_corpus(); + + if (ctxt.merge_translation_units()) + { + // See if there is already a translation for the address_size + // and language. If so, just reuse that one. + for (translation_units::const_iterator tu = + corp->get_translation_units().begin(); + tu != corp->get_translation_units().end(); + ++tu) + { + if ((*tu)->get_address_size() == address_size + && (*tu)->get_language() == language) + { + result = (*tu); + break; + } + } + } + else + { + // See if the same translation unit exits already in the current + // corpus. Sometimes, the same translation unit can be present + // several times in the same debug info. The content of the + // different instances of the translation unit are different. So to + // represent that, we are going to re-use the same translation + // unit. That is, it's going to be the union of all the translation + // units of the same path. + string abs_path = compilation_dir + "/" + path; + result = corp->find_translation_unit(abs_path); + } if (!result) { result.reset(new translation_unit(ctxt.env(), - path, + (ctxt.merge_translation_units() + ? "" : path), address_size)); - result->set_compilation_dir_path(compilation_dir); + if (!ctxt.merge_translation_units()) + result->set_compilation_dir_path(compilation_dir); ctxt.current_corpus()->add(result); - uint64_t l = 0; - die_unsigned_constant_attribute(die, DW_AT_language, l); - result->set_language(dwarf_language_to_tu_language(l)); + result->set_language(language); } ctxt.cur_transl_unit(result); diff --git a/src/abg-reader.cc b/src/abg-reader.cc index dcaa27e1..7f9374c5 100644 --- a/src/abg-reader.cc +++ b/src/abg-reader.cc @@ -132,6 +132,7 @@ private: suppr::suppressions_type m_supprs; bool m_tracking_non_reachable_types; bool m_drop_undefined_syms; + bool m_merge_translation_units; read_context(); @@ -143,7 +144,8 @@ public: m_corp_node(), m_exported_decls_builder(), m_tracking_non_reachable_types(), - m_drop_undefined_syms() + m_drop_undefined_syms(), + m_merge_translation_units() {} /// Getter for the flag that tells us if we are tracking types that @@ -181,6 +183,22 @@ public: drop_undefined_syms(bool f) {m_drop_undefined_syms = f;} + /// Getter for the flag that tells us if we are merging translation + /// units. + /// + /// @return true iff we are merging translation units. + bool + merge_translation_units() const + {return m_merge_translation_units;} + + /// Setter for the flag that tells us if we are merging translation + /// units. + /// + /// @param f the new value of the flag. + void + merge_translation_units(bool f) + {m_merge_translation_units = f;} + /// Getter of the path to the ABI file. /// /// @return the path to the native xml abi file. diff --git a/tools/abidw.cc b/tools/abidw.cc index 3f4b3f42..87248a2c 100644 --- a/tools/abidw.cc +++ b/tools/abidw.cc @@ -113,6 +113,7 @@ struct options bool do_log; bool drop_private_types; bool drop_undefined_syms; + bool merge_translation_units; bool named_type_ids; options() @@ -136,6 +137,7 @@ struct options do_log(), drop_private_types(false), drop_undefined_syms(false), + merge_translation_units(false), named_type_ids(false) {} @@ -170,6 +172,7 @@ display_usage(const string& prog_name, ostream& out) << " --short-locs only print filenames rather than paths\n" << " --drop-private-types drop private types from representation\n" << " --drop-undefined-syms drop undefined symbols from representation\n" + << " --merge-translation-units merge translation units for same language\n" << " --named_type-ids use id attributes based on type names in XML file\n" << " --no-comp-dir-path do not show compilation path information\n" << " --no-elf-needed do not show the DT_NEEDED information\n" @@ -317,6 +320,8 @@ parse_command_line(int argc, char* argv[], options& opts) opts.drop_private_types = true; else if (!strcmp(argv[i], "--drop-undefined-syms")) opts.drop_undefined_syms = true; + else if (!strcmp(argv[i], "--merge-translation-units")) + opts.merge_translation_units = true; else if (!strcmp(argv[i], "--named-type-ids")) opts.named_type_ids = true; else if (!strcmp(argv[i], "--no-linux-kernel-mode")) @@ -806,6 +811,7 @@ main(int argc, char* argv[]) opts.linux_kernel_mode); read_context& ctxt = *c; set_drop_undefined_syms(ctxt, opts.drop_undefined_syms); + set_merge_translation_units(ctxt, opts.merge_translation_units); set_show_stats(ctxt, opts.show_stats); set_suppressions(ctxt, opts); abigail::dwarf_reader::set_do_log(ctxt, opts.do_log);