From patchwork Tue Apr 21 12:28:18 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mark Wielaard X-Patchwork-Id: 39116 From: mark@klomp.org (Mark J. Wielaard) Date: Tue, 21 Apr 2020 14:28:18 +0200 Subject: [PATCH 1/4] Add named-type-ids to use name ids after the type name instead of numbers. In-Reply-To: <20200421122821.13769-1-mark@klomp.org> References: <20200421122821.13769-1-mark@klomp.org> Message-ID: <20200421122821.13769-2-mark@klomp.org> From: Mark Wielaard To make the XML output more readable and a bit more stable generate type ids based on the underlying type name. When types are inserted, removed or rearranged the type ids will (mostly) be the same so that references can stay the same. This also makes it easier to read and compare the XML corpus representation. * doc/manuals/abidw.rst: Document --named-type-ids. * include/abg-ir.h (is_interned_string): New method. * include/abg-libxml-utils.h (replace_xml_type_string): New function. * include/abg-writer.h (set_named_type_ids): New function. (set_common_options): Call it. * src/abg-ir.cc (environment::is_interned_string): New method. * src/abg-libxml-utils.cc (replace_xml_type_string): New function. * src/abg-writer.cc (id_manager): Add get_id_for_type. (write_context): Add m_named_type_ids bool, get_named_type_ids and set_named_type_ids functions. (write_context::get_id_for_type): Check get_named_type_ids, use get_id_for_type. (set_named_type_ids): New function. * tools/abidw.cc (options): Add named_type_ids. (display_usage): Describe --named-type-ids. (parse_command_line): Parse --named-type-ids. 
Signed-off-by: Mark Wielaard --- doc/manuals/abidw.rst | 8 ++++ include/abg-ir.h | 3 ++ include/abg-libxml-utils.h | 2 + include/abg-writer.h | 4 ++ src/abg-ir.cc | 7 ++++ src/abg-libxml-utils.cc | 58 ++++++++++++++++++++++++++ src/abg-writer.cc | 85 ++++++++++++++++++++++++++++++++++++-- tools/abidw.cc | 7 +++- 8 files changed, 170 insertions(+), 4 deletions(-) diff --git a/doc/manuals/abidw.rst b/doc/manuals/abidw.rst index 6cc4693c..1e427d32 100644 --- a/doc/manuals/abidw.rst +++ b/doc/manuals/abidw.rst @@ -178,6 +178,14 @@ Options In the emitted ABI representation, do not show file, line or column where ABI artifacts are defined. + * ``--named-type-ids`` + + Without this option ids used to reference types in the XML file + use simple numbers. With this option the ids used are derived + from the type name to make it easier to see which type is + referenced and make the XML file more stable in case new types are + added (without this option that might mean all id numbers change). + * ``--check-alternate-debug-info-base-name`` <*elf-path*> diff --git a/include/abg-ir.h b/include/abg-ir.h index fda10de5..406a1719 100644 --- a/include/abg-ir.h +++ b/include/abg-ir.h @@ -209,6 +209,9 @@ public: interned_string intern(const string&) const; + bool + is_interned_string(const string&) const; + friend class class_or_union; friend class class_decl; friend class function_type; diff --git a/include/abg-libxml-utils.h b/include/abg-libxml-utils.h index 6331bde5..bd677027 100644 --- a/include/abg-libxml-utils.h +++ b/include/abg-libxml-utils.h @@ -120,6 +120,8 @@ unescape_xml_comment(const std::string& str, std::string unescape_xml_comment(const std::string& str); +std::string +replace_xml_type_string(const std::string& str); }//end namespace xml }//end namespace abigail #endif //__ABG_LIBXML_UTILS_H__ diff --git a/include/abg-writer.h b/include/abg-writer.h index ed739ef1..f1598a15 100644 --- a/include/abg-writer.h +++ b/include/abg-writer.h @@ -65,6 +65,9 @@ 
set_write_comp_dir(write_context& ctxt, bool flag); void set_short_locs(write_context& ctxt, bool flag); +void +set_named_type_ids(write_context& ctxt, bool flag); + /// A convenience generic function to set common options (usually used /// by Libabigail tools) from a generic options carrying-object, into /// a given @ref write_context. @@ -84,6 +87,7 @@ set_common_options(write_context& ctxt, const OPTS& opts) set_write_corpus_path(ctxt, opts.write_corpus_path); set_write_comp_dir(ctxt, opts.write_comp_dir); set_short_locs(ctxt, opts.short_locs); + set_named_type_ids(ctxt, opts.named_type_ids); } void diff --git a/src/abg-ir.cc b/src/abg-ir.cc index 27831352..f4fee60a 100644 --- a/src/abg-ir.cc +++ b/src/abg-ir.cc @@ -2884,6 +2884,13 @@ interned_string environment::intern(const string& s) const {return const_cast<environment*>(this)->priv_->string_pool_.create_string(s);} +bool +environment::is_interned_string(const string& s) const +{ + const char *c = s.c_str(); + return const_cast<environment*>(this)->priv_->string_pool_.has_string(c); +} + // // diff --git a/src/abg-libxml-utils.cc b/src/abg-libxml-utils.cc index 2c46aad8..e8c1c1b8 100644 --- a/src/abg-libxml-utils.cc +++ b/src/abg-libxml-utils.cc @@ -249,6 +249,64 @@ escape_xml_string(const std::string& str) return result; } +/// Replace the various special characters in a type string as used in +/// a type-id attribute. +/// +/// The characters '<', '>', ''', '"' and ' ' are all replaced by '_'. +/// The characters '&' and '*' at the end of a string are simply dropped, +/// otherwise they are also replaced by an '_'. +/// +/// The result is not reversible. +/// +/// @param str the input string to read to search for the characters +/// to replace. 
+/// +/// @param replaced the output string to which the resulting string, +/// with the characters replaced, is appended. +void +replace_xml_type_string(const std::string& str, std::string& replaced) +{ + for (std::string::const_iterator i = str.begin(); i != str.end(); ++i) + switch (*i) + { + case ' ': + case '<': + case '>': + case '"': + case '\'': + replaced += '_'; + break; + case '&': + case '*': + if (i + 1 != str.end()) + replaced += '_'; + break; + default: + replaced += *i; + } +} + +/// Replace the various special characters in a type string as used in +/// a type-id attribute. +/// +/// The characters '<', '>', ''', '"' and ' ' are all replaced by '_'. +/// A '&' or '*' that ends the string is dropped, otherwise it becomes '_'. +/// +/// The result is not reversible. +/// +/// @param str the input string to read to search for the characters +/// to replace. +/// +/// @return the resulting string that contains the string with the +/// replaced characters. +std::string +replace_xml_type_string(const std::string& str) +{ + std::string result; + replace_xml_type_string(str, result); + return result; +} + /// Escape the '-' character, to avoid having a '--' in a comment. /// /// The resulting entity for '-' is '&#45;'. diff --git a/src/abg-writer.cc b/src/abg-writer.cc index 74fa1a8c..c240443c 100644 --- a/src/abg-writer.cc +++ b/src/abg-writer.cc @@ -127,6 +127,54 @@ public: ABG_ASSERT(env); return env->intern(o.str()); } + + /// Return a unique id string for a type: a prefix naming the kind of + /// type, followed by its sanitized name. The id is made unique by + /// postfixing underscores if necessary. + /// + /// @param type the type to create a unique id string for. + interned_string + get_id_for_type(const type_base* type) const + { + ostringstream o; + /* Try to find an appropriate prefix. 
*/ + if (is_type_decl(type)) + o << "type-"; + else if (is_class_type(type)) + o << "class-"; + else if (is_union_type(type)) + o << "union-"; + else if (is_enum_type(type)) + o << "enum-"; + else if (is_typedef(type)) + o << "typedef-"; + else if (is_qualified_type(type)) + o << "qual-"; + else if (is_pointer_type(type)) + o << "ptr-"; + else if (is_reference_type(type)) + o << "ref-"; + else if (is_array_type(type)) + o << "array-"; + else if (is_subrange_type(type)) + o << "subrng-"; + else if (is_function_type(type)) + o << "func-"; + else + ABG_ASSERT_NOT_REACHED; + + string name = xml::replace_xml_type_string(get_type_name(type)); + o << name; + + /* We want to make sure the id is unique. See if it is already + interned in this environment, if it is, it isn't unique and we + add some underscores to it till it is. */ + const environment* env = get_environment(); + ABG_ASSERT(env); + while (env->is_interned_string(o.str())) + o << "_"; + return env->intern(o.str()); + } }; /// A hashing functor that should be as fast as possible. @@ -174,6 +222,7 @@ class write_context bool m_write_corpus_path; bool m_write_comp_dir; bool m_short_locs; + bool m_named_type_ids; mutable type_ptr_map m_type_id_map; mutable type_ptr_set_type m_emitted_type_set; type_ptr_set_type m_emitted_decl_only_set; @@ -208,7 +257,8 @@ public: m_write_architecture(true), m_write_corpus_path(true), m_write_comp_dir(true), - m_short_locs(false) + m_short_locs(false), + m_named_type_ids(false) {} /// Getter of the environment we are operating from. @@ -306,6 +356,20 @@ public: set_short_locs(bool f) {m_short_locs = f;} + /// Getter of the named-type-ids option. + /// + /// @return true iff named type ids shall be emitted. + bool + get_named_type_ids() const + {return m_named_type_ids;} + + /// Setter of the named-type-ids option. + /// + /// @param f the new value of the flag. + void + set_named_type_ids(bool f) + {m_named_type_ids = f;} + /// Getter of the "show-locs" option. 
/// /// When this option is true then the XML writer emits location @@ -375,8 +439,11 @@ public: type_ptr_map::const_iterator it = m_type_id_map.find(c); if (it == m_type_id_map.end()) { - interned_string id = - get_id_manager().get_id_with_prefix("type-id-"); + interned_string id; + if (get_named_type_ids()) + id = get_id_manager().get_id_for_type(c); + else + id = get_id_manager().get_id_with_prefix("type-id-"); m_type_id_map[c] = id; return id; } @@ -2033,6 +2100,18 @@ void set_short_locs(write_context& ctxt, bool flag) {ctxt.set_short_locs(flag);} +/// Set the 'named-type-ids' flag. +/// +/// When this flag is set then the XML writer will emit type ids +/// based on the name of types, instead of numbered ids. +/// +/// @param ctxt the context to set this flag on to. +/// +/// @param flag the new value of the 'named-type-ids' flag. +void +set_named_type_ids(write_context& ctxt, bool flag) +{ctxt.set_named_type_ids(flag);} + /// Serialize the canonical types of a given scope. /// /// @param scope the scope to consider. 
diff --git a/tools/abidw.cc b/tools/abidw.cc index 72a8b0f1..7251c98d 100644 --- a/tools/abidw.cc +++ b/tools/abidw.cc @@ -111,6 +111,7 @@ struct options bool do_log; bool drop_private_types; bool drop_undefined_syms; + bool named_type_ids; options() : display_version(), @@ -130,7 +131,8 @@ struct options annotate(), do_log(), drop_private_types(false), - drop_undefined_syms(false) + drop_undefined_syms(false), + named_type_ids(false) {} ~options() @@ -164,6 +166,7 @@ display_usage(const string& prog_name, ostream& out) << " --short-locs only print filenames rather than paths\n" << " --drop-private-types drop private types from representation\n" << " --drop-undefined-syms drop undefined symbols from representation\n" + << " --named-type-ids use id attributes based on type names in XML file\n" << " --no-comp-dir-path do not show compilation path information\n" << " --check-alternate-debug-info check alternate debug info " "of \n" @@ -304,6 +307,8 @@ parse_command_line(int argc, char* argv[], options& opts) opts.drop_private_types = true; else if (!strcmp(argv[i], "--drop-undefined-syms")) opts.drop_undefined_syms = true; + else if (!strcmp(argv[i], "--named-type-ids")) + opts.named_type_ids = true; else if (!strcmp(argv[i], "--no-linux-kernel-mode")) opts.linux_kernel_mode = false; else if (!strcmp(argv[i], "--abidiff"))