[v3,03/21] Simplify generation of symbol whitelist regex.
Commit Message
The code to build the symbol whitelist regex uses things like seekp
and tellp to generate regexes like "^foo$|^bar$".
This patch simplifies the code, for further enhancement, resulting in
generated regexes like "^(foo|bar)$".
There should be no change in behaviour, unless whitelisted symbol
names contain special regex characters.
* include/abg-regex.h (generate_from_strings): Declare new
function to build a regex from some strings, representing a
membership test.
* src/abg-regex.cc (generate_from_strings): Implement new
function to build a regex from some strings, representing a
membership test, in a straightfoward fashion.
* src/abg-tools-utils.cc
(gen_suppr_spec_from_kernel_abi_whitelists): Replace
regex-building code with a call to generate_from_strings.
* tests/test-kmi-whitelist.cc: Update regexes in test.
Signed-off-by: Giuliano Procida <gprocida@google.com>
---
include/abg-regex.h | 6 ++++++
src/abg-regex.cc | 28 ++++++++++++++++++++++++++++
src/abg-tools-utils.cc | 10 +++-------
tests/test-kmi-whitelist.cc | 10 +++++-----
4 files changed, 42 insertions(+), 12 deletions(-)
Comments
On Fri, Apr 24, 2020 at 10:21:14AM +0100, Giuliano Procida wrote:
>The code to build the symbol whitelist regex uses things like seekp
>and tellp to generate regexes like "^foo$|^bar$".
>
>This patch simplifies the code, for further enhancement, resulting in
>generated regexes like "^(foo|bar)$".
>
>There should be no change in behaviour, unless whitelisted symbol
>names contain special regex characters.
>
> * include/abg-regex.h (generate_from_strings): Declare new
> function to build a regex from some strings, representing a
> membership test.
> * src/abg-regex.cc (generate_from_strings): Implement new
> function to build a regex from some strings, representing a
> membership test, in a straightfoward fashion.
> * src/abg-tools-utils.cc
> (gen_suppr_spec_from_kernel_abi_whitelists): Replace
> regex-building code with a call to generate_from_strings.
> * tests/test-kmi-whitelist.cc: Update regexes in test.
>
>Signed-off-by: Giuliano Procida <gprocida@google.com>
>---
> include/abg-regex.h | 6 ++++++
> src/abg-regex.cc | 28 ++++++++++++++++++++++++++++
> src/abg-tools-utils.cc | 10 +++-------
> tests/test-kmi-whitelist.cc | 10 +++++-----
> 4 files changed, 42 insertions(+), 12 deletions(-)
>
>diff --git a/include/abg-regex.h b/include/abg-regex.h
>index 84c386a9..2f638ef2 100644
>--- a/include/abg-regex.h
>+++ b/include/abg-regex.h
>@@ -27,6 +27,9 @@
>
> #include <regex.h>
>
>+#include <string>
>+#include <vector>
>+
> #include "abg-cxx-compat.h"
> #include "abg-sptr-utils.h"
>
>@@ -55,6 +58,9 @@ struct regex_t_deleter
> }
> };//end struct regex_deleter
>
>+std::string
>+generate_from_strings(const std::vector<std::string>& strs);
>+
> }// end namespace regex
>
> /// Specialization of sptr_utils::build_sptr for regex_t.
>diff --git a/src/abg-regex.cc b/src/abg-regex.cc
>index 13f5841d..79a89033 100644
>--- a/src/abg-regex.cc
>+++ b/src/abg-regex.cc
>@@ -23,6 +23,7 @@
> /// Some specialization for shared pointer utility templates.
> ///
>
>+#include <sstream>
> #include "abg-sptr-utils.h"
> #include "abg-regex.h"
>
>@@ -52,4 +53,31 @@ regex::regex_t_sptr
> sptr_utils::build_sptr<regex_t>()
> {return sptr_utils::build_sptr(new regex_t);}
>
>+namespace regex
>+{
>+
>+/// Generate a regex pattern equivalent to testing set membership.
>+///
>+/// A string will match the resulting pattern regex, if and only if it
>+/// was present in the vector.
>+///
>+/// @param strs a vector of strings
>+///
>+/// @return a regex pattern
>+std::string
>+generate_from_strings(const std::vector<std::string>& strs)
>+{
>+ if (strs.empty())
>+ return "^_^";
I am not aware of this constant. Looks like an invalid regex. Can you
document it? Or am I missing the joke? :-)
Cheers,
Matthias
>+ std::ostringstream os;
>+ std::vector<std::string>::const_iterator i = strs.begin();
>+ os << "^(" << *i++;
>+ while (i != strs.end())
>+ os << "|" << *i++;
>+ os << ")$";
>+ return os.str();
>+}
>+
>+}//end namespace regex
>+
> }//end namespace abigail
>diff --git a/src/abg-tools-utils.cc b/src/abg-tools-utils.cc
>index a06e8615..11486a21 100644
>--- a/src/abg-tools-utils.cc
>+++ b/src/abg-tools-utils.cc
>@@ -61,6 +61,8 @@
> #include "abg-dwarf-reader.h"
> #include "abg-internal.h"
> #include "abg-cxx-compat.h"
>+#include "abg-regex.h"
>+
> // <headers defining libabigail's API go under here>
> ABG_BEGIN_EXPORT_DECLARATIONS
>
>@@ -2002,13 +2004,7 @@ gen_suppr_spec_from_kernel_abi_whitelists
>
> // Build a regular expression representing the union of all
> // the function and variable names expressed in the white list.
>- std::stringstream regex_ss;
>- regex_ss << "^";
>- std::copy(whitelisted_names.begin(), whitelisted_names.end(),
>- std::ostream_iterator<std::string>(regex_ss, "$|^"));
>- regex_ss.seekp(0, std::ios::end);
>- const std::string& regex =
>- regex_ss.str().substr(0, static_cast<size_t>(regex_ss.tellp()) - 2);
>+ const std::string regex = regex::generate_from_strings(whitelisted_names);
>
> // Build a suppression specification which *keeps* functions
> // whose ELF symbols match the regular expression contained
>diff --git a/tests/test-kmi-whitelist.cc b/tests/test-kmi-whitelist.cc
>index 2aa0f463..bcc5adee 100644
>--- a/tests/test-kmi-whitelist.cc
>+++ b/tests/test-kmi-whitelist.cc
>@@ -96,7 +96,7 @@ TEST_CASE("WhitelistWithASingleEntry", "[whitelists]")
> suppressions_type suppr
> = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> REQUIRE(!suppr.empty());
>- test_suppressions_are_consistent(suppr, "^test_symbol$");
>+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
> }
>
> TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
>@@ -106,7 +106,7 @@ TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
> suppressions_type suppr
> = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> REQUIRE(!suppr.empty());
>- test_suppressions_are_consistent(suppr, "^test_symbol$");
>+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
> }
>
> TEST_CASE("TwoWhitelists", "[whitelists]")
>@@ -118,7 +118,7 @@ TEST_CASE("TwoWhitelists", "[whitelists]")
> gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> REQUIRE(!suppr.empty());
> test_suppressions_are_consistent(suppr,
>- "^test_another_symbol$|^test_symbol$");
>+ "^(test_another_symbol|test_symbol)$");
> }
>
> TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
>@@ -130,7 +130,7 @@ TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
> = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> REQUIRE(!suppr.empty());
> test_suppressions_are_consistent(suppr,
>- "^test_another_symbol$|^test_symbol$");
>+ "^(test_another_symbol|test_symbol)$");
> }
>
> TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
>@@ -140,5 +140,5 @@ TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
> suppressions_type suppr
> = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> REQUIRE(!suppr.empty());
>- test_suppressions_are_consistent(suppr, "^test_symbol1$|^test_symbol2$");
>+ test_suppressions_are_consistent(suppr, "^(test_symbol1|test_symbol2)$");
> }
>--
>2.26.2.303.gf8c07b1a785-goog
>
It is an efficient and nice-looking regex that will never match anything.
I'll add a code and a commit comment.
Giuliano.
On Mon, 27 Apr 2020 at 12:01, Matthias Maennich <maennich@google.com> wrote:
>
> On Fri, Apr 24, 2020 at 10:21:14AM +0100, Giuliano Procida wrote:
> >The code to build the symbol whitelist regex uses things like seekp
> >and tellp to generate regexes like "^foo$|^bar$".
> >
> >This patch simplifies the code, for further enhancement, resulting in
> >generated regexes like "^(foo|bar)$".
> >
> >There should be no change in behaviour, unless whitelisted symbol
> >names contain special regex characters.
> >
> > * include/abg-regex.h (generate_from_strings): Declare new
> > function to build a regex from some strings, representing a
> > membership test.
> > * src/abg-regex.cc (generate_from_strings): Implement new
> > function to build a regex from some strings, representing a
> > membership test, in a straightfoward fashion.
> > * src/abg-tools-utils.cc
> > (gen_suppr_spec_from_kernel_abi_whitelists): Replace
> > regex-building code with a call to generate_from_strings.
> > * tests/test-kmi-whitelist.cc: Update regexes in test.
> >
> >Signed-off-by: Giuliano Procida <gprocida@google.com>
> >---
> > include/abg-regex.h | 6 ++++++
> > src/abg-regex.cc | 28 ++++++++++++++++++++++++++++
> > src/abg-tools-utils.cc | 10 +++-------
> > tests/test-kmi-whitelist.cc | 10 +++++-----
> > 4 files changed, 42 insertions(+), 12 deletions(-)
> >
> >diff --git a/include/abg-regex.h b/include/abg-regex.h
> >index 84c386a9..2f638ef2 100644
> >--- a/include/abg-regex.h
> >+++ b/include/abg-regex.h
> >@@ -27,6 +27,9 @@
> >
> > #include <regex.h>
> >
> >+#include <string>
> >+#include <vector>
> >+
> > #include "abg-cxx-compat.h"
> > #include "abg-sptr-utils.h"
> >
> >@@ -55,6 +58,9 @@ struct regex_t_deleter
> > }
> > };//end struct regex_deleter
> >
> >+std::string
> >+generate_from_strings(const std::vector<std::string>& strs);
> >+
> > }// end namespace regex
> >
> > /// Specialization of sptr_utils::build_sptr for regex_t.
> >diff --git a/src/abg-regex.cc b/src/abg-regex.cc
> >index 13f5841d..79a89033 100644
> >--- a/src/abg-regex.cc
> >+++ b/src/abg-regex.cc
> >@@ -23,6 +23,7 @@
> > /// Some specialization for shared pointer utility templates.
> > ///
> >
> >+#include <sstream>
> > #include "abg-sptr-utils.h"
> > #include "abg-regex.h"
> >
> >@@ -52,4 +53,31 @@ regex::regex_t_sptr
> > sptr_utils::build_sptr<regex_t>()
> > {return sptr_utils::build_sptr(new regex_t);}
> >
> >+namespace regex
> >+{
> >+
> >+/// Generate a regex pattern equivalent to testing set membership.
> >+///
> >+/// A string will match the resulting pattern regex, if and only if it
> >+/// was present in the vector.
> >+///
> >+/// @param strs a vector of strings
> >+///
> >+/// @return a regex pattern
> >+std::string
> >+generate_from_strings(const std::vector<std::string>& strs)
> >+{
> >+ if (strs.empty())
> >+ return "^_^";
>
> I am not aware of this constant. Looks like an invalid regex. Can you
> document it? Or am I missing the joke? :-)
>
> Cheers,
> Matthias
>
> >+ std::ostringstream os;
> >+ std::vector<std::string>::const_iterator i = strs.begin();
> >+ os << "^(" << *i++;
> >+ while (i != strs.end())
> >+ os << "|" << *i++;
> >+ os << ")$";
> >+ return os.str();
> >+}
> >+
> >+}//end namespace regex
> >+
> > }//end namespace abigail
> >diff --git a/src/abg-tools-utils.cc b/src/abg-tools-utils.cc
> >index a06e8615..11486a21 100644
> >--- a/src/abg-tools-utils.cc
> >+++ b/src/abg-tools-utils.cc
> >@@ -61,6 +61,8 @@
> > #include "abg-dwarf-reader.h"
> > #include "abg-internal.h"
> > #include "abg-cxx-compat.h"
> >+#include "abg-regex.h"
> >+
> > // <headers defining libabigail's API go under here>
> > ABG_BEGIN_EXPORT_DECLARATIONS
> >
> >@@ -2002,13 +2004,7 @@ gen_suppr_spec_from_kernel_abi_whitelists
> >
> > // Build a regular expression representing the union of all
> > // the function and variable names expressed in the white list.
> >- std::stringstream regex_ss;
> >- regex_ss << "^";
> >- std::copy(whitelisted_names.begin(), whitelisted_names.end(),
> >- std::ostream_iterator<std::string>(regex_ss, "$|^"));
> >- regex_ss.seekp(0, std::ios::end);
> >- const std::string& regex =
> >- regex_ss.str().substr(0, static_cast<size_t>(regex_ss.tellp()) - 2);
> >+ const std::string regex = regex::generate_from_strings(whitelisted_names);
> >
> > // Build a suppression specification which *keeps* functions
> > // whose ELF symbols match the regular expression contained
> >diff --git a/tests/test-kmi-whitelist.cc b/tests/test-kmi-whitelist.cc
> >index 2aa0f463..bcc5adee 100644
> >--- a/tests/test-kmi-whitelist.cc
> >+++ b/tests/test-kmi-whitelist.cc
> >@@ -96,7 +96,7 @@ TEST_CASE("WhitelistWithASingleEntry", "[whitelists]")
> > suppressions_type suppr
> > = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> > REQUIRE(!suppr.empty());
> >- test_suppressions_are_consistent(suppr, "^test_symbol$");
> >+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
> > }
> >
> > TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
> >@@ -106,7 +106,7 @@ TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
> > suppressions_type suppr
> > = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> > REQUIRE(!suppr.empty());
> >- test_suppressions_are_consistent(suppr, "^test_symbol$");
> >+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
> > }
> >
> > TEST_CASE("TwoWhitelists", "[whitelists]")
> >@@ -118,7 +118,7 @@ TEST_CASE("TwoWhitelists", "[whitelists]")
> > gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> > REQUIRE(!suppr.empty());
> > test_suppressions_are_consistent(suppr,
> >- "^test_another_symbol$|^test_symbol$");
> >+ "^(test_another_symbol|test_symbol)$");
> > }
> >
> > TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
> >@@ -130,7 +130,7 @@ TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
> > = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> > REQUIRE(!suppr.empty());
> > test_suppressions_are_consistent(suppr,
> >- "^test_another_symbol$|^test_symbol$");
> >+ "^(test_another_symbol|test_symbol)$");
> > }
> >
> > TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
> >@@ -140,5 +140,5 @@ TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
> > suppressions_type suppr
> > = gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
> > REQUIRE(!suppr.empty());
> >- test_suppressions_are_consistent(suppr, "^test_symbol1$|^test_symbol2$");
> >+ test_suppressions_are_consistent(suppr, "^(test_symbol1|test_symbol2)$");
> > }
> >--
> >2.26.2.303.gf8c07b1a785-goog
> >
Giuliano Procida <gprocida@google.com> a ?crit:
> The code to build the symbol whitelist regex uses things like seekp
> and tellp to generate regexes like "^foo$|^bar$".
>
> This patch simplifies the code, for further enhancement, resulting in
> generated regexes like "^(foo|bar)$".
>
> There should be no change in behaviour, unless whitelisted symbol
> names contain special regex characters.
>
> * include/abg-regex.h (generate_from_strings): Declare new
> function to build a regex from some strings, representing a
> membership test.
> * src/abg-regex.cc (generate_from_strings): Implement new
> function to build a regex from some strings, representing a
> membership test, in a straightfoward fashion.
> * src/abg-tools-utils.cc
> (gen_suppr_spec_from_kernel_abi_whitelists): Replace
> regex-building code with a call to generate_from_strings.
> * tests/test-kmi-whitelist.cc: Update regexes in test.
Applied to master, thanks!
Cheers,
@@ -27,6 +27,9 @@
#include <regex.h>
+#include <string>
+#include <vector>
+
#include "abg-cxx-compat.h"
#include "abg-sptr-utils.h"
@@ -55,6 +58,9 @@ struct regex_t_deleter
}
};//end struct regex_deleter
+std::string
+generate_from_strings(const std::vector<std::string>& strs);
+
}// end namespace regex
/// Specialization of sptr_utils::build_sptr for regex_t.
@@ -23,6 +23,7 @@
/// Some specialization for shared pointer utility templates.
///
+#include <sstream>
#include "abg-sptr-utils.h"
#include "abg-regex.h"
@@ -52,4 +53,31 @@ regex::regex_t_sptr
sptr_utils::build_sptr<regex_t>()
{return sptr_utils::build_sptr(new regex_t);}
+namespace regex
+{
+
+/// Generate a regex pattern equivalent to testing set membership.
+///
+/// A string will match the resulting pattern regex, if and only if it
+/// was present in the vector.
+///
+/// @param strs a vector of strings
+///
+/// @return a regex pattern
+std::string
+generate_from_strings(const std::vector<std::string>& strs)
+{
+ if (strs.empty())
+ return "^_^";
+ std::ostringstream os;
+ std::vector<std::string>::const_iterator i = strs.begin();
+ os << "^(" << *i++;
+ while (i != strs.end())
+ os << "|" << *i++;
+ os << ")$";
+ return os.str();
+}
+
+}//end namespace regex
+
}//end namespace abigail
@@ -61,6 +61,8 @@
#include "abg-dwarf-reader.h"
#include "abg-internal.h"
#include "abg-cxx-compat.h"
+#include "abg-regex.h"
+
// <headers defining libabigail's API go under here>
ABG_BEGIN_EXPORT_DECLARATIONS
@@ -2002,13 +2004,7 @@ gen_suppr_spec_from_kernel_abi_whitelists
// Build a regular expression representing the union of all
// the function and variable names expressed in the white list.
- std::stringstream regex_ss;
- regex_ss << "^";
- std::copy(whitelisted_names.begin(), whitelisted_names.end(),
- std::ostream_iterator<std::string>(regex_ss, "$|^"));
- regex_ss.seekp(0, std::ios::end);
- const std::string& regex =
- regex_ss.str().substr(0, static_cast<size_t>(regex_ss.tellp()) - 2);
+ const std::string regex = regex::generate_from_strings(whitelisted_names);
// Build a suppression specification which *keeps* functions
// whose ELF symbols match the regular expression contained
@@ -96,7 +96,7 @@ TEST_CASE("WhitelistWithASingleEntry", "[whitelists]")
suppressions_type suppr
= gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
REQUIRE(!suppr.empty());
- test_suppressions_are_consistent(suppr, "^test_symbol$");
+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
}
TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
@@ -106,7 +106,7 @@ TEST_CASE("WhitelistWithADuplicateEntry", "[whitelists]")
suppressions_type suppr
= gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
REQUIRE(!suppr.empty());
- test_suppressions_are_consistent(suppr, "^test_symbol$");
+ test_suppressions_are_consistent(suppr, "^(test_symbol)$");
}
TEST_CASE("TwoWhitelists", "[whitelists]")
@@ -118,7 +118,7 @@ TEST_CASE("TwoWhitelists", "[whitelists]")
gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
REQUIRE(!suppr.empty());
test_suppressions_are_consistent(suppr,
- "^test_another_symbol$|^test_symbol$");
+ "^(test_another_symbol|test_symbol)$");
}
TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
@@ -130,7 +130,7 @@ TEST_CASE("TwoWhitelistsWithDuplicates", "[whitelists]")
= gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
REQUIRE(!suppr.empty());
test_suppressions_are_consistent(suppr,
- "^test_another_symbol$|^test_symbol$");
+ "^(test_another_symbol|test_symbol)$");
}
TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
@@ -140,5 +140,5 @@ TEST_CASE("WhitelistWithTwoSections", "[whitelists]")
suppressions_type suppr
= gen_suppr_spec_from_kernel_abi_whitelists(abi_whitelist_paths);
REQUIRE(!suppr.empty());
- test_suppressions_are_consistent(suppr, "^test_symbol1$|^test_symbol2$");
+ test_suppressions_are_consistent(suppr, "^(test_symbol1|test_symbol2)$");
}