new file mode 100644
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright (C) 2020 Google, Inc.
+//
+// Author: Giuliano Procida
+
+#ifndef __ABG_SCC_H__
+#define __ABG_SCC_H__
+
+#include <cassert>
+#include <cstdint>
+#include <optional>
+#include <unordered_map>
+#include <vector>
+
+namespace abigail {
+
+/*
+ * This is a streamlined Strongly-Connected Component finder for use with
+ * procedurally generated or explored graphs, where the nodes and edges are not
+ * known a priori.
+ *
+ * REQUIREMENTS
+ *
+ * The Node type must be copyable and have a suitable hash function.
+ *
+ * The user code must take the form of a Depth First Search which can be
+ * repeatedly invoked on unvisited nodes until the whole graph has been
+ * traversed.
+ *
+ * The user code must always follow edges to child nodes, even if it knows the
+ * node has already been visited. The SCC finder needs to know about all edges.
+ *
+ * The user code must ensure that nodes are not re-examined once they have been
+ * assigned to an SCC. The finder does not maintain any state for such nodes.
+ *
+ * GUARANTEES
+ *
+ * The SCC finder will ensure each node is examined exactly once.
+ *
+ * The SCCs will be presented in a topological order, leaves first.
+ *
+ * Note that within each SCC, nodes will be presented in DFS traversal order,
+ * roots first. However, this is just an implemention detail, not a guarantee.
+ *
+ * USAGE
+ *
+ * Before examining a node, check it's not been assigned to an SCC already and
+ * then call Open. If the node is already "open" (i.e., is already waiting to be
+ * assigned to an SCC), this will return an empty optional value and the node
+ * should not be examined. If Open succeeds, a numerical node handle will be
+ * returned and the node will be recorded as waiting to be assigned to an SCC.
+ *
+ * Now examine the node, making recursive calls to follow edges to other nodes.
+ * Infomation about the node can be stored provisionally, but must NOT be used
+ * to make decisions about whether to revisit it - that is Open's job.
+ *
+ * Once the examination is done, call Close, passing in the handle. If the node
+ * has been identified as the "root" of an SCC, the whole SCC will be returned
+ * as a vector of nodes. If any processing needs to be done (such as recording
+ * the nodes as visited), this should be done now. Otherwise, an empty vector
+ * will be returned.
+ */
+template<typename Node, typename Hash = std::hash<Node>> class SCC {
+ public:
+ ~SCC() {
+ assert(open_.empty());
+ assert(is_open_.empty());
+ assert(root_index_.empty());
+ }
+
+ std::optional<size_t> Open(const Node &node) {
+ // Insertion will fail if the node is already open.
+ auto [it, inserted] = is_open_.insert({node, is_open_.size()});
+ auto ix = it->second;
+ if (!inserted) {
+ // Pop indices to nodes which cannot be the root of their SCC.
+ while (root_index_.back() > ix)
+ root_index_.pop_back();
+ return {};
+ }
+ // Unvisited, record open node and record root index.
+ open_.push_back(node);
+ root_index_.push_back(ix);
+ return ix;
+ }
+
+ std::vector<Node> Close(size_t ix) {
+ std::vector<Node> scc;
+ assert(ix < open_.size());
+ if (ix == root_index_.back()) {
+ // Close SCC.
+ for (size_t o = ix; o < open_.size(); ++o)
+ is_open_.erase(open_[o]);
+ std::move(open_.begin() + ix, open_.end(), std::back_inserter(scc));
+ open_.resize(ix);
+ root_index_.pop_back();
+ }
+ return scc;
+ }
+
+ private:
+ std::vector<Node> open_; // index to node
+ std::unordered_map<Node, size_t, Hash> is_open_; // node to index
+ std::vector<size_t> root_index_;
+};
+
+} // namespace abigail
+
+#endif // __ABG_SCC_H__
@@ -53,6 +53,10 @@ else
TESTS += runtestdefaultsupprs.py
endif
+if ENABLE_CXX17
+TESTS += runtestscc
+endif
+
EXTRA_DIST = \
runtestcanonicalizetypes.sh.in \
runtestfedabipkgdiff.py.in \
@@ -166,6 +170,9 @@ testdiff2_LDADD=$(top_builddir)/src/libabigail.la
printdifftree_SOURCES = print-diff-tree.cc
printdifftree_LDADD = $(top_builddir)/src/libabigail.la
+runtestscc_SOURCES = test-scc.cc
+runtestscc_LDADD = libcatch.la
+
runtestslowselfcompare_sh_SOURCES =
runtestslowselfcompare.sh$(EXEEXT):
new file mode 100644
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright (C) 2020 Google, Inc.
+//
+// Author: Giuliano Procida
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <iostream>
+#include <map>
+#include <random>
+#include <set>
+#include <sstream>
+#include <vector>
+
+#include "lib/catch.hpp"
+
+#include "abg-scc.h"
+
+namespace Test {
+
+using abigail::SCC;
+
+// Nodes are [0, N), the sets are the out-edges.
+typedef std::vector<std::set<size_t>> Graph;
+
+std::ostream &operator<<(std::ostream &os, const Graph &g) {
+ for (size_t i = 0; i < g.size(); ++i) {
+ os << i << ':';
+ for (auto o : g[i])
+ os << ' ' << o;
+ os << '\n';
+ }
+ return os;
+}
+
+template<typename G> Graph invent(size_t n, G& gen) {
+ Graph graph(n);
+ std::uniform_int_distribution<int> toss(0, 1);
+ for (auto &node : graph) {
+ for (size_t o = 0; o < n; ++o) {
+ if (toss(gen))
+ node.insert(o);
+ }
+ }
+ return graph;
+}
+
+// Generate a graph g' where a -> b iff a and b are strongly connected in g.
+Graph symmetric_subset_of_reflexive_transitive_closure(Graph g) {
+ const size_t n = g.size();
+ // transitive closure using Floyd-Warshall
+ for (size_t o = 0; o < n; ++o)
+ g[o].insert(o);
+ for (size_t k = 0; k < n; ++k) {
+ for (size_t i = 0; i < n; ++i) {
+ for (size_t j = 0; j < n; ++j) {
+ if (!g[i].count(j) && g[i].count(k) && g[k].count(j))
+ g[i].insert(j);
+ }
+ }
+ }
+ // now a -> b iff a has path to b in g
+ for (size_t i = 0; i < n; ++i) {
+ for (size_t j = i + 1; j < n; ++j) {
+ // discard a -> b if not b -> a and vice versa
+ auto ij = g[i].count(j);
+ auto ji = g[j].count(i);
+ if (ij < ji) g[j].erase(i);
+ if (ji < ij) g[i].erase(j);
+ }
+ }
+ // now have a -> b iff a has path to b and b has path to a in g
+ return g;
+}
+
+// Generate a graph where a -> b iff a and b are in the same SCC.
+Graph scc_strong_connectivity(const std::vector<std::set<size_t>> &sccs) {
+ size_t n = 0;
+ std::map<size_t, const std::set<size_t> *> edges;
+ for (const auto &scc : sccs) {
+ for (auto o : scc) {
+ if (o >= n)
+ n = o + 1;
+ edges[o] = &scc;
+ }
+ }
+ Graph g(n);
+ for (size_t o = 0; o < n; ++o)
+ g[o] = *edges[o];
+ return g;
+}
+
+void dfs(std::set<size_t> &visited, SCC<size_t> &scc,
+ const Graph &g, size_t node,
+ std::vector<std::set<size_t>> &sccs) {
+ if (visited.count(node))
+ return;
+ auto handle = scc.Open(node);
+ if (!handle)
+ return;
+ for (auto o : g[node])
+ dfs(visited, scc, g, o, sccs);
+ auto nodes = scc.Close(handle.value());
+ if (!nodes.empty()) {
+ std::set<size_t> scc_set;
+ for (auto o : nodes) {
+ CHECK(visited.insert(o).second);
+ CHECK(scc_set.insert(o).second);
+ }
+ sccs.push_back(scc_set);
+ }
+}
+
+void process(const Graph &g) {
+ const size_t n = g.size();
+
+ // find SCCs
+ std::set<size_t> visited;
+ SCC<size_t> scc;
+ std::vector<std::set<size_t>> sccs;
+ for (size_t o = 0; o < n; ++o)
+ dfs(visited, scc, g, o, sccs);
+
+ // check partition and topological order properties
+ std::set<size_t> seen;
+ for (const auto &nodes : sccs) {
+ CHECK(!nodes.empty());
+ for (auto node : nodes) {
+ // value in range [0, n)
+ CHECK(node < n);
+ // value seen at most once
+ CHECK(seen.insert(node).second);
+ }
+ for (auto node : nodes) {
+ for (auto o : g[node]) {
+ // edges point to nodes in this or earlier SCCs
+ CHECK(seen.count(o));
+ }
+ }
+ }
+ // exactly n values seen
+ CHECK(seen.size() == n);
+
+ // check strong connectivity
+ auto g_scc_closure = scc_strong_connectivity(sccs);
+ auto g_closure = symmetric_subset_of_reflexive_transitive_closure(g);
+ // catch isn't printing nicely
+ if (g_scc_closure != g_closure) {
+ std::cerr << "original:\n" << g
+ << "SCC finder:\n" << g_scc_closure
+ << "SCCs independently:\n" << g_closure;
+ }
+ CHECK(g_scc_closure == g_closure);
+}
+
+TEST_CASE("randomly-generated graphs") {
+ std::mt19937 gen;
+ auto seed = gen();
+ // NOTES:
+ // Graphs of size 6 are plenty big enough to shake out bugs.
+ // There are O(2^k^2) possible directed graphs of size k.
+ // Testing costs are O(k^3) so we restrict accordingly.
+ uint64_t budget = 10000;
+ for (size_t k = 0; k < 7; ++k) {
+ uint64_t count = std::min(static_cast<uint64_t>(1) << (k*k),
+ budget / (k ? k * k * k : 1));
+ INFO("testing with " << count << " graphs of size " << k);
+ for (uint64_t n = 0; n < count; ++n, ++seed) {
+ gen.seed(seed);
+ Graph g = invent(k, gen);
+ std::ostringstream os;
+ os << "a graph of " << k << " nodes generated using seed " << seed;
+ GIVEN(os.str()) {
+ process(g);
+ }
+ }
+ }
+}
+
+} // namespace Test