@@ -44,6 +44,7 @@
ABG_BEGIN_EXPORT_DECLARATIONS
#include "abg-dwarf-reader.h"
+#include "abg-scc.h"
#include "abg-sptr-utils.h"
#include "abg-symtab-reader.h"
#include "abg-tools-utils.h"
@@ -161,18 +162,36 @@ typedef unordered_map<GElf_Addr, elf_symbol_sptr> addr_elf_symbol_sptr_map_type;
/// Convenience typedef for a set of ELF addresses.
typedef unordered_set<GElf_Addr> address_set_type;
-/// A hasher for a pair of Dwarf_Off. This is used as a hasher for
-/// the type @ref dwarf_offset_pair_set_type.
-struct dwarf_offset_pair_hash
+/// Unique identifier for a DWARF DIE.
+///
+/// NOTES. Offset alone is not enough to recover a DIE. Actually, offset may not
+/// uniquely specify a DIE as the same offset may appear in different DIE sources.
+struct dwarf_id
+{
+ die_source source;
+ Dwarf_Off offset;
+ bool operator==(const dwarf_id& other) const
+ {return source == other.source && offset == other.offset;}
+};// end struct dwarf_id
+
+/// A hasher for a dwarf_id.
+struct dwarf_id_hash
{
size_t
- operator()(const std::pair<Dwarf_Off, Dwarf_Off>& p) const
- {return abigail::hashing::combine_hashes(p.first, p.second);}
-};// end struct dwarf_offset_pair_hash
+ operator()(const dwarf_id& id) const
+ {return abigail::hashing::combine_hashes(id.source, id.offset);}
+};// end struct dwarf_id_hash
-typedef unordered_set<std::pair<Dwarf_Off,
- Dwarf_Off>,
- dwarf_offset_pair_hash> dwarf_offset_pair_set_type;
+/// A hasher for a pair of dwarf_id.
+struct dwarf_id_pair_hash
+{
+ size_t
+ operator()(const std::pair<dwarf_id, dwarf_id>& p) const
+ {
+ dwarf_id_hash h;
+ return abigail::hashing::combine_hashes(h(p.first), h(p.second));
+ }
+};// end struct dwarf_id_pair_hash
/// Convenience typedef for a shared pointer to an @ref address_set_type.
typedef shared_ptr<address_set_type> address_set_sptr;
@@ -1919,6 +1938,27 @@ struct dwarf_expr_eval_context
class read_context
{
public:
+ // NOTES
+ //
+ // These are mutable because otherwise I have to either adjust arguments to
+ // compare_dies or make lots of functions non-const. One of these courses of
+ // action is better than the other, but I'm unsure as to which, and both are
+ // better than using mutable.
+ //
+ // There are cases where compare_dies is called recursively via other
+ // functions and this breaks the shared aggregates_being_compared chain.
+ // Having the data structures in the read context means we can share
+ // pending_comparisons to wherever it may be needed and share
+ // completed_comparisons to the widest applicable scope.
+ //
+ // completed_comparisons is probably not needed as 1. almost all DIEs compared
+ // will be canonicalizable and if two DIEs are found to be equivalent, their
+ // canonical DIEs should be recorded and this will short cut comparison on the
+ // next visit and 2. non-equivalent DIE comparisons are likely to be very cheap.
+ mutable std::unordered_map<std::pair<dwarf_id, dwarf_id>, bool, dwarf_id_pair_hash>
+ completed_comparisons;
+ mutable stg::SCC<std::pair<dwarf_id, dwarf_id>, dwarf_id_pair_hash>
+ pending_comparisons;
struct options_type
{
environment* env;
@@ -2215,6 +2255,9 @@ public:
bool load_all_types,
bool linux_kernel_mode)
{
+ completed_comparisons.clear();
+ ABG_ASSERT(pending_comparisons.Empty());
+
dwarf_version_ = 0;
dwarf_ = 0;
handle_.reset();
@@ -2822,6 +2865,11 @@ public:
{
cur_die_offset = *o;
get_die_from_offset(source, cur_die_offset, &potential_canonical_die);
+ // NOTE: This recursive call has the potential to break the
+ // invariants that the SCC finder relies on. However, it is called with
+ // update_canonical_dies_on_the_fly set to false which inhibits updating
+ // canonical DIE status. Perhaps it should also inhibit the SCC finder
+ // logic.
if (compare_dies(*this, &die, &potential_canonical_die,
/*update_canonical_dies_on_the_fly=*/false))
{
@@ -2926,6 +2974,12 @@ public:
cur_die_offset = *o;
get_die_from_offset(source, cur_die_offset, &canonical_die);
// compare die and canonical_die.
+ //
+ // NOTE: This recursive call has the potential to break the
+ // invariants that the SCC finder relies on. It is also called with
+ // update_canonical_dies_on_the_fly set to true, which may cause
+ // changes to comparison outcomes in a way which upsets SCC finding
+ // logic.
if (compare_dies(*this, die, &canonical_die,
/*update_canonical_dies_on_the_fly=*/true))
{
@@ -3046,6 +3100,12 @@ public:
Dwarf_Off die_offset = i->second[n];
get_die_from_offset(source, die_offset, &canonical_die);
// compare die and canonical_die.
+ //
+ // NOTE: This recursive call has the potential to break the
+ // invariants that the SCC finder relies on. It is also called with
+ // update_canonical_dies_on_the_fly set to true, which may cause
+ // changes to comparison outcomes in a way which upsets SCC finding
+ // logic.
if (compare_dies(*this, die, &canonical_die,
/*update_canonical_dies_on_the_fly=*/true))
{
@@ -10132,50 +10192,6 @@ fn_die_equal_by_linkage_name(const read_context &ctxt,
&& llinkage_name == rlinkage_name);
}
-/// Test if the pair of offset {p1,p2} is present in a set.
-///
-/// @param set the set of pairs of DWARF offsets to consider.
-///
-/// @param p1 the first value of the pair.
-///
-/// @param p2 the second value of the pair.
-///
-/// @return if the pair {p1,p2} is present in the set.
-static bool
-has_offset_pair(const dwarf_offset_pair_set_type& set,
- Dwarf_Off p1, Dwarf_Off p2)
-{
- if (set.find(std::make_pair(p1, p2)) != set.end())
- return true;
- return false;
-}
-
-/// Insert a new pair of offset into the set of pair.
-///
-/// @param set the set of pairs of DWARF offsets to consider.
-///
-/// @param p1 the first value of the pair.
-///
-/// @param p2 the second value of the pair.
-static void
-insert_offset_pair(dwarf_offset_pair_set_type& set, Dwarf_Off p1, Dwarf_Off p2)
-{set.insert(std::make_pair(p1, p2));}
-
-/// Erase a pair of DWARF offset from a set of pairs.
-///
-///
-/// @param set the set of pairs of DWARF offsets to consider.
-///
-/// @param p1 the first value of the pair.
-///
-/// @param p2 the second value of the pair.
-static void
-erase_offset_pair(dwarf_offset_pair_set_type& set, Dwarf_Off p1, Dwarf_Off p2)
-{
- std::pair<Dwarf_Off, Dwarf_Off> p(p1, p2);
- set.erase(p);
-}
-
/// Compare two DIEs emitted by a C compiler.
///
/// @param ctxt the read context used to load the DWARF information.
@@ -10202,12 +10218,17 @@ erase_offset_pair(dwarf_offset_pair_set_type& set, Dwarf_Off p1, Dwarf_Off p2)
static bool
compare_dies(const read_context& ctxt,
const Dwarf_Die *l, const Dwarf_Die *r,
- dwarf_offset_pair_set_type& aggregates_being_compared,
bool update_canonical_dies_on_the_fly)
{
ABG_ASSERT(l);
ABG_ASSERT(r);
+ // NOTES
+ //
+ // Some of the work that gets done here gets duplicated at the end of the
+ // function. This is because we cannot store Dwarf_Die addresses in data
+ // structures and the end of the function may need to update multiple
+ // canonical DIE offsets.
int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l)),
r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
@@ -10235,8 +10256,38 @@ compare_dies(const read_context& ctxt,
if (l_has_canonical_die_offset && r_has_canonical_die_offset)
return l_canonical_die_offset == r_canonical_die_offset;
+ // NOTE: A bit of paranoia as we'll need to do this later. If it fails now,
+ // that's an obvious issue. If it fails later, something stranger is
+ // happening.
+ Dwarf_Die junk;
+ ctxt.get_die_from_offset(l_die_source, die_offset(l), &junk);
+ ctxt.get_die_from_offset(r_die_source, die_offset(r), &junk);
+ // NOTES
+ //
+ // As mentioned elsewhere this is probably a bit redundant.
+ //
+ // In fact, we should try to maintain the invariant that if there is a record
+ // of the comparison and it's true, then the two DIEs should have equal
+ // canonical DIE offsets. At this point, we could make this cache false values
+ // only (and make it a set called unequal_comparisons) or drop it altogether.
+ auto comp = std::make_pair(dwarf_id{l_die_source, die_offset(l)},
+ dwarf_id{r_die_source, die_offset(r)});
+ auto it = ctxt.completed_comparisons.find(comp);
+ if (it != ctxt.completed_comparisons.end())
+ return it->second;
+
+ auto handle = ctxt.pending_comparisons.Open(comp);
+ if (!handle)
+ return true;
+ // NOTES
+ //
+ // That was the first SCC helper call.
+ //
+ // This function must not return without calling Close.
+ //
+ // All recursive calls to compare_dies should happen before Close. This is not
+ // the case with calls to compute_canonical_die_offset etc.
bool result = true;
- bool aggregate_redundancy_detected = false;
switch (l_tag)
{
@@ -10293,7 +10344,6 @@ compare_dies(const read_context& ctxt,
result = false;
else
result = compare_dies(ctxt, &lu_type_die, &ru_type_die,
- aggregates_being_compared,
update_canonical_dies_on_the_fly);
}
break;
@@ -10348,19 +10398,10 @@ compare_dies(const read_context& ctxt,
case DW_TAG_structure_type:
case DW_TAG_union_type:
{
- if (has_offset_pair(aggregates_being_compared,
- die_offset(l), die_offset(r)))
- {
- result = true;
- aggregate_redundancy_detected = true;
- break;
- }
- else if (!compare_as_decl_dies(l, r) || !compare_as_type_dies(l, r))
+ if (!compare_as_decl_dies(l, r) || !compare_as_type_dies(l, r))
result = false;
else
{
- insert_offset_pair(aggregates_being_compared,
- die_offset(l), die_offset(r));
Dwarf_Die l_member, r_member;
bool found_l_member, found_r_member;
for (found_l_member = dwarf_child(const_cast<Dwarf_Die*>(l),
@@ -10382,7 +10423,6 @@ compare_dies(const read_context& ctxt,
continue;
if (!compare_dies(ctxt, &l_member, &r_member,
- aggregates_being_compared,
update_canonical_dies_on_the_fly))
{
result = false;
@@ -10391,9 +10431,6 @@ compare_dies(const read_context& ctxt,
}
if (found_l_member != found_r_member)
result = false;
-
- erase_offset_pair(aggregates_being_compared,
- die_offset(l), die_offset(r));
}
}
break;
@@ -10415,7 +10452,6 @@ compare_dies(const read_context& ctxt,
if (l_child_tag == DW_TAG_subrange_type
|| r_child_tag == DW_TAG_subrange_type)
if (!compare_dies(ctxt, &l_child, &r_child,
- aggregates_being_compared,
update_canonical_dies_on_the_fly))
{
result = false;
@@ -10431,9 +10467,8 @@ compare_dies(const read_context& ctxt,
ABG_ASSERT(found_ltype && found_rtype);
if (!compare_dies(ctxt, <ype_die, &rtype_die,
- aggregates_being_compared,
update_canonical_dies_on_the_fly))
- return false;
+ result = false;
}
break;
@@ -10478,14 +10513,7 @@ compare_dies(const read_context& ctxt,
interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
- if (has_offset_pair(aggregates_being_compared, die_offset(l),
- die_offset(r)))
- {
- result = true;
- aggregate_redundancy_detected = true;
- break;
- }
- else if (l_tag == DW_TAG_subroutine_type)
+ if (l_tag == DW_TAG_subroutine_type)
{
// So, we are looking at types that are pointed to by a
// function pointer. These are not real concrete function
@@ -10539,7 +10567,6 @@ compare_dies(const read_context& ctxt,
|| (!l_return_type_is_void
&& !compare_dies(ctxt,
&l_return_type, &r_return_type,
- aggregates_being_compared,
update_canonical_dies_on_the_fly)))
result = false;
else
@@ -10561,7 +10588,6 @@ compare_dies(const read_context& ctxt,
if (l_child_tag != r_child_tag
|| (l_child_tag == DW_TAG_formal_parameter
&& !compare_dies(ctxt, &l_child, &r_child,
- aggregates_being_compared,
update_canonical_dies_on_the_fly)))
{
result = false;
@@ -10572,9 +10598,6 @@ compare_dies(const read_context& ctxt,
result = false;
}
}
-
- erase_offset_pair(aggregates_being_compared,
- die_offset(l), die_offset(r));
}
break;
@@ -10585,7 +10608,6 @@ compare_dies(const read_context& ctxt,
bool r_type_is_void = !die_die_attribute(r, DW_AT_type, r_type);
if ((l_type_is_void != r_type_is_void)
|| !compare_dies(ctxt, &l_type, &r_type,
- aggregates_being_compared,
update_canonical_dies_on_the_fly))
result = false;
}
@@ -10611,7 +10633,6 @@ compare_dies(const read_context& ctxt,
ABG_ASSERT(die_die_attribute(l, DW_AT_type, l_type));
ABG_ASSERT(die_die_attribute(r, DW_AT_type, r_type));
if (!compare_dies(ctxt, &l_type, &r_type,
- aggregates_being_compared,
update_canonical_dies_on_the_fly))
result = false;
}
@@ -10677,60 +10698,73 @@ compare_dies(const read_context& ctxt,
ABG_ASSERT_NOT_REACHED;
}
+ // NOTES
+ //
+ // This is the second call to the SCC finder.
+ //
+ // We can only process the comparisons it gives us and if it gives us nothing,
+ // it means that the comparison requested in this instance of compare_dies is
+ // still in progress.
+ auto comparisons = ctxt.pending_comparisons.Close(handle.value());
+ // NOTE: As mentioned, may be largely redundant with canonical offsets.
+ for (const auto& comparison : comparisons)
+ ctxt.completed_comparisons[comparison] = result;
+
if (result == true
- && !aggregate_redundancy_detected
- && update_canonical_dies_on_the_fly
- && is_canonicalizeable_type_tag(l_tag))
+ && update_canonical_dies_on_the_fly) // is this still needed?
{
- // If 'l' has no canonical DIE and if 'r' has one, then propagage
- // the canonical DIE of 'r' to 'l'.
- //
- // In case 'r' has no canonical DIE, then compute it, and then
- // propagate that canonical DIE to 'r'.
- const die_source l_source = ctxt.get_die_source(l);
- const die_source r_source = ctxt.get_die_source(r);
-
- if (!l_has_canonical_die_offset
- // A DIE can be equivalent only to another DIE of the same
- // source.
- && l_source == r_source)
+ for (const auto& comparison : comparisons)
{
- if (!r_has_canonical_die_offset)
- ctxt.compute_canonical_die_offset(r, r_canonical_die_offset,
+ // NOTES
+ //
+ // We have to recover the original DIEs.
+ //
+ // Alas, I think this sometimes fails as there is an assertion from
+ // get_die_from_offset.
+ const auto& l_dwarf_id = comparison.first;
+ const auto& r_dwarf_id = comparison.second;
+ const die_source l_die_source = l_dwarf_id.source;
+ const die_source r_die_source = r_dwarf_id.source;
+ Dwarf_Off l_offset = l_dwarf_id.offset;
+ Dwarf_Off r_offset = r_dwarf_id.offset;
+ Dwarf_Die l_die;
+ Dwarf_Die r_die;
+ ctxt.get_die_from_offset(l_die_source, l_offset, &l_die);
+ ctxt.get_die_from_offset(r_die_source, r_offset, &r_die);
+ int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(&l_die));
+ // NOTE: r_tag is guaranteed to be equal to l_tag at this point.
+ if (is_canonicalizeable_type_tag(l_tag))
+ {
+ // NOTE: I tried various things here. This version is an attempt
+ // to avoid calling functions (like compute_canonical_die_offset)
+ // which can call compare_dies recursively. And this version
+ // passes almost all of the test suite, but it is slower,
+ // presumably because we're not actually determining canonical
+ // offsets when we should. Doing things like the original code
+ // causes more serious trouble though.
+ //
+ // If only one of l and r has a canonical DIE offset, propagate it to the other.
+ Dwarf_Off l_canonical_die_offset = 0;
+ Dwarf_Off r_canonical_die_offset = 0;
+ bool l_has_canonical_die_offset =
+ (l_canonical_die_offset =
+ ctxt.get_canonical_die_offset(l_offset, l_die_source,
+ /*die_as_type=*/true));
+ bool r_has_canonical_die_offset =
+ (r_canonical_die_offset =
+ ctxt.get_canonical_die_offset(r_offset, r_die_source,
+ /*die_as_type=*/true));
+ if (l_has_canonical_die_offset < r_has_canonical_die_offset)
+ ctxt.set_canonical_die_offset(&l_die, r_canonical_die_offset,
+ /*die_as_type=*/true);
+ if (l_has_canonical_die_offset > r_has_canonical_die_offset)
+ ctxt.set_canonical_die_offset(&r_die, l_canonical_die_offset,
/*die_as_type=*/true);
- ABG_ASSERT(r_canonical_die_offset);
- ctxt.set_canonical_die_offset(l, r_canonical_die_offset,
- /*die_as_type=*/true);
+ }
}
}
- return result;
-}
-/// Compare two DIEs emitted by a C compiler.
-///
-/// @param ctxt the read context used to load the DWARF information.
-///
-/// @param l the left-hand-side argument of this comparison operator.
-///
-/// @param r the righ-hand-side argument of this comparison operator.
-///
-/// @param update_canonical_dies_on_the_fly if yes, then this function
-/// updates the canonical DIEs of sub-type DIEs of 'l' and 'r', while
-/// comparing l and r. This helps in making so that sub-type DIEs of
-/// 'l' and 'r' are compared structurally only once. This is how we
-/// turn this exponential comparison problem into a problem that is a
-/// closer to a linear one.
-///
-/// @return true iff @p l equals @p r.
-static bool
-compare_dies(const read_context& ctxt,
- const Dwarf_Die *l,
- const Dwarf_Die *r,
- bool update_canonical_dies_on_the_fly)
-{
- dwarf_offset_pair_set_type aggregates_being_compared;
- return compare_dies(ctxt, l, r, aggregates_being_compared,
- update_canonical_dies_on_the_fly);
+ return result;
}
// ----------------------------------
new file mode 100644
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+// -*- mode: C++ -*-
+//
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions (the
+// "License"); you may not use this file except in compliance with the
+// License. You may obtain a copy of the License at
+//
+// https://llvm.org/LICENSE.txt
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Author: Giuliano Procida
+
+#ifndef STG_SCC_H_
+#define STG_SCC_H_
+
+#include <cstddef>
+#include <iterator>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "abg-cxx-compat.h"
+
+namespace stg {
+
+/*
+ * This is a streamlined Strongly-Connected Component finder for use with
+ * procedurally generated or explored graphs, where the nodes and edges are not
+ * known a priori.
+ *
+ * REQUIREMENTS
+ *
+ * The Node type must be copyable and have a suitable hash function.
+ *
+ * The user code must take the form of a Depth First Search which can be
+ * repeatedly invoked on unvisited nodes until the whole graph has been
+ * traversed.
+ *
+ * The user code must always follow edges to child nodes, even if it knows the
+ * node has already been visited. The SCC finder needs to know about all edges.
+ *
+ * The user code must ensure that nodes are not re-examined once they have been
+ * assigned to an SCC. The finder does not maintain any state for such nodes.
+ *
+ * GUARANTEES
+ *
+ * The SCC finder will ensure each node is examined exactly once.
+ *
+ * The SCCs will be presented in a topological order, leaves first.
+ *
+ * Note that within each SCC, nodes will be presented in DFS traversal order,
+ * roots first. However, this is just an implementation detail, not a guarantee.
+ *
+ * USAGE
+ *
+ * Before examining a node, check it's not been assigned to an SCC already and
+ * then call Open. If the node is already "open" (i.e., is already waiting to be
+ * assigned to an SCC), this will return an empty optional value and the node
+ * should not be examined. If Open succeeds, a numerical node handle will be
+ * returned and the node will be recorded as waiting to be assigned to an SCC.
+ *
+ * Now examine the node, making recursive calls to follow edges to other nodes.
+ * Information about the node can be stored provisionally, but must NOT be used
+ * to make decisions about whether to revisit it - that is Open's job.
+ *
+ * Once the examination is done, call Close, passing in the handle. If the node
+ * has been identified as the "root" of an SCC, the whole SCC will be returned
+ * as a vector of nodes. If any processing needs to be done (such as recording
+ * the nodes as visited), this should be done now. Otherwise, an empty vector
+ * will be returned.
+ *
+ * After a top-level DFS has completed, the SCC finder should be carrying no
+ * state. This can be verified by calling Empty.
+ */
+template <typename Node, typename Hash = std::hash<Node>>
+class SCC {
+ public:
+ bool Empty() const {
+ return open_.empty() && is_open_.empty() && root_index_.empty();
+ }
+
+ abg_compat::optional<size_t> Open(const Node& node) {
+ // Insertion will fail if the node is already open.
+ const auto insertion = is_open_.insert({node, is_open_.size()});
+ const auto inserted = insertion.second;
+ const auto ix = insertion.first->second;
+ if (!inserted) {
+ // Pop indices to nodes which cannot be the root of their SCC.
+ while (root_index_.back() > ix)
+ root_index_.pop_back();
+ return {};
+ }
+ // Unvisited, record open node and record root index.
+ open_.push_back(node);
+ root_index_.push_back(ix);
+ return {ix};
+ }
+
+ std::vector<Node> Close(size_t ix) {
+ std::vector<Node> scc;
+ ABG_ASSERT(ix < open_.size());
+ if (ix == root_index_.back()) {
+ // Close SCC.
+ for (size_t o = ix; o < open_.size(); ++o)
+ is_open_.erase(open_[o]);
+ std::move(open_.begin() + ix, open_.end(), std::back_inserter(scc));
+ open_.resize(ix);
+ root_index_.pop_back();
+ }
+ return scc;
+ }
+
+ private:
+ std::vector<Node> open_; // index to node
+ std::unordered_map<Node, size_t, Hash> is_open_; // node to index
+ std::vector<size_t> root_index_;
+};
+
+} // namespace stg
+
+#endif // STG_SCC_H_