[RFC,7/9] Add pass to eliminate duplicate member-type fragments

Message ID 20210325215146.3597963-8-gprocida@google.com
State Superseded, archived
Headers
Series Utility to manipulate ABI XML |

Commit Message

Giuliano Procida March 25, 2021, 9:51 p.m. UTC
  One common kind of duplicate type in ABI XML occurs when the XNL
writer emits a type declaration with its surrounding scope but
that scope actually includes one or more user-defined types.

An additional complication is that a nested member type's access can
be misapplied to its containing type.

The types themselves are later emitted in full and with correct member
access specifiers.

	* scripts/abitidy.pl (sub_tree): New function to determine if
	one XML node is a sub-tree of another (in the sense that the
	former can be overlaid without changing the latter), ignoring
	possibly-incorrect access attributes.
	(eliminate_duplicate_types): New function that checks nodes
	with the same type id, determines if there is a maximal node
	and, if so, remmoves the other nodes.

Signed-off-by: Giuliano Procida <gprocida@google.com>
---
 scripts/abitidy.pl | 112 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 1 deletion(-)
  

Patch

diff --git a/scripts/abitidy.pl b/scripts/abitidy.pl
index d5ddd7ea..35b3c054 100755
--- a/scripts/abitidy.pl
+++ b/scripts/abitidy.pl
@@ -12,9 +12,11 @@  use experimental 'signatures';
 
 use autodie;
 
+use Algorithm::Diff qw(diff);
 use Data::Dumper;
 use Getopt::Long;
 use IO::File;
+use List::Util qw(any all uniq);
 use XML::LibXML;
 
 # Overview of ABI XML elements and their roles
@@ -376,6 +378,108 @@  sub normalise_anonymous_type_names($) {
   }
 }
 
+sub sub_tree;
+sub sub_tree($l, $r) {
+  # Node name must be an exact match.
+  return 0 if $l->getName() ne $r->getName();
+  # Left attributes may be missing on the left, but otherwise must match.
+  # With one exception: libabigail emits the access specifier for the
+  # type it's trying to "emit in scope" rather than what may be a
+  # containing type; so allow access to differ on member-type nodes.
+  for my $k (keys %$l) {
+    my $lv = $l->{$k};
+    my $rv = $r->{$k};
+    return 0 unless defined $rv;
+    next if $k eq 'access' && $l->getName() eq 'member-type';
+    return 0 unless $lv eq $rv;
+  }
+  # Left elements must be a subsequence of right elements.
+  my @lc = grep { $_->nodeType != XML_COMMENT_NODE } $l->childNodes();
+  my @rc = grep { $_->nodeType != XML_COMMENT_NODE } $r->childNodes();
+  unless (scalar @lc <= scalar @rc) {
+    return 0;
+  }
+  # Only handle three forms of subsequence. Otherwise need some kind
+  # of backtracking check.
+  unless (@lc) {
+    # empty
+    return 1;
+  }
+  if (scalar @lc == 1) {
+    # singleton
+    return any { $_ } map { sub_tree($lc[0], $_) } @rc;
+  }
+  if (scalar @lc == scalar @rc) {
+    # same length
+    return all { $_ } map { sub_tree($lc[$_], $rc[$_]) } (0..$#lc);
+  }
+  warn "XML elements have interestingly different subelements\n",
+    $l->toString(), "\n", $r->toString(), "\n";
+  return 0;
+}
+
+sub get_scope($node) {
+  my @names;
+  while (1) {
+    $node = $node->parentNode;
+    last unless defined $node && $node->nodeType == XML_ELEMENT_NODE;
+    my $kind = $node->getName();
+    last if $kind eq 'abi-instr';
+    # Some C++ enums introduce scope but we don't need to worry.
+    next unless $kind eq 'class-decl' || $kind eq 'union-decl' || $kind eq 'namespace-decl';
+    # Anonymous type are not permitted to contain type members (but
+    # this still works with g++ -fpermissive).
+    my $name = $node->getAttribute('name');
+    unshift @names, $name;
+  }
+  return @names;
+}
+
+sub eliminate_duplicate_types($dom) {
+  my %hash;
+  # Collect all (unnested) types and their namespace scopes.
+  for my $type ($dom->findnodes('//abi-corpus/abi-instr/*[@id] | //abi-corpus/abi-instr//namespace-decl/*[@id]')) {
+    my $id = $type->getAttribute('id');
+    my $scope = join(':', get_scope($type));
+    for my $list ($hash{$id}{$scope}) {
+      $list //= [];
+      push @$list, $type;
+    }
+  }
+  for my $id (keys %hash) {
+    my $scopes = $hash{$id};
+    if (scalar keys %$scopes > 1) {
+      warn "inconsistent scopes found for duplicate types with id $id\n";
+      next;
+    }
+    my ($ns) = keys %$scopes;
+    my $types = $scopes->{$ns};
+    next if scalar @$types == 1;
+    # Find a potential maximal candidate.
+    my $candidate = 0;
+    for my $ix (1..$#$types) {
+      $candidate = $ix if sub_tree($types->[$candidate], $types->[$ix]);
+    }
+    # Verify it is indeed maximal.
+    my @losers = grep { $_ != $candidate } (0..$#$types);
+    for my $ix (@losers) {
+      unless (sub_tree($types->[$ix], $types->[$candidate])) {
+        warn "conflicting duplicate types with id $id\n";
+        my @strs = map { $types->[$_]->toString() } ($ix, $candidate);
+        map { $_ =~ s;><;>\n<;g } @strs;
+        my @lines = map { [split("\n", $_)] } @strs;
+        warn Dumper(diff(@lines));
+        $candidate = undef;
+        last;
+      }
+    }
+    if (defined $candidate) {
+      map { remove_node($types->[$_]) } @losers;
+      warn "successfully eliminated duplicate types with id $id\n";
+    }
+  }
+}
+
 sub report_duplicate_types($dom) {
   my %hash;
   for my $type ($dom->findnodes('*[@id]')) {
@@ -400,16 +504,18 @@  my $all_opt;
 my $drop_opt;
 my $prune_opt;
 my $normalise_opt;
+my $eliminate_opt;
 my $report_opt;
 GetOptions('i|input=s' => \$input_opt,
            'o|output=s' => \$output_opt,
            's|symbols=s' => \$symbols_opt,
            'a|all' => sub {
-             $drop_opt = $prune_opt = $normalise_opt = $report_opt = 1
+             $drop_opt = $prune_opt = $normalise_opt = $eliminate_opt = $report_opt = 1
            },
            'd|drop-empty!' => \$drop_opt,
            'p|prune-unreachable!' => \$prune_opt,
            'n|normalise-anonymous!' => \$normalise_opt,
+           'e|eliminate-duplicates!' => \$eliminate_opt,
            'r|report-duplicates!' => \$report_opt,
   ) and !@ARGV or die("usage: $0",
                       map { (' ', $_) } (
@@ -420,6 +526,7 @@  GetOptions('i|input=s' => \$input_opt,
                         '[-d|--[no-]drop-empty]',
                         '[-p|--[no-]prune-unreachable]',
                         '[-n|--[no-]normalise-anonymous]',
+                        '[-e|--[no-]eliminate-duplicates]',
                         '[-r|--[no-]report-duplicates]',
                       ), "\n");
 
@@ -439,6 +546,9 @@  filter_symbols(read_symbols($symbols_opt), $dom) if defined $symbols_opt;
 # Normalise anonymous type names.
 normalise_anonymous_type_names($dom) if $normalise_opt;
 
+# Eliminate complete duplicates and extra fragments of types.
+eliminate_duplicate_types($dom) if $eliminate_opt;
+
 # Check for duplicate types.
 report_duplicate_types($dom) if $report_opt;