[10/12,contrib] validate_failures.py: Add new option --invert_match

Message ID 20230602152052.1874860-11-maxim.kuvyrkov@linaro.org
State Committed
Commit 22a0ade86cb5ccb6c8e8ff047a511374cbc799ce
Headers
Series [01/12,contrib] validate_failures.py: Avoid testsuite aliasing |

Checks

Context Check Description
linaro-tcwg-bot/tcwg_gcc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_gcc_check--master-arm success Testing passed

Commit Message

Maxim Kuvyrkov June 2, 2023, 3:20 p.m. UTC
  This option is used to detect flaky tests that FAILed in the clean
build (or manifest), but PASSed in the current build (or manifest).

The option inverts output logic similar to what "-v/--invert-match"
does for grep.
---
 .../testsuite-management/validate_failures.py | 34 +++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)
  

Patch

diff --git a/contrib/testsuite-management/validate_failures.py b/contrib/testsuite-management/validate_failures.py
index 1919935cf53..6eb1acd473f 100755
--- a/contrib/testsuite-management/validate_failures.py
+++ b/contrib/testsuite-management/validate_failures.py
@@ -217,11 +217,17 @@  class ResultSet(set):
   Attributes:
     current_tool: Name of the current top-level DejaGnu testsuite.
     current_exp: Name of the current .exp testsuite file.
+    testsuites: A set of (tool, exp) tuples representing encountered testsuites.
   """
 
   def __init__(self):
     super().__init__()
     self.ResetToolExp()
+    self.testsuites=set()
+
+  def update(self, other):
+    super().update(other)
+    self.testsuites.update(other.testsuites)
 
   def ResetToolExp(self):
     self.current_tool = None
@@ -246,6 +252,10 @@  class ResultSet(set):
 
     outfile.write(_SUMMARY_LINE_FORMAT % 'Results')
 
+  # Check if testsuite of expected_result is present in current results.
+  # This is used to compare partial test results against a full manifest.
+  def HasTestsuite(self, expected_result):
+    return (expected_result.tool, expected_result.exp) in self.testsuites
 
 def GetMakefileValue(makefile_name, value_name):
   if os.path.exists(makefile_name):
@@ -391,6 +401,8 @@  def ParseSummary(sum_fname):
       result_set.add(result)
     elif IsExpLine(line):
       result_set.current_exp = _EXP_LINE_REX.match(line).groups()[0]
+      result_set.testsuites.add((result_set.current_tool,
+                                 result_set.current_exp))
     elif IsToolLine(line):
       result_set.current_tool = _TOOL_LINE_REX.match(line).groups()[0]
       result_set.current_exp = None
@@ -433,7 +445,7 @@  def GetResults(sum_files, build_results = None):
   for sum_fname in sum_files:
     if _OPTIONS.verbosity >= 3:
       print('\t%s' % sum_fname)
-    build_results |= ParseSummary(sum_fname)
+    build_results.update(ParseSummary(sum_fname))
   return build_results
 
 
@@ -458,7 +470,11 @@  def CompareResults(manifest, actual):
     # Ignore tests marked flaky.
     if 'flaky' in expected_result.attrs:
       continue
-    if expected_result not in actual:
+    # We try to support comparing partial results vs full manifest
+    # (e.g., manifest has failures for gcc, g++, gfortran, but we ran only
+    # g++ testsuite).  To achieve this we record encountered testsuites in
+    # actual.testsuites set, and then we check it here using HasTestsuite().
+    if expected_result not in actual and actual.HasTestsuite(expected_result):
       manifest_vs_actual.add(expected_result)
 
   return actual_vs_manifest, manifest_vs_actual
@@ -520,6 +536,13 @@  def GetSumFiles(results, build_dir):
 def PerformComparison(expected, actual):
   actual_vs_expected, expected_vs_actual = CompareResults(expected, actual)
 
+  if _OPTIONS.inverse_match:
+    # Switch results if inverse comparison is requested.
+    # This is useful in detecting flaky tests that FAILed in expected set,
+    # but PASSed in actual set.
+    actual_vs_expected, expected_vs_actual \
+      = expected_vs_actual, actual_vs_expected
+
   tests_ok = True
   if len(actual_vs_expected) > 0:
     if _OPTIONS.verbosity >= 3:
@@ -613,6 +636,13 @@  def Main(argv):
                     default=False, help='When used with --produce_manifest, '
                     'it will overwrite an existing manifest file '
                     '(default = False)')
+  parser.add_option('--inverse_match', action='store_true',
+                    dest='inverse_match', default=False,
+                    help='Inverse result sets in comparison. '
+                    'Output unexpected passes as unexpected failures and '
+                    'unexpected failures as unexpected passes. '
+                    'This is used to catch FAIL->PASS flaky tests. '
+                    '(default = False)')
   parser.add_option('--manifest', action='store', type='string',
                     dest='manifest', default=None,
                     help='Name of the manifest file to use (default = '