[v4] benchtests: Fix validate_benchout.py exceptions

Message ID 20210913134412.751102-1-naohirot@fujitsu.com
State Superseded
Headers
Series [v4] benchtests: Fix validate_benchout.py exceptions |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Naohiro Tamura Sept. 13, 2021, 1:44 p.m. UTC
  This patch fixes two exceptions in validate_benchout.py: AttributeError,
raised if benchout_strings.schema.json is specified, and
json.decoder.JSONDecodeError, raised if the benchout file is not JSON.

AttributeError occurs unconditionally with a correct JSON benchout
file such as the one below, because in the code
"bench['functions'][func][k].keys()" the key k is one of "bench-variant",
"ifuncs", or "results", whose values (a string or a list) do not have keys().

$ ~/glibc/benchtests/scripts/validate_benchout.py bench-memcpy.out \
  ~/glibc/benchtests/scripts/benchout_strings.schema.json
Traceback (most recent call last):
  File "/home/naohirot/work/github/glibc/benchtests/scripts/validate_benchout.py", line 86, in <module>
    sys.exit(main(sys.argv[1:]))
  File "/home/naohirot/work/github/glibc/benchtests/scripts/validate_benchout.py", line 69, in main
    bench.parse_bench(args[0], args[1])
  File "/home/naohirot/work/github/glibc/benchtests/scripts/import_bench.py", line 139, in parse_bench
    do_for_all_timings(bench, lambda b, f, v:
  File "/home/naohirot/work/github/glibc/benchtests/scripts/import_bench.py", line 107, in do_for_all_timings
    if 'timings' not in bench['functions'][func][k].keys():
AttributeError: 'str' object has no attribute 'keys'

$ cat bench-memcpy.out
  1 {
  2  "timing_type": "hp_timing",
  3  "functions": {
  4   "memcpy": {
  5    "bench-variant": "default",
  6    "ifuncs": ["generic_memcpy", "__memcpy_thunderx", "__memcpy_thunderx2", "__memcpy_falkor", "__memcpy_simd", "__memcpy_a64fx", "__memcpy_generic"],
  7    "results": [
  8     {
  9      "length": 1,
 10      "align1": 0,
 11      "align2": 0,
 12      "dst > src": 0,
 13      "timings": [10.9326, 11.0449, 11.5515, 13.5693, 11.5198, 6.77368, 11.5259]
 14     },
 ...
---
 benchtests/scripts/import_bench.py      | 17 +++++++++++------
 benchtests/scripts/validate_benchout.py |  6 +++++-
 2 files changed, 16 insertions(+), 7 deletions(-)
  

Comments

Siddhesh Poyarekar Sept. 15, 2021, 3:23 a.m. UTC | #1
On 9/13/21 7:14 PM, Naohiro Tamura wrote:
> This patch fixed validate_benchout.py two exceptions, AttributeError
> if benchout_strings.schema.json is specified and
> json.decoder.JSONDecodeError if benchout is not JSON.
> 
> AttributeError unconditionally occurs with a correct JSON benchout
> file such as below because the code
> "bench['functions'][func][k].keys()" is either  "bench-variant",
> "ifunc", or "results" that doesn't have keys()."
> 
> $ ~/glibc/benchtests/scripts/validate_benchout.py bench-memcpy.out \
>    ~/glibc/benchtests/scripts/benchout_strings.schema.json
> Traceback (most recent call last):
>    File "/home/naohirot/work/github/glibc/benchtests/scripts/validate_benchout.py", line 86, in <module>
>      sys.exit(main(sys.argv[1:]))
>    File "/home/naohirot/work/github/glibc/benchtests/scripts/validate_benchout.py", line 69, in main
>      bench.parse_bench(args[0], args[1])
>    File "/home/naohirot/work/github/glibc/benchtests/scripts/import_bench.py", line 139, in parse_bench
>      do_for_all_timings(bench, lambda b, f, v:
>    File "/home/naohirot/work/github/glibc/benchtests/scripts/import_bench.py", line 107, in do_for_all_timings
>      if 'timings' not in bench['functions'][func][k].keys():
> AttributeError: 'str' object has no attribute 'keys'
> 
> $ cat bench-memcpy.out
>    1 {
>    2  "timing_type": "hp_timing",
>    3  "functions": {
>    4   "memcpy": {
>    5    "bench-variant": "default",
>    6    "ifuncs": ["generic_memcpy", "__memcpy_thunderx", "__memcpy_thunderx2", "__memcpy_falkor", "__memcpy_simd", "__memcpy_a64fx", "__memcpy_generic"],
>    7    "results": [
>    8     {
>    9      "length": 1,
>   10      "align1": 0,
>   11      "align2": 0,
>   12      "dst > src": 0,
>   13      "timings": [10.9326, 11.0449, 11.5515, 13.5693, 11.5198, 6.77368, 11.5259]
>   14     },
>   ...
> ---
>   benchtests/scripts/import_bench.py      | 17 +++++++++++------
>   benchtests/scripts/validate_benchout.py |  6 +++++-
>   2 files changed, 16 insertions(+), 7 deletions(-)
> 
> diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
> index a799b4e1b7dc..f5e67570d4c5 100644
> --- a/benchtests/scripts/import_bench.py
> +++ b/benchtests/scripts/import_bench.py
> @@ -101,13 +101,18 @@ def do_for_all_timings(bench, callback):
>       Args:
>           bench: The benchmark object
>           callback: The callback function
> +    Raises:
> +        validator.exceptions.ValidationError: if 'timings' key not found
>       """
>       for func in bench['functions'].keys():
>           for k in bench['functions'][func].keys():
> -            if 'timings' not in bench['functions'][func][k].keys():
> -                continue
> -
> -            callback(bench, func, k)
> +            if k == 'results':
> +                for r in range(len(bench['functions'][func][k])):
> +                    if 'timings' not in bench['functions'][func][k][r].keys():
> +                        raise validator.exceptions.ValidationError(
> +                            "'timings' key not found")
> +                    else:
> +                        callback(bench, func, k, r)
>   
>   
>   def compress_timings(points):
> @@ -136,6 +141,6 @@ def parse_bench(filename, schema_filename):
>           with open(filename, 'r') as benchfile:
>               bench = json.load(benchfile)
>               validator.validate(bench, schema)
> -            do_for_all_timings(bench, lambda b, f, v:
> -                    b['functions'][f][v]['timings'].sort())
> +            do_for_all_timings(bench, lambda b, f, v, r:
> +                    b['functions'][f][v][r]['timings'].sort())
>               return bench

This will break the original use case, i.e. bench.out, since it doesn't 
have the 'results' nesting timings.  The sorting doesn't seem necessary 
for validation, it's only necessary for compare_bench.py.  You could 
move the do_for_all_timings call into compare_bench.py, which is 
specific to bench.out so that you don't have to modify do_for_all_timings.

Siddhesh
  
develop--- via Libc-alpha Sept. 16, 2021, 1:12 a.m. UTC | #2
Hi Siddhesh,

Thank you for the comment. 

> From: Siddhesh Poyarekar <siddhesh@gotplt.org>
> Sent: Wednesday, September 15, 2021 12:23 PM
> 
> This will break the original use case, i.e. bench.out, since it doesn't
> have the 'results' nesting timings.  The sorting doesn't seem necessary
> for validation, it's only necessary for compare_bench.py.  You could
> move the do_for_all_timings call into compare_bench.py, which is
> specific to bench.out so that you don't have to modify do_for_all_timings.

If do_for_all_timings is moved to compare_bench.py, parse_bench also has
to be moved to compare_bench.py and compare_strings.py.

So I propose simply catching AttributeError and returning, as shown below,
because that way compare_strings.py is not affected.
Can we agree?

--- a/benchtests/scripts/import_bench.py
+++ b/benchtests/scripts/import_bench.py
@@ -104,10 +104,15 @@ def do_for_all_timings(bench, callback):
     """
     for func in bench['functions'].keys():
         for k in bench['functions'][func].keys():
-            if 'timings' not in bench['functions'][func][k].keys():
-                continue
-
-            callback(bench, func, k)
+            try:
+                if 'timings' not in bench['functions'][func][k].keys():
+                    continue
+
+                callback(bench, func, k)
+            # in case that <bench.out file> conforms <bench.out schema>
+            # benchout_strings.schema.json
+            except AttributeError:
+                return

Thanks.
Naohiro
  
Siddhesh Poyarekar Sept. 16, 2021, 1:41 a.m. UTC | #3
On 9/16/21 6:42 AM, naohirot@fujitsu.com wrote:
> Hi Siddhesh,
> 
> Thank you for the comment.
> 
>> From: Siddhesh Poyarekar <siddhesh@gotplt.org>
>> Sent: Wednesday, September 15, 2021 12:23 PM
>>
>> This will break the original use case, i.e. bench.out, since it doesn't
>> have the 'results' nesting timings.  The sorting doesn't seem necessary
>> for validation, it's only necessary for compare_bench.py.  You could
>> move the do_for_all_timings call into compare_bench.py, which is
>> specific to bench.out so that you don't have to modify do_for_all_timings.
> 
> If do_for_all_timings is moved to compare_bench.py, parse_bench also has
> to be moved to compare_bench.py and compare_strings.py.
> 

Ahh no, I didn't suggest moving the implementation, only the call, like  
below.  This way import_bench.parse_bench does not have to know about  
the specific json format.

diff --git a/benchtests/scripts/compare_bench.py  
b/benchtests/scripts/compare_bench.py
index 6fcbd08038..fa7481c76e 100755
--- a/benchtests/scripts/compare_bench.py
+++ b/benchtests/scripts/compare_bench.py
@@ -163,7 +163,11 @@ def plot_graphs(bench1, bench2):

  def main(bench1, bench2, schema, threshold, stats):
      bench1 = bench.parse_bench(bench1, schema)
+    bench.do_for_all_timings(bench1, lambda b, f, v:
+            b['functions'][f][v]['timings'].sort())
      bench2 = bench.parse_bench(bench2, schema)
+    bench.do_for_all_timings(bench2, lambda b, f, v:
+            b['functions'][f][v]['timings'].sort())

      plot_graphs(bench1, bench2)

diff --git a/benchtests/scripts/import_bench.py  
b/benchtests/scripts/import_bench.py
index a799b4e1b7..362c990fe1 100644
--- a/benchtests/scripts/import_bench.py
+++ b/benchtests/scripts/import_bench.py
@@ -136,6 +136,4 @@ def parse_bench(filename, schema_filename):
          with open(filename, 'r') as benchfile:
              bench = json.load(benchfile)
              validator.validate(bench, schema)
-            do_for_all_timings(bench, lambda b, f, v:
-                    b['functions'][f][v]['timings'].sort())
              return bench
  
develop--- via Libc-alpha Sept. 16, 2021, 2:26 a.m. UTC | #4
Hi Siddhesh,

> Ahh no, I didn't suggest moving the implementation, only the call, like
> below.  This way import_bench.parse_bench does not have to know about
> the specific json format.

Sorry about that; now I get it.
Please find V5 [1] and check it again.

[1] https://sourceware.org/pipermail/libc-alpha/2021-September/131058.html

Thanks.
Naohiro
  

Patch

diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
index a799b4e1b7dc..f5e67570d4c5 100644
--- a/benchtests/scripts/import_bench.py
+++ b/benchtests/scripts/import_bench.py
@@ -101,13 +101,18 @@  def do_for_all_timings(bench, callback):
     Args:
         bench: The benchmark object
         callback: The callback function
+    Raises:
+        validator.exceptions.ValidationError: if 'timings' key not found
     """
     for func in bench['functions'].keys():
         for k in bench['functions'][func].keys():
-            if 'timings' not in bench['functions'][func][k].keys():
-                continue
-
-            callback(bench, func, k)
+            if k == 'results':
+                for r in range(len(bench['functions'][func][k])):
+                    if 'timings' not in bench['functions'][func][k][r].keys():
+                        raise validator.exceptions.ValidationError(
+                            "'timings' key not found")
+                    else:
+                        callback(bench, func, k, r)
 
 
 def compress_timings(points):
@@ -136,6 +141,6 @@  def parse_bench(filename, schema_filename):
         with open(filename, 'r') as benchfile:
             bench = json.load(benchfile)
             validator.validate(bench, schema)
-            do_for_all_timings(bench, lambda b, f, v:
-                    b['functions'][f][v]['timings'].sort())
+            do_for_all_timings(bench, lambda b, f, v, r:
+                    b['functions'][f][v][r]['timings'].sort())
             return bench
diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
index 47df33ed0252..00d5fa0ee5eb 100755
--- a/benchtests/scripts/validate_benchout.py
+++ b/benchtests/scripts/validate_benchout.py
@@ -73,11 +73,15 @@  def main(args):
 
     except bench.validator.ValidationError as e:
         return print_and_exit("Invalid benchmark output: %s" % e.message,
-            os.EX_DATAERR)
+                os.EX_DATAERR)
 
     except bench.validator.SchemaError as e:
         return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
 
+    except json.decoder.JSONDecodeError as e:
+        return print_and_exit("Benchmark output in %s is not JSON." % args[0],
+                os.EX_DATAERR)
+
     print("Benchmark output in %s is valid." % args[0])
     return os.EX_OK