Valid bench.out against a JSON schema

Message ID 20140522105930.GW14500@spoyarek.pnq.redhat.com
State Superseded
Headers

Commit Message

Siddhesh Poyarekar May 22, 2014, 10:59 a.m. UTC
  Hi,

This patch adds a JSON schema for the benchmark output file and also
adds a script that validates the generated output against the schema.

Siddhesh

	* benchtests/scripts/validate_benchout.py: New script.
	* benchtests/Makefile (bench-func): Call it.
	* benchtests/scripts/benchout.schema.json: New file.
  

Comments

Siddhesh Poyarekar May 26, 2014, 6:10 a.m. UTC | #1
Ping!

On Thu, May 22, 2014 at 04:29:30PM +0530, Siddhesh Poyarekar wrote:
> Hi,
> 
> This patch adds a JSON schema for the benchmark output file and also
> adds a script that validates the generated output against the schema.
> 
> Siddhesh
> 
> 	* benchtests/scripts/validate_benchout.py: New script.
> 	* benchtests/Makefile (bench-func): Call it.
> 	* benchtests/scripts/benchout.schema.json: New file.
> 
> 
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 63a5a7f..6fff7d3 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -131,6 +131,8 @@ bench-func: $(binaries-bench)
>  	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
>  	fi; \
>  	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
> +	$(.)scripts/validate_benchout.py $(objpfx)bench.out \
> +		$(.)scripts/benchout.schema.json
>  
>  $(timing-type) $(binaries-bench) $(binaries-benchset): %: %.o $(objpfx)json-lib.o \
>    $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
> diff --git a/benchtests/scripts/benchout.schema.json b/benchtests/scripts/benchout.schema.json
> new file mode 100644
> index 0000000..affb7c1
> --- /dev/null
> +++ b/benchtests/scripts/benchout.schema.json
> @@ -0,0 +1,42 @@
> +{
> +  "title": "benchmark",
> +  "type": "object",
> +  "properties": {
> +    "timing_type": {
> +      "type": "string"
> +    },
> +    "functions": {
> +      "title": "Associative array of functions",
> +      "type": "object",
> +      "patternProperties": {
> +        "^[_a-zA-Z][_a-zA-Z0-9]+$": {
> +          "title": "Function names",
> +          "type": "object",
> +          "patternProperties": {
> +            "^[_a-zA-Z0-9]*$": {
> +              "title": "Function variants",
> +              "type": "object",
> +              "properties": {
> +                "duration": {"type": "number"},
> +                "iterations": {"type": "number"},
> +                "max": {"type": "number"},
> +                "min": {"type": "number"},
> +                "mean": {"type": "number"},
> +                "timings": {
> +                  "type": "array",
> +                  "items": {"type": "number"}
> +                }
> +              },
> +              "required": ["duration", "iterations", "max", "min", "mean"],
> +              "additionalProperties": false
> +            }
> +          },
> +          "additionalProperties": false
> +        }
> +      },
> +      "minProperties": 1
> +    }
> +  },
> +  "required": ["timing_type", "functions"],
> +  "additionalProperties": false
> +}
> diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
> new file mode 100755
> index 0000000..b6a34e2
> --- /dev/null
> +++ b/benchtests/scripts/validate_benchout.py
> @@ -0,0 +1,78 @@
> +#!/usr/bin/python
> +# Copyright (C) 2014 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <http://www.gnu.org/licenses/>.
> +"""Benchmark output validator
> +
> +Given a benchmark output file in json format and a benchmark schema file,
> +validate the output against the schema.
> +"""
> +
> +from __future__ import print_function
> +import jsonschema
> +import json
> +import sys
> +import os
> +
> +
> +def validate_bench(benchfile, schemafile):
> +    """Validate benchmark file
> +
> +    Validate a benchmark output file against a JSON schema.
> +
> +    Args:
> +        benchfile: The file name of the bench.out file.
> +        schemafile: The file name of the JSON schema file to validate
> +        bench.out against.
> +
> +    Exceptions:
> +        jsonschema.ValidationError: When bench.out is not valid
> +        jsonschema.SchemaError: When the JSON schema is not valid
> +        IOError: If any of the files are not found.
> +    """
> +    with open(benchfile, 'r') as bfile:
> +        with open(schemafile, 'r') as sfile:
> +            bench = json.load(bfile)
> +            schema = json.load(sfile)
> +            jsonschema.validate(bench, schema)
> +
> +    # If we reach here, we're all good.
> +    print("Benchmark output in %s is valid." % benchfile)
> +
> +
> +def main(args):
> +    """Main entry point
> +
> +    Args:
> +        args: The command line arguments to the program
> +
> +    Returns:
> +        0 on success or a non-zero failure code
> +
> +    Exceptions:
> +        Exceptions thrown by validate_bench
> +    """
> +    if len(args) != 2:
> +        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
> +                file=sys.stderr)
> +        return os.EX_USAGE
> +
> +    validate_bench(args[0], args[1])
> +    return os.EX_OK
> +
> +
> +if __name__ == '__main__':
> +    sys.exit(main(sys.argv[1:]))
  
Will Newton May 27, 2014, 8:30 a.m. UTC | #2
On 22 May 2014 11:59, Siddhesh Poyarekar <siddhesh@redhat.com> wrote:

Hi Siddesh,

> This patch adds a JSON schema for the benchmark output file and also
> adds a script that validates the generated output against the schema.
>
> Siddhesh
>
>         * benchtests/scripts/validate_benchout.py: New script.
>         * benchtests/Makefile (bench-func): Call it.
>         * benchtests/scripts/benchout.schema.json: New file.

I like the idea of this patch, however I am not sure how we will
handle other benchmark types (e.g. string, malloc) and if we will need
multiple schema or if we will need to extend this one. I haven't gone
far enough down that road yet to know the answer but I wonder if it is
something you have considered?

> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 63a5a7f..6fff7d3 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -131,6 +131,8 @@ bench-func: $(binaries-bench)
>           mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
>         fi; \
>         mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
> +       $(.)scripts/validate_benchout.py $(objpfx)bench.out \
> +               $(.)scripts/benchout.schema.json
>
>  $(timing-type) $(binaries-bench) $(binaries-benchset): %: %.o $(objpfx)json-lib.o \
>    $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
> diff --git a/benchtests/scripts/benchout.schema.json b/benchtests/scripts/benchout.schema.json
> new file mode 100644
> index 0000000..affb7c1
> --- /dev/null
> +++ b/benchtests/scripts/benchout.schema.json
> @@ -0,0 +1,42 @@
> +{
> +  "title": "benchmark",
> +  "type": "object",
> +  "properties": {
> +    "timing_type": {
> +      "type": "string"
> +    },
> +    "functions": {
> +      "title": "Associative array of functions",
> +      "type": "object",
> +      "patternProperties": {
> +        "^[_a-zA-Z][_a-zA-Z0-9]+$": {
> +          "title": "Function names",
> +          "type": "object",
> +          "patternProperties": {
> +            "^[_a-zA-Z0-9]*$": {
> +              "title": "Function variants",
> +              "type": "object",
> +              "properties": {
> +                "duration": {"type": "number"},
> +                "iterations": {"type": "number"},
> +                "max": {"type": "number"},
> +                "min": {"type": "number"},
> +                "mean": {"type": "number"},
> +                "timings": {
> +                  "type": "array",
> +                  "items": {"type": "number"}
> +                }
> +              },
> +              "required": ["duration", "iterations", "max", "min", "mean"],
> +              "additionalProperties": false
> +            }
> +          },
> +          "additionalProperties": false
> +        }
> +      },
> +      "minProperties": 1
> +    }
> +  },
> +  "required": ["timing_type", "functions"],
> +  "additionalProperties": false
> +}
> diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
> new file mode 100755
> index 0000000..b6a34e2
> --- /dev/null
> +++ b/benchtests/scripts/validate_benchout.py
> @@ -0,0 +1,78 @@
> +#!/usr/bin/python
> +# Copyright (C) 2014 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <http://www.gnu.org/licenses/>.
> +"""Benchmark output validator
> +
> +Given a benchmark output file in json format and a benchmark schema file,
> +validate the output against the schema.
> +"""
> +
> +from __future__ import print_function
> +import jsonschema

I think last time I looked at this patch jsonschema was not a module
Python installed by default so I suggested importing inside a
try/catch.

> +import json
> +import sys
> +import os
> +
> +
> +def validate_bench(benchfile, schemafile):
> +    """Validate benchmark file
> +
> +    Validate a benchmark output file against a JSON schema.
> +
> +    Args:
> +        benchfile: The file name of the bench.out file.
> +        schemafile: The file name of the JSON schema file to validate
> +        bench.out against.
> +
> +    Exceptions:
> +        jsonschema.ValidationError: When bench.out is not valid
> +        jsonschema.SchemaError: When the JSON schema is not valid
> +        IOError: If any of the files are not found.
> +    """
> +    with open(benchfile, 'r') as bfile:
> +        with open(schemafile, 'r') as sfile:
> +            bench = json.load(bfile)
> +            schema = json.load(sfile)
> +            jsonschema.validate(bench, schema)
> +
> +    # If we reach here, we're all good.
> +    print("Benchmark output in %s is valid." % benchfile)
> +
> +
> +def main(args):
> +    """Main entry point
> +
> +    Args:
> +        args: The command line arguments to the program
> +
> +    Returns:
> +        0 on success or a non-zero failure code
> +
> +    Exceptions:
> +        Exceptions thrown by validate_bench
> +    """
> +    if len(args) != 2:
> +        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
> +                file=sys.stderr)
> +        return os.EX_USAGE
> +
> +    validate_bench(args[0], args[1])
> +    return os.EX_OK
> +
> +
> +if __name__ == '__main__':
> +    sys.exit(main(sys.argv[1:]))
  
Siddhesh Poyarekar May 27, 2014, 9:06 a.m. UTC | #3
On Tue, May 27, 2014 at 09:30:00AM +0100, Will Newton wrote:
> On 22 May 2014 11:59, Siddhesh Poyarekar <siddhesh@redhat.com> wrote:
> 
> Hi Siddesh,
> 
> > This patch adds a JSON schema for the benchmark output file and also
> > adds a script that validates the generated output against the schema.
> >
> > Siddhesh
> >
> >         * benchtests/scripts/validate_benchout.py: New script.
> >         * benchtests/Makefile (bench-func): Call it.
> >         * benchtests/scripts/benchout.schema.json: New file.
> 
> I like the idea of this patch, however I am not sure how we will
> handle other benchmark types (e.g. string, malloc) and if we will need
> multiple schema or if we will need to extend this one. I haven't gone
> far enough down that road yet to know the answer but I wonder if it is
> something you have considered?

I haven't given it a lot of thought either, but given that the current
string tests have a clearly different output format, we would need a
separate schema to JSONify it.  Of course, given that there have been
complaints in the past about the string tests being less than useful,
it might be a good idea to revisit them and maybe even come up with a
compatible output format for them.

The malloc microbenchmark you had posted seems to have a similar
format with some extensions*, so we should be good there.

> > +import jsonschema
> 
> I think last time I looked at this patch jsonschema was not a module
> Python installed by default so I suggested importing inside a
> try/catch.

Thanks, I'll fix this.

Siddhesh

* I'll get to reviewing the malloc benchmark soon-ish.
  
Andreas Schwab May 27, 2014, 1:57 p.m. UTC | #4
Siddhesh Poyarekar <siddhesh@redhat.com> writes:

> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index 63a5a7f..6fff7d3 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -131,6 +131,8 @@ bench-func: $(binaries-bench)
>  	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
>  	fi; \
>  	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
> +	$(.)scripts/validate_benchout.py $(objpfx)bench.out \
> +		$(.)scripts/benchout.schema.json

What is $(.)?

Andreas.
  
Siddhesh Poyarekar May 27, 2014, 2:12 p.m. UTC | #5
On Tue, May 27, 2014 at 03:57:47PM +0200, Andreas Schwab wrote:
> Siddhesh Poyarekar <siddhesh@redhat.com> writes:
> 
> > diff --git a/benchtests/Makefile b/benchtests/Makefile
> > index 63a5a7f..6fff7d3 100644
> > --- a/benchtests/Makefile
> > +++ b/benchtests/Makefile
> > @@ -131,6 +131,8 @@ bench-func: $(binaries-bench)
> >  	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
> >  	fi; \
> >  	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
> > +	$(.)scripts/validate_benchout.py $(objpfx)bench.out \
> > +		$(.)scripts/benchout.schema.json
> 
> What is $(.)?

I just assumed that there's a $(.) analogous to $(..), but apparently
it's called $(CURDIR).

Siddhesh
  
Andreas Schwab May 27, 2014, 2:19 p.m. UTC | #6
Siddhesh Poyarekar <siddhesh@redhat.com> writes:

> I just assumed that there's a $(.) analogous to $(..), but apparently
> it's called $(CURDIR).

There is no CURDIR.

Andreas.
  
Siddhesh Poyarekar May 27, 2014, 2:39 p.m. UTC | #7
On Tue, May 27, 2014 at 04:19:00PM +0200, Andreas Schwab wrote:
> Siddhesh Poyarekar <siddhesh@redhat.com> writes:
> 
> > I just assumed that there's a $(.) analogous to $(..), but apparently
> > it's called $(CURDIR).
> 
> There is no CURDIR.
> 

Oh, then why is it mentioned here:

https://www.gnu.org/software/make/manual/make.html

Siddhesh
  
Andreas Schwab May 27, 2014, 2:53 p.m. UTC | #8
Siddhesh Poyarekar <siddhesh@redhat.com> writes:

> I just assumed that there's a $(.) analogous to $(..), but apparently
> it's called $(CURDIR).

There is no need for CURDIR.

Andreas.
  

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index 63a5a7f..6fff7d3 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -131,6 +131,8 @@  bench-func: $(binaries-bench)
 	  mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
 	fi; \
 	mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out
+	$(.)scripts/validate_benchout.py $(objpfx)bench.out \
+		$(.)scripts/benchout.schema.json
 
 $(timing-type) $(binaries-bench) $(binaries-benchset): %: %.o $(objpfx)json-lib.o \
   $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
diff --git a/benchtests/scripts/benchout.schema.json b/benchtests/scripts/benchout.schema.json
new file mode 100644
index 0000000..affb7c1
--- /dev/null
+++ b/benchtests/scripts/benchout.schema.json
@@ -0,0 +1,42 @@ 
+{
+  "title": "benchmark",
+  "type": "object",
+  "properties": {
+    "timing_type": {
+      "type": "string"
+    },
+    "functions": {
+      "title": "Associative array of functions",
+      "type": "object",
+      "patternProperties": {
+        "^[_a-zA-Z][_a-zA-Z0-9]+$": {
+          "title": "Function names",
+          "type": "object",
+          "patternProperties": {
+            "^[_a-zA-Z0-9]*$": {
+              "title": "Function variants",
+              "type": "object",
+              "properties": {
+                "duration": {"type": "number"},
+                "iterations": {"type": "number"},
+                "max": {"type": "number"},
+                "min": {"type": "number"},
+                "mean": {"type": "number"},
+                "timings": {
+                  "type": "array",
+                  "items": {"type": "number"}
+                }
+              },
+              "required": ["duration", "iterations", "max", "min", "mean"],
+              "additionalProperties": false
+            }
+          },
+          "additionalProperties": false
+        }
+      },
+      "minProperties": 1
+    }
+  },
+  "required": ["timing_type", "functions"],
+  "additionalProperties": false
+}
diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
new file mode 100755
index 0000000..b6a34e2
--- /dev/null
+++ b/benchtests/scripts/validate_benchout.py
@@ -0,0 +1,78 @@ 
+#!/usr/bin/python
+# Copyright (C) 2014 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Benchmark output validator
+
+Given a benchmark output file in json format and a benchmark schema file,
+validate the output against the schema.
+"""
+
+from __future__ import print_function
+import jsonschema
+import json
+import sys
+import os
+
+
+def validate_bench(benchfile, schemafile):
+    """Validate benchmark file
+
+    Validate a benchmark output file against a JSON schema.
+
+    Args:
+        benchfile: The file name of the bench.out file.
+        schemafile: The file name of the JSON schema file to validate
+        bench.out against.
+
+    Exceptions:
+        jsonschema.ValidationError: When bench.out is not valid
+        jsonschema.SchemaError: When the JSON schema is not valid
+        IOError: If any of the files are not found.
+    """
+    with open(benchfile, 'r') as bfile:
+        with open(schemafile, 'r') as sfile:
+            bench = json.load(bfile)
+            schema = json.load(sfile)
+            jsonschema.validate(bench, schema)
+
+    # If we reach here, we're all good.
+    print("Benchmark output in %s is valid." % benchfile)
+
+
+def main(args):
+    """Main entry point
+
+    Args:
+        args: The command line arguments to the program
+
+    Returns:
+        0 on success or a non-zero failure code
+
+    Exceptions:
+        Exceptions thrown by validate_bench
+    """
+    if len(args) != 2:
+        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
+                file=sys.stderr)
+        return os.EX_USAGE
+
+    validate_bench(args[0], args[1])
+    return os.EX_OK
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv[1:]))