compare_strings.py : Add --gmean flag

Message ID 20230403141105.551198-1-nisha.s.menon@gmail.com
State Committed
Commit 51a121eb36d7cf016005413b4fe6995df2b7cef3
Headers
Series compare_strings.py : Add --gmean flag |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Nisha Poyarekar April 3, 2023, 2:11 p.m. UTC
  Add a --gmean flag that calculates the geometric mean of the string
  benchmark results.

Signed-off-by: Nisha Poyarekar <nisha.s.menon@gmail.com>
---
 benchtests/scripts/compare_strings.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)
  

Comments

Noah Goldstein April 3, 2023, 3:34 p.m. UTC | #1
On Mon, Apr 3, 2023 at 9:11 AM Nisha Poyarekar via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> To calculate geometric mean for string benchmark results.
>
> Signed-off-by: Nisha Poyarekar <nisha.s.menon@gmail.com>
> ---
>  benchtests/scripts/compare_strings.py | 20 ++++++++++++++++++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/benchtests/scripts/compare_strings.py b/benchtests/scripts/compare_strings.py
> index 1195a6c741..a4ab3e42c2 100755
> --- a/benchtests/scripts/compare_strings.py
> +++ b/benchtests/scripts/compare_strings.py
> @@ -21,6 +21,7 @@ Given a string benchmark result file, print a table with comparisons with a
>  baseline.  The baseline is the first function, which typically is the builtin
>  function.
>  """
> +import math
>  import matplotlib as mpl
>  mpl.use('Agg')
>
> @@ -83,7 +84,8 @@ def draw_graph(f, v, ifuncs, results):
>      pylab.savefig('%s-%s.png' % (f, v), bbox_inches='tight')
>
>
> -def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header):
> +def process_results(results, attrs, funcs, base_func, graph, no_diff,
> +                    no_header, gmean):
>      """ Process results and print them
>
>      Args:
> @@ -132,6 +134,8 @@ def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
>              print("%36s%s" % (' ', '\t'.join(ifuncs)))
>              print("=" * 120)
>
> +        mean_row = [0 for i in range(len(ifuncs))]
> +        total=0
>          graph_res = {}
>          for res in results['functions'][f]['results']:
>              try:
> @@ -144,8 +148,11 @@ def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
>              key = ', '.join(attr_list)
>              sys.stdout.write('%36s: ' % key)
>              graph_res[key] = res['timings']
> +
>              for t in res['timings']:
>                  if selected[i]:
> +                    if gmean:
> +                        mean_row[i] = mean_row[i]+math.log(t)

Is there an issue with using the statistics package and just calling
`statistics.geometric_mean`?

Otherwise LGTM.
>                      sys.stdout.write ('%12.2f' % t)
>                      if not no_diff:
>                          if i != base_index:
> @@ -159,6 +166,12 @@ def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
>          if graph:
>              draw_graph(f, v, results['functions'][f]['ifuncs'], graph_res)
>
> +        if gmean:
> +            print("=" * 120)
> +            total = len(results['functions'][f]['results'])
> +            sys.stdout.write ('Geo-mean (for %s inputs)'%total)
> +            for m in mean_row:
> +                sys.stdout.write ('%12.2f' % (math.exp(m/total)))
>
>  def main(args):
>      """Program Entry Point
> @@ -180,7 +193,8 @@ def main(args):
>          funcs = None
>
>      results = parse_file(args.input, args.schema)
> -    process_results(results, attrs, funcs, base_func, args.graph, args.no_diff, args.no_header)
> +    process_results(results, attrs, funcs, base_func, args.graph, args.no_diff,
> +                    args.no_header, args.gmean)
>      return os.EX_OK
>
>
> @@ -207,6 +221,8 @@ if __name__ == '__main__':
>                          help='Do not print the difference from baseline.')
>      parser.add_argument('--no-header', action='store_true',
>                          help='Do not print the header.')
> +    parser.add_argument('--gmean', action='store_true',
> +                        help='Print the geometric mean at the end of the output.')
>
>      args = parser.parse_args()
>      sys.exit(main(args))
> --
> 2.37.2
>
  
Florian Weimer April 3, 2023, 9:37 p.m. UTC | #2
* Noah Goldstein via Libc-alpha:

> Is there an issue using statistics package and just doing:
> `statistics.geometric_mean`
> ?

According to

  <https://docs.python.org/3/library/statistics.html#statistics.geometric_mean>

it was added in Python 3.8, which is newer than what we aim for in
general (INSTALL currently mentions Python 3.4).

Thanks,
Florian
  
Nisha Poyarekar April 3, 2023, 10:58 p.m. UTC | #3
On Mon, Apr 3, 2023 at 5:37 PM Florian Weimer <fweimer@redhat.com> wrote:
>
> * Noah Goldstein via Libc-alpha:
>
> > Is there an issue using statistics package and just doing:
> > `statistics.geometric_mean`
> > ?
>
> According to
>
>   <https://docs.python.org/3/library/statistics.html#statistics.geometric_mean>
>
> it was added in Python 3.8, which is newer than what we aim for in
> general (INSTALL currently mentions Python 3.4).
>

So is the fix okay as it is?

Thanks,
Nisha
  
Noah Goldstein April 3, 2023, 11:35 p.m. UTC | #4
On Mon, Apr 3, 2023 at 5:58 PM Nisha Menon <nisha.s.menon@gmail.com> wrote:
>
> On Mon, Apr 3, 2023 at 5:37 PM Florian Weimer <fweimer@redhat.com> wrote:
> >
> > * Noah Goldstein via Libc-alpha:
> >
> > > Is there an issue using statistics package and just doing:
> > > `statistics.geometric_mean`
> > > ?
> >
> > According to
> >
> >   <https://docs.python.org/3/library/statistics.html#statistics.geometric_mean>
> >
> > it was added in Python 3.8, which is newer than what we aim for in
> > general (INSTALL currently mentions Python 3.4).
> >
>
> So is the fix okay as it is?
>
> Thanks,
> Nisha

Yeah LGTM.
  
Nisha Poyarekar April 4, 2023, 12:33 a.m. UTC | #5
On Mon, Apr 3, 2023 at 7:35 PM Noah Goldstein <goldstein.w.n@gmail.com> wrote:
> > So is the fix okay as it is?
> >
> > Thanks,
> > Nisha
>
> Yeah LGTM.

Thanks. Can you please push the patch for me? I don't have commit access.

Thanks,
Nisha
  

Patch

diff --git a/benchtests/scripts/compare_strings.py b/benchtests/scripts/compare_strings.py
index 1195a6c741..a4ab3e42c2 100755
--- a/benchtests/scripts/compare_strings.py
+++ b/benchtests/scripts/compare_strings.py
@@ -21,6 +21,7 @@  Given a string benchmark result file, print a table with comparisons with a
 baseline.  The baseline is the first function, which typically is the builtin
 function.
 """
+import math
 import matplotlib as mpl
 mpl.use('Agg')
 
@@ -83,7 +84,8 @@  def draw_graph(f, v, ifuncs, results):
     pylab.savefig('%s-%s.png' % (f, v), bbox_inches='tight')
 
 
-def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header):
+def process_results(results, attrs, funcs, base_func, graph, no_diff,
+                    no_header, gmean):
     """ Process results and print them
 
     Args:
@@ -132,6 +134,8 @@  def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
             print("%36s%s" % (' ', '\t'.join(ifuncs)))
             print("=" * 120)
 
+        mean_row = [0 for i in range(len(ifuncs))]
+        total=0
         graph_res = {}
         for res in results['functions'][f]['results']:
             try:
@@ -144,8 +148,11 @@  def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
             key = ', '.join(attr_list)
             sys.stdout.write('%36s: ' % key)
             graph_res[key] = res['timings']
+
             for t in res['timings']:
                 if selected[i]:
+                    if gmean:
+                        mean_row[i] = mean_row[i]+math.log(t)
                     sys.stdout.write ('%12.2f' % t)
                     if not no_diff:
                         if i != base_index:
@@ -159,6 +166,12 @@  def process_results(results, attrs, funcs, base_func, graph, no_diff, no_header)
         if graph:
             draw_graph(f, v, results['functions'][f]['ifuncs'], graph_res)
 
+        if gmean:
+            print("=" * 120)
+            total = len(results['functions'][f]['results'])
+            sys.stdout.write ('Geo-mean (for %s inputs)'%total)
+            for m in mean_row:
+                sys.stdout.write ('%12.2f' % (math.exp(m/total)))
 
 def main(args):
     """Program Entry Point
@@ -180,7 +193,8 @@  def main(args):
         funcs = None
 
     results = parse_file(args.input, args.schema)
-    process_results(results, attrs, funcs, base_func, args.graph, args.no_diff, args.no_header)
+    process_results(results, attrs, funcs, base_func, args.graph, args.no_diff,
+                    args.no_header, args.gmean)
     return os.EX_OK
 
 
@@ -207,6 +221,8 @@  if __name__ == '__main__':
                         help='Do not print the difference from baseline.')
     parser.add_argument('--no-header', action='store_true',
                         help='Do not print the header.')
+    parser.add_argument('--gmean', action='store_true',
+                        help='Print the geometric mean at the end of the output.')
 
     args = parser.parse_args()
     sys.exit(main(args))