From patchwork Wed Aug 16 15:22:39 2017
X-Patchwork-Submitter: Wilco Dijkstra
X-Patchwork-Id: 22147
From: Wilco Dijkstra
To: Alexander Monakov, Arjan van de Ven
CC: Siddhesh Poyarekar, libc-alpha@sourceware.org, nd
Subject: Re: [PATCH] Add math benchmark latency test
Date: Wed, 16 Aug 2017 15:22:39 +0000
Alexander Monakov wrote:
> I suggest using "reciprocal throughput" if you're looking for a short term
> for 'independent executions per unit time'.  It's easier to recognize and
> already used in practice (e.g. in docs by Agner Fog).

Right, what about this?

  "workload-spec2006.wrf": {
   "reciprocal throughput (ns)": 20,
   "latency (ns)": 50,
   "throughput (iters/s)": 5.0e+07
  }

This leads to a question: some targets use an odd header, hp-timing.h.
What units does it use?  Or is that completely undefined (and could
potentially change between GLIBC versions)?

ChangeLog:
2017-08-16  Wilco Dijkstra

	* benchtests/bench-skeleton.c (main): Add support for latency
	benchmarking.
	* benchtests/scripts/bench.py: Add support for latency benchmarking.

diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 3c6dad705594ac0a53edcb4e09686252c13127cf..48287be93b432b3acfc2431d1f7959bd00815b3b 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -71,8 +71,10 @@ main (int argc, char **argv)
       bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
       double d_total_i = 0;
       timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t throughput = 0, latency = 0;
       int64_t c = 0;
       uint64_t cur;
+      BENCH_VARS;
       while (1)
         {
           if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
                   BENCH_FUNC (v, i);
               TIMING_NOW (end);
               TIMING_DIFF (cur, start, end);
-              TIMING_ACCUM (total, cur);
+              TIMING_ACCUM (throughput, cur);
+
+              TIMING_NOW (start);
+              for (k = 0; k < iters; k++)
+                for (i = 0; i < NUM_SAMPLES (v); i++)
+                  BENCH_FUNC_LAT (v, i);
+              TIMING_NOW (end);
+              TIMING_DIFF (cur, start, end);
+              TIMING_ACCUM (latency, cur);
+
               d_total_i += iters * NUM_SAMPLES (v);
             }
           else
@@ -131,12 +142,18 @@ main (int argc, char **argv)
       /* Begin variant.  */
       json_attr_object_begin (&json_ctx, VARIANT (v));
 
-      json_attr_double (&json_ctx, "duration", d_total_s);
-      json_attr_double (&json_ctx, "iterations", d_total_i);
       if (is_bench)
-        json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+        {
+          json_attr_double (&json_ctx, "reciprocal throughput (ns)",
+                            throughput / d_total_i);
+          json_attr_double (&json_ctx, "latency (ns)", latency / d_total_i);
+          json_attr_double (&json_ctx, "throughput (iters/s)",
+                            d_total_i / throughput * 1000000000.0);
+        }
       else
         {
+          json_attr_double (&json_ctx, "duration", d_total_s);
+          json_attr_double (&json_ctx, "iterations", d_total_i);
           json_attr_double (&json_ctx, "max", max / d_iters);
           json_attr_double (&json_ctx, "min", min / d_iters);
           json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py
index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755
--- a/benchtests/scripts/bench.py
+++ b/benchtests/scripts/bench.py
@@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''
 # variant is represented by the _VARIANT structure.  The ARGS structure
 # represents a single set of arguments.
 STRUCT_TEMPLATE = '''
-#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)
 
 struct args
 {
@@ -84,7 +84,9 @@ EPILOGUE = '''
 #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
 #define RESULT_ACCUM(r, v, i, old, new) \\
         ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
-#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})
+#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})
+#define BENCH_VARS %(defvar)s
 #define FUNCNAME "%(func)s"
 #include "bench-skeleton.c"'''
 
@@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals):
     # If we have a return value from the function, make sure it is
     # assigned to prevent the compiler from optimizing out the
     # call.
+    getret = ''
+    latarg = ''
+    defvar = ''
+
     if directives['ret']:
         print('static %s volatile ret;' % directives['ret'])
-        getret = 'ret = '
-    else:
-        getret = ''
+        print('static %s zero __attribute__((used)) = 0;' % directives['ret'])
+        getret = 'ret = func_res = '
+        latarg = 'func_res * zero +'
+        defvar = '%s func_res = 0;' % directives['ret']
 
     # Test initialization.
     if directives['init']:
         print('#define BENCH_INIT %s' % directives['init'])
 
-    print(EPILOGUE % {'getret': getret, 'func': func})
+    print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })
 
 
 def _print_arg_data(func, directives, all_vals):
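
To make the generated-code side of this easier to follow, here is roughly what
the new macros would expand to for a hypothetical benchmark of a
double-returning function such as sin.  This is a sketch only, reusing the
names from the patch (func_res, ret, zero); the real text is produced by
bench.py and differs in detail:

/* Hypothetical expansion for func = sin, ret type double.  */
static double volatile ret;                     /* sink, keeps calls alive */
static double zero __attribute__ ((used)) = 0;  /* always zero */

#define CALL_BENCH_FUNC(v, i, x) sin (x variants[v].in[i].arg0)

/* Throughput variant: every input is independent, so the CPU may keep
   several calls in flight at once.  */
#define BENCH_FUNC(i, j) ({ ret = func_res = CALL_BENCH_FUNC (i, j, ); })

/* Latency variant: func_res * zero is always 0, so the argument value is
   unchanged, but each call must wait for the previous result.  */
#define BENCH_FUNC_LAT(i, j) \
  ({ ret = func_res = CALL_BENCH_FUNC (i, j, func_res * zero +); })

/* Declared by the skeleton at the top of the timing loop.  */
#define BENCH_VARS double func_res = 0;

The skeleton then times one loop of BENCH_FUNC calls and one loop of
BENCH_FUNC_LAT calls over the same samples, reporting the first as reciprocal
throughput and the second as latency.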
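
For reference, the same measurement idea as a self-contained program outside
the benchtests harness.  A minimal sketch, assuming POSIX clock_gettime and
using sin as a stand-in for the benchmarked function; note it marks zero
volatile to stop the compiler folding the multiply, where the patch relies on
__attribute__ ((used)) instead:

#include <math.h>
#include <stdio.h>
#include <time.h>

static double volatile ret;         /* sink so calls are not optimized away */
static double volatile zero = 0.0;  /* always zero, hides the dependency */

static double
now_ns (void)
{
  struct timespec ts;
  clock_gettime (CLOCK_MONOTONIC, &ts);
  return ts.tv_sec * 1e9 + ts.tv_nsec;
}

int
main (void)
{
  enum { N = 10000000 };
  double func_res = 0.0;

  /* Reciprocal throughput: independent inputs, calls may overlap.  */
  double t0 = now_ns ();
  for (int i = 0; i < N; i++)
    ret = sin (1.0 + i * 1e-6);
  double tput = (now_ns () - t0) / N;

  /* Latency: each input depends on the previous result, so calls serialize.  */
  double t1 = now_ns ();
  for (int i = 0; i < N; i++)
    ret = func_res = sin (func_res * zero + 1.0 + i * 1e-6);
  double lat = (now_ns () - t1) / N;

  printf ("reciprocal throughput (ns): %g\n", tput);
  printf ("latency (ns): %g\n", lat);
  printf ("throughput (iters/s): %g\n", 1e9 / tput);
  return 0;
}

Build with something like "gcc -O2 demo.c -lm".  The throughput (iters/s)
figure is just 1e9 divided by the reciprocal throughput in ns, which is what
the skeleton's d_total_i / throughput * 1000000000.0 computes.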