Add math benchmark latency test
Commit Message
Alexander Monakov wrote:
> I suggest using "reciprocal throughput" if you're looking for a short term
> for 'independent executions per unit time'. It's easier to recognize and
> already used in practice (e.g. in docs by Agner Fog).
Right, what about this?
"workload-spec2006.wrf": {
"reciprocal throughput (ns)": 20,
"latency (ns)": 50,
"throughput (iters/s)": 5.0e+07
}
This leads to a question: some targets use an odd header, hp-timing.h. What units
does this use? Or is it completely undefined (and could it potentially change between
glibc versions)?
ChangeLog:
2017-08-16 Wilco Dijkstra <wdijkstr@arm.com>
* benchtests/bench-skeleton.c (main): Add support for
latency benchmarking.
* benchtests/scripts/bench.py: Add support for latency benchmarking.
--
Comments
On Wednesday 16 August 2017 08:52 PM, Wilco Dijkstra wrote:
> Alexander Monakov wrote:
>> I suggest using "reciprocal throughput" if you're looking for a short term
>> for 'independent executions per unit time'. It's easier to recognize and
>> already used in practice (e.g. in docs by Agner Fog).
>
> Right what about this?
>
> "workload-spec2006.wrf": {
> "reciprocal throughput (ns)": 20,
> "latency (ns)": 50,
> "throughput (iters/s)": 5.0e+07
Please drop the spaces and special chars in the key names; they need to
validate against benchout.schema.json and IIRC it doesn't allow spaces.
Simple reciprocal-throughput, latency, etc. ought to be sufficient.
> }
>
> This leads to a question, some targets use an odd header hp-timing.h. What units
> does this use? Or is it completely undefined (and could potentially change between
> GLIBC versions)?
These change between architectures and may change between glibc versions —
if an architecture later adds a read from a hardware time source, for example.
Siddhesh
>
>
> ChangeLog:
> 2017-08-16 Wilco Dijkstra <wdijkstr@arm.com>
>
> * benchtests/bench-skeleton.c (main): Add support for
> latency benchmarking.
> * benchtests/scripts/bench.py: Add support for latency benchmarking.
> --
>
> diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
> index 3c6dad705594ac0a53edcb4e09686252c13127cf..48287be93b432b3acfc2431d1f7959bd00815b3b 100644
> --- a/benchtests/bench-skeleton.c
> +++ b/benchtests/bench-skeleton.c
> @@ -71,8 +71,10 @@ main (int argc, char **argv)
> bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
> double d_total_i = 0;
> timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
> + timing_t throughput = 0, latency = 0;
> int64_t c = 0;
> uint64_t cur;
> + BENCH_VARS;
> while (1)
> {
> if (is_bench)
> @@ -86,7 +88,16 @@ main (int argc, char **argv)
> BENCH_FUNC (v, i);
> TIMING_NOW (end);
> TIMING_DIFF (cur, start, end);
> - TIMING_ACCUM (total, cur);
> + TIMING_ACCUM (throughput, cur);
> +
> + TIMING_NOW (start);
> + for (k = 0; k < iters; k++)
> + for (i = 0; i < NUM_SAMPLES (v); i++)
> + BENCH_FUNC_LAT (v, i);
> + TIMING_NOW (end);
> + TIMING_DIFF (cur, start, end);
> + TIMING_ACCUM (latency, cur);
> +
> d_total_i += iters * NUM_SAMPLES (v);
> }
> else
> @@ -131,12 +142,18 @@ main (int argc, char **argv)
> /* Begin variant. */
> json_attr_object_begin (&json_ctx, VARIANT (v));
>
> - json_attr_double (&json_ctx, "duration", d_total_s);
> - json_attr_double (&json_ctx, "iterations", d_total_i);
> if (is_bench)
> - json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
> + {
> + json_attr_double (&json_ctx, "reciprocal throughput (ns)",
> + throughput / d_total_i);
> + json_attr_double (&json_ctx, "latency (ns)", latency / d_total_i);
> + json_attr_double (&json_ctx, "throughput (iters/s)",
> + d_total_i / throughput * 1000000000.0);
> + }
> else
> {
> + json_attr_double (&json_ctx, "duration", d_total_s);
> + json_attr_double (&json_ctx, "iterations", d_total_i);
> json_attr_double (&json_ctx, "max", max / d_iters);
> json_attr_double (&json_ctx, "min", min / d_iters);
> json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
> diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py
> index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755
> --- a/benchtests/scripts/bench.py
> +++ b/benchtests/scripts/bench.py
> @@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''
> # variant is represented by the _VARIANT structure. The ARGS structure
> # represents a single set of arguments.
> STRUCT_TEMPLATE = '''
> -#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
> +#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)
>
> struct args
> {
> @@ -84,7 +84,9 @@ EPILOGUE = '''
> #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
> #define RESULT_ACCUM(r, v, i, old, new) \\
> ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
> -#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
> +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})
> +#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})
> +#define BENCH_VARS %(defvar)s
> #define FUNCNAME "%(func)s"
> #include "bench-skeleton.c"'''
>
> @@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals):
> # If we have a return value from the function, make sure it is
> # assigned to prevent the compiler from optimizing out the
> # call.
> + getret = ''
> + latarg = ''
> + defvar = ''
> +
> if directives['ret']:
> print('static %s volatile ret;' % directives['ret'])
> - getret = 'ret = '
> - else:
> - getret = ''
> + print('static %s zero __attribute__((used)) = 0;' % directives['ret'])
> + getret = 'ret = func_res = '
> + latarg = 'func_res * zero +'
> + defvar = '%s func_res = 0;' % directives['ret']
>
> # Test initialization.
> if directives['init']:
> print('#define BENCH_INIT %s' % directives['init'])
>
> - print(EPILOGUE % {'getret': getret, 'func': func})
> + print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })
>
>
> def _print_arg_data(func, directives, all_vals):
>
>
@@ -71,8 +71,10 @@ main (int argc, char **argv)
bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0;
double d_total_i = 0;
timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
+ timing_t throughput = 0, latency = 0;
int64_t c = 0;
uint64_t cur;
+ BENCH_VARS;
while (1)
{
if (is_bench)
@@ -86,7 +88,16 @@ main (int argc, char **argv)
BENCH_FUNC (v, i);
TIMING_NOW (end);
TIMING_DIFF (cur, start, end);
- TIMING_ACCUM (total, cur);
+ TIMING_ACCUM (throughput, cur);
+
+ TIMING_NOW (start);
+ for (k = 0; k < iters; k++)
+ for (i = 0; i < NUM_SAMPLES (v); i++)
+ BENCH_FUNC_LAT (v, i);
+ TIMING_NOW (end);
+ TIMING_DIFF (cur, start, end);
+ TIMING_ACCUM (latency, cur);
+
d_total_i += iters * NUM_SAMPLES (v);
}
else
@@ -131,12 +142,18 @@ main (int argc, char **argv)
/* Begin variant. */
json_attr_object_begin (&json_ctx, VARIANT (v));
- json_attr_double (&json_ctx, "duration", d_total_s);
- json_attr_double (&json_ctx, "iterations", d_total_i);
if (is_bench)
- json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i);
+ {
+ json_attr_double (&json_ctx, "reciprocal throughput (ns)",
+ throughput / d_total_i);
+ json_attr_double (&json_ctx, "latency (ns)", latency / d_total_i);
+ json_attr_double (&json_ctx, "throughput (iters/s)",
+ d_total_i / throughput * 1000000000.0);
+ }
else
{
+ json_attr_double (&json_ctx, "duration", d_total_s);
+ json_attr_double (&json_ctx, "iterations", d_total_i);
json_attr_double (&json_ctx, "max", max / d_iters);
json_attr_double (&json_ctx, "min", min / d_iters);
json_attr_double (&json_ctx, "mean", d_total_s / d_total_i);
@@ -45,7 +45,7 @@ DEFINES_TEMPLATE = '''
# variant is represented by the _VARIANT structure. The ARGS structure
# represents a single set of arguments.
STRUCT_TEMPLATE = '''
-#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s)
+#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s)
struct args
{
@@ -84,7 +84,9 @@ EPILOGUE = '''
#define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing)
#define RESULT_ACCUM(r, v, i, old, new) \\
((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1))
-#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);})
+#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );})
+#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);})
+#define BENCH_VARS %(defvar)s
#define FUNCNAME "%(func)s"
#include "bench-skeleton.c"'''
@@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals):
# If we have a return value from the function, make sure it is
# assigned to prevent the compiler from optimizing out the
# call.
+ getret = ''
+ latarg = ''
+ defvar = ''
+
if directives['ret']:
print('static %s volatile ret;' % directives['ret'])
- getret = 'ret = '
- else:
- getret = ''
+ print('static %s zero __attribute__((used)) = 0;' % directives['ret'])
+ getret = 'ret = func_res = '
+ latarg = 'func_res * zero +'
+ defvar = '%s func_res = 0;' % directives['ret']
# Test initialization.
if directives['init']:
print('#define BENCH_INIT %s' % directives['init'])
- print(EPILOGUE % {'getret': getret, 'func': func})
+ print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar })
def _print_arg_data(func, directives, all_vals):