[v8,3/9] benchtests: Add arc4random benchtest

Message ID 20220629213428.3065430-4-adhemerval.zanella@linaro.org (mailing list archive)
State Superseded
Series Add arc4random support |


Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto June 29, 2022, 9:34 p.m. UTC
  It shows both throughput (total bytes obtained in the test duration)
and latency for both arc4random and arc4random_buf with different
sizes.

Checked on x86_64-linux-gnu, aarch64-linux, and powerpc64le-linux-gnu.
 benchtests/Makefile                  |   5 +-
 benchtests/bench-arc4random.c        | 218 +++++++++++++++++++++++++++
 benchtests/bench-hash-funcs-kernel.h |   1 +
 benchtests/bench-hash-funcs.c        |   2 -
 benchtests/bench-util.h              |   6 +
 5 files changed, 229 insertions(+), 3 deletions(-)
 create mode 100644 benchtests/bench-arc4random.c


Florian Weimer July 12, 2022, 9:29 a.m. UTC | #1
* Adhemerval Zanella:

> diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
> index d0e29423aa..52eded5590 100644
> --- a/benchtests/bench-util.h
> +++ b/benchtests/bench-util.h
> @@ -16,6 +16,12 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
> +/* Prevent compiler to optimize away call.  */
> +#define DO_NOT_OPTIMIZE_OUT(__value)		\
> +  ({						\
> +    __typeof (__value) __v = (__value);		\
> +    asm volatile ("" : : "r,m" (__v) : "memory");\
> +  })

__ is not needed for macro arguments.  I think the macro would be more
useful if it returned __v.  Could you put this into a separate patch?

Adhemerval Zanella Netto July 12, 2022, 5:26 p.m. UTC | #2
On 12/07/22 06:29, Florian Weimer wrote:
> * Adhemerval Zanella:
>> diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
>> index d0e29423aa..52eded5590 100644
>> --- a/benchtests/bench-util.h
>> +++ b/benchtests/bench-util.h
>> @@ -16,6 +16,12 @@
>>      License along with the GNU C Library; if not, see
>>      <https://www.gnu.org/licenses/>.  */
>> +/* Prevent compiler to optimize away call.  */
>> +#define DO_NOT_OPTIMIZE_OUT(__value)		\
>> +  ({						\
>> +    __typeof (__value) __v = (__value);		\
>> +    asm volatile ("" : : "r,m" (__v) : "memory");\
>> +  })
> __ is not needed for macro arguments.  I think the macro would be more
> useful if it returned __v.  Could you put this into a separate patch?
> Thanks.

I think it should be ok to update this patch; it is currently only used
in the hash benchmark.


diff --git a/benchtests/Makefile b/benchtests/Makefile
index c279041e19..d99771be74 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -233,7 +233,10 @@  hash-benchset := \
   nss-hash \
 # hash-benchset
-stdlib-benchset := strtod
+stdlib-benchset := \
+  arc4random \
+  strtod \
+  # stdlib-benchset
 stdio-common-benchset := sprintf
diff --git a/benchtests/bench-arc4random.c b/benchtests/bench-arc4random.c
new file mode 100644
index 0000000000..d8fd40298e
--- /dev/null
+++ b/benchtests/bench-arc4random.c
@@ -0,0 +1,218 @@ 
+/* arc4random benchmarks.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+#include "bench-timing.h"
+#include "bench-util.h"
+#include "json-lib.h"
+#include <array_length.h>
+#include <intprops.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/support.h>
+#include <support/timespec.h>
+#include <support/xthread.h>
+static volatile sig_atomic_t timer_finished;
+static void timer_callback (int unused)
+  timer_finished = 1;
+static timer_t timer;
+/* Run for approximately DURATION seconds; it does not matter who
+   receives the signal (so no need to mask it on the main thread).  */
+static void
+timer_start (void)
+  timer_finished = 0;
+  timer = support_create_timer (DURATION, 0, false, timer_callback);
+static void
+timer_stop (void)
+  support_delete_timer (timer);
+static const uint32_t sizes[] = { 0, 16, 32, 48, 64, 80, 96, 112, 128 };
+static double
+bench_throughput (void)
+  uint64_t n = 0;
+  struct timespec start, end;
+  clock_gettime (CLOCK_MONOTONIC, &start);
+  while (1)
+    {
+      DO_NOT_OPTIMIZE_OUT (arc4random ());
+      n++;
+      if (timer_finished == 1)
+	break;
+    }
+  clock_gettime (CLOCK_MONOTONIC, &end);
+  struct timespec diff = timespec_sub (end, start);
+  double total = (double) n * sizeof (uint32_t);
+  double duration = (double) diff.tv_sec
+    + (double) diff.tv_nsec / TIMESPEC_HZ;
+  return total / duration;
+static double
+bench_latency (void)
+  timing_t start, stop, cur;
+  const size_t iters = 1024;
+  TIMING_NOW (start);
+  for (size_t i = 0; i < iters; i++)
+    DO_NOT_OPTIMIZE_OUT (arc4random ());
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+  return (double) (cur) / (double) iters;
+static double
+bench_buf_throughput (size_t len)
+  uint8_t buf[len];
+  uint64_t n = 0;
+  struct timespec start, end;
+  clock_gettime (CLOCK_MONOTONIC, &start);
+  while (1)
+    {
+      arc4random_buf (buf, len);
+      n++;
+      if (timer_finished == 1)
+	break;
+    }
+  clock_gettime (CLOCK_MONOTONIC, &end);
+  struct timespec diff = timespec_sub (end, start);
+  double total = (double) n * len;
+  double duration = (double) diff.tv_sec
+    + (double) diff.tv_nsec / TIMESPEC_HZ;
+  return total / duration;
+static double
+bench_buf_latency (size_t len)
+  timing_t start, stop, cur;
+  const size_t iters = 1024;
+  uint8_t buf[len];
+  TIMING_NOW (start);
+  for (size_t i = 0; i < iters; i++)
+    arc4random_buf (buf, len);
+  TIMING_NOW (stop);
+  TIMING_DIFF (cur, start, stop);
+  return (double) (cur) / (double) iters;
+static void
+bench_singlethread (json_ctx_t *json_ctx)
+  json_element_object_begin (json_ctx);
+  json_array_begin (json_ctx, "throughput");
+  for (int i = 0; i < array_length (sizes); i++)
+    {
+      timer_start ();
+      double r = sizes[i] == 0
+	? bench_throughput () : bench_buf_throughput (sizes[i]);
+      timer_stop ();
+      json_element_double (json_ctx, r);
+    }
+  json_array_end (json_ctx);
+  json_array_begin (json_ctx, "latency");
+  for (int i = 0; i < array_length (sizes); i++)
+    {
+      timer_start ();
+      double r = sizes[i] == 0
+	? bench_latency () : bench_buf_latency (sizes[i]);
+      timer_stop ();
+      json_element_double (json_ctx, r);
+    }
+  json_array_end (json_ctx);
+  json_element_object_end (json_ctx);
+static void
+run_bench (json_ctx_t *json_ctx, const char *name,
+	   char *const*fnames, size_t fnameslen,
+	   void (*bench) (json_ctx_t *ctx))
+  json_attr_object_begin (json_ctx, name);
+  json_array_begin (json_ctx, "functions");
+  for (int i = 0; i < fnameslen; i++)
+    json_element_string (json_ctx, fnames[i]);
+  json_array_end (json_ctx);
+  json_array_begin (json_ctx, "results");
+  bench (json_ctx);
+  json_array_end (json_ctx);
+  json_attr_object_end (json_ctx);
+static int
+do_test (void)
+  char *fnames[array_length (sizes)];
+  for (int i = 0; i < array_length (sizes); i++)
+    if (sizes[i] == 0)
+      fnames[i] = xasprintf ("arc4random");
+    else
+      fnames[i] = xasprintf ("arc4random_buf(%u)", sizes[i]);
+  json_ctx_t json_ctx;
+  json_init (&json_ctx, 0, stdout);
+  json_document_begin (&json_ctx);
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+  run_bench (&json_ctx, "single-thread", fnames, array_length (fnames),
+	     bench_singlethread);
+  json_document_end (&json_ctx);
+  for (int i = 0; i < array_length (sizes); i++)
+    free (fnames[i]);
+  return 0;
+#include <support/test-driver.c>
diff --git a/benchtests/bench-hash-funcs-kernel.h b/benchtests/bench-hash-funcs-kernel.h
index 83995cc0ae..63034f7e44 100644
--- a/benchtests/bench-hash-funcs-kernel.h
+++ b/benchtests/bench-hash-funcs-kernel.h
@@ -17,6 +17,7 @@ 
    <https://www.gnu.org/licenses/>.  */
+#include "bench-util.h"
 /* We go through the trouble of using macros here because many of the
    hash functions are meant to be inlined so its not fair to benchmark
diff --git a/benchtests/bench-hash-funcs.c b/benchtests/bench-hash-funcs.c
index 578c5cbae2..44b349d30c 100644
--- a/benchtests/bench-hash-funcs.c
+++ b/benchtests/bench-hash-funcs.c
@@ -38,8 +38,6 @@ 
 #include <stdlib.h>
 #include <string.h>
-#define DO_NOT_OPTIMIZE_OUT(x) __asm__ volatile("" : : "r,m"(x) : "memory")
   NFIXED_ITERS = 1048576,
diff --git a/benchtests/bench-util.h b/benchtests/bench-util.h
index d0e29423aa..52eded5590 100644
--- a/benchtests/bench-util.h
+++ b/benchtests/bench-util.h
@@ -16,6 +16,12 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
+/* Prevent the compiler from optimizing away the call.  */
+#define DO_NOT_OPTIMIZE_OUT(__value)		\
+  ({						\
+    __typeof (__value) __v = (__value);		\
+    asm volatile ("" : : "r,m" (__v) : "memory");\
+  })
 #ifndef START_ITER
 # define START_ITER (100000000)