[1/2] Add random benchmark

Message ID PAWPR08MB89826762EA450DD0FD209AC9832D2@PAWPR08MB8982.eurprd08.prod.outlook.com
State Committed
Delegated to: Adhemerval Zanella Netto
Headers
Series [1/2] Add random benchmark |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Testing passed
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Testing passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Testing passed

Commit Message

Wilco Dijkstra March 18, 2024, 3:17 p.m. UTC
Add a simple benchmark to measure the overhead of internal libc locks in
the random() implementation on both single- and multi-threaded cases.
This relies on the implementation of random using internal locks to
access shared global data, and that the runtime uses multi-threaded
locking once a thread has been created (even after it finishes).

OK for commit?

---
  

Comments

Adhemerval Zanella Netto April 16, 2024, 4:55 p.m. UTC | #1
On 18/03/24 12:17, Wilco Dijkstra wrote:
> 
> Add a simple benchmark to measure the overhead of internal libc locks in
> the random() implementation on both single- and multi-threaded cases.
> This relies on the implementation of random using internal locks to
> access shared global data, and that the runtime uses multi-threaded
> locking once a thread has been created (even after it finishes).
> 
> OK for commit?

Ok with the changes below.

> 
> ---
> 
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index e1346bbda125be9fc2b216f9e8be3f2ee7cb0c4d..1ec14078ab73d7c1c0fa1d4d870a075a66543a5c 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -246,6 +246,7 @@ hash-benchset := \
>  
>  stdlib-benchset := \
>    arc4random \
> +  random-lock \
>    strtod \
>    # stdlib-benchset
>  
> diff --git a/benchtests/bench-random-lock.c b/benchtests/bench-random-lock.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..29a02ae9ff3a81114e8dd7e1dddcb3309b92df6c
> --- /dev/null
> +++ b/benchtests/bench-random-lock.c
> @@ -0,0 +1,108 @@
> +/* Benchmark internal libc locking functions used in random.
> +   Copyright (C) 2022-2023 Free Software Foundation, Inc.

s/2023/2024/.

> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define TEST_MAIN
> +#define TEST_NAME "random-lock"
> +
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +
> +/* Modern cores run 20M iterations in about 1 second.  */
> +#define NUM_ITERS 50000000
> +
> +json_ctx_t json_ctx;

Maybe make json_ctx static here.

> +
> +
> +/* Measure the overhead of __libc_lock_lock and __libc_lock_unlock by
> +   calling random ().  */
> +static void
> +bench_random_lock (size_t iters)
> +{
> +  timing_t start, stop, total;
> +
> +  srandom (0);
> +
> +  /* Warmup to reduce variations due to frequency scaling.  */
> +  for (int i = 0; i < iters / 4; i++)
> +    (void) random ();
> +
> +  TIMING_NOW (start);
> +
> +  for (int i = 0; i < iters; i++)
> +    (void) random ();
> +
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (total, start, stop);
> +
> +  json_element_double (&json_ctx, (double) total / (double) iters);
> +}
> +
> +static void *
> +thread_start (void *p)
> +{
> +  return p;
> +}
> +
> +int
> +do_bench (void)
> +{
> +  json_init (&json_ctx, 0, stdout);
> +
> +  json_document_begin (&json_ctx);
> +
> +  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
> +  json_attr_object_begin (&json_ctx, "functions");
> +  json_attr_object_begin (&json_ctx, "random");
> +  json_attr_string (&json_ctx, "bench-variant", "single-threaded");
> +  json_array_begin (&json_ctx, "results");
> +
> +  /* Run benchmark single threaded.  */
> +  bench_random_lock (NUM_ITERS);
> +
> +  json_array_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +
> +  json_attr_object_begin (&json_ctx, "random");
> +  json_attr_string (&json_ctx, "bench-variant", "multi-threaded");
> +  json_array_begin (&json_ctx, "results");
> +
> +  /* Start a short thread to force SINGLE_THREAD_P == false.  This relies on
> +     the runtime disabling single-threaded optimizations when multiple
> +     threads are used, even after they finish.  */
> +
> +  pthread_t t;
> +  pthread_create (&t, NULL, thread_start, NULL);
> +  pthread_join (t, NULL);
> +
> +  /* Repeat benchmark with single-threaded optimizations disabled.  */
> +  bench_random_lock (NUM_ITERS);
> +
> +  json_array_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +  json_document_end (&json_ctx);
> +  return 0;
> +}
> +
> +#define TEST_FUNCTION do_bench ()
> +
> +#include "../test-skeleton.c"
> 

Use support/test-driver.c here instead of ../test-skeleton.c.
  

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index e1346bbda125be9fc2b216f9e8be3f2ee7cb0c4d..1ec14078ab73d7c1c0fa1d4d870a075a66543a5c 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -246,6 +246,7 @@  hash-benchset := \
 
 stdlib-benchset := \
   arc4random \
+  random-lock \
   strtod \
   # stdlib-benchset
 
diff --git a/benchtests/bench-random-lock.c b/benchtests/bench-random-lock.c
new file mode 100644
index 0000000000000000000000000000000000000000..29a02ae9ff3a81114e8dd7e1dddcb3309b92df6c
--- /dev/null
+++ b/benchtests/bench-random-lock.c
@@ -0,0 +1,108 @@ 
+/* Benchmark internal libc locking functions used in random.
+   Copyright (C) 2022-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define TEST_MAIN
+#define TEST_NAME "random-lock"
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "bench-timing.h"
+#include "json-lib.h"
+
+/* Modern cores run 20M iterations in about 1 second.  */
+#define NUM_ITERS 50000000
+
+json_ctx_t json_ctx;
+
+
+/* Measure the overhead of __libc_lock_lock and __libc_lock_unlock by
+   calling random ().  */
+static void
+bench_random_lock (size_t iters)
+{
+  timing_t start, stop, total;
+
+  srandom (0);
+
+  /* Warmup to reduce variations due to frequency scaling.  */
+  for (int i = 0; i < iters / 4; i++)
+    (void) random ();
+
+  TIMING_NOW (start);
+
+  for (int i = 0; i < iters; i++)
+    (void) random ();
+
+  TIMING_NOW (stop);
+
+  TIMING_DIFF (total, start, stop);
+
+  json_element_double (&json_ctx, (double) total / (double) iters);
+}
+
+static void *
+thread_start (void *p)
+{
+  return p;
+}
+
+int
+do_bench (void)
+{
+  json_init (&json_ctx, 0, stdout);
+
+  json_document_begin (&json_ctx);
+
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+  json_attr_object_begin (&json_ctx, "functions");
+  json_attr_object_begin (&json_ctx, "random");
+  json_attr_string (&json_ctx, "bench-variant", "single-threaded");
+  json_array_begin (&json_ctx, "results");
+
+  /* Run benchmark single threaded.  */
+  bench_random_lock (NUM_ITERS);
+
+  json_array_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+
+  json_attr_object_begin (&json_ctx, "random");
+  json_attr_string (&json_ctx, "bench-variant", "multi-threaded");
+  json_array_begin (&json_ctx, "results");
+
+  /* Start a short thread to force SINGLE_THREAD_P == false.  This relies on
+     the runtime disabling single-threaded optimizations when multiple
+     threads are used, even after they finish.  */
+
+  pthread_t t;
+  pthread_create (&t, NULL, thread_start, NULL);
+  pthread_join (t, NULL);
+
+  /* Repeat benchmark with single-threaded optimizations disabled.  */
+  bench_random_lock (NUM_ITERS);
+
+  json_array_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+  json_document_end (&json_ctx);
+  return 0;
+}
+
+#define TEST_FUNCTION do_bench ()
+
+#include "../test-skeleton.c"