benchtests: Add libc locking benchmark

Message ID AM5PR0801MB16684AC3608638C3D871364C836D9@AM5PR0801MB1668.eurprd08.prod.outlook.com
State Superseded
Headers
Series benchtests: Add libc locking benchmark |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Wilco Dijkstra Aug. 18, 2022, 11:42 a.m. UTC
  Add a simple libc locking benchmark to measure overhead of locking in both
single- and multi-threaded cases.

---
  

Comments

Carlos O'Donell Aug. 18, 2022, 4:40 p.m. UTC | #1
On 8/18/22 07:42, Wilco Dijkstra via Libc-alpha wrote:
> Add a simple libc locking benchmark to measure overhead of locking in both
> single- and multi-threaded cases.

You are using rand as a proxy for __libc_lock_lock() performance.

Given that we build the benchtests/ within the same framework, can
we access glibc internals and try to benchmark the locks directly?

Just like we have some tests using internals? If it doesn't work
out easily, then I think the current direction is fine.

Suggest:
Add a simple benchmark to measure the overhead of internal libc
locks in the random() implementation in both single- and
multi-threaded cases. This assumes that random() uses internal
locks to access global data.
 
> ---
> 
> diff --git a/benchtests/Makefile b/benchtests/Makefile
> index d99771be74b40f8afa3953f61c0721b19658d4b7..1bef5bf9ef477a645c3cadc66b2c25774cefb5e5 100644
> --- a/benchtests/Makefile
> +++ b/benchtests/Makefile
> @@ -236,6 +236,7 @@ hash-benchset := \
>  stdlib-benchset := \
>    arc4random \
>    strtod \
> +  libc-lock \

s/libc-lock/random-lock/g

>    # stdlib-benchset
>  
>  stdio-common-benchset := sprintf
> diff --git a/benchtests/bench-libc-lock.c b/benchtests/bench-libc-lock.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..0057c1397a3867df7515759a47ab93430f9ce466
> --- /dev/null
> +++ b/benchtests/bench-libc-lock.c
> @@ -0,0 +1,102 @@
> +/* Benchmark internal libc locking functions.

Benchmark internal libc locking functions used in random.

> +   Copyright (C) 2022 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#define TEST_MAIN
> +#define TEST_NAME "libc-locks"
> +
> +#include <pthread.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include "bench-timing.h"
> +#include "json-lib.h"
> +
> +json_ctx_t json_ctx;
> +
> +
> +/* Measure the overhead of __libc_lock_lock and __libc_lock_unlock by
> +   calling rand().  */
> +static void
> +bench_rand_lock (size_t iters)
> +{
> +  timing_t start, stop, total;
> +
> +  srand (0);
> +  for (int i = 0; i < iters / 4; i++)
> +    (void) rand();
> +
> +  TIMING_NOW (start);
> +
> +  for (int i = 0; i < iters; i++)
> +    (void) rand();
> +
> +  TIMING_NOW (stop);
> +
> +  TIMING_DIFF (total, start, stop);
> +
> +  json_element_double (&json_ctx, (double) total / (double) iters);
> +}
> +
> +static void *
> +thread_start (void *p)
> +{
> +  return p;
> +}
> +
> +int
> +do_bench (void)
> +{
> +  size_t iters = 20000000;
> +
> +  json_init (&json_ctx, 0, stdout);
> +
> +  json_document_begin (&json_ctx);
> +
> +  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
> +  json_attr_object_begin (&json_ctx, "functions");
> +  json_attr_object_begin (&json_ctx, "rand");
> +  json_attr_string (&json_ctx, "bench-variant", "single-threaded");
> +  json_array_begin (&json_ctx, "results");
> +
> +  /* Run benchmark single threaded.  */
> +  bench_rand_lock (iters);
> +
> +  json_array_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +
> +  json_attr_object_begin (&json_ctx, "rand");
> +  json_attr_string (&json_ctx, "bench-variant", "multi-threaded");
> +  json_array_begin (&json_ctx, "results");
> +
> +  pthread_t t;
> +  pthread_create (&t, NULL, thread_start, NULL);
> +  pthread_join (t, NULL);
> +
> +  /* Repeat benchmark now SINGLE_THREAD_P == false.  */
> +  bench_rand_lock (iters);
> +
> +  json_array_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +  json_attr_object_end (&json_ctx);
> +  json_document_end (&json_ctx);
> +  return 0;
> +}
> +
> +#define TEST_FUNCTION do_bench ()
> +
> +#include "../test-skeleton.c"
> +
> 
> 
>
  
Wilco Dijkstra Aug. 18, 2022, 6:56 p.m. UTC | #2
Hi Carlos,

> You are using rand as a proxy for __libc_lock_lock() performance.
>
> Given that we build the benchtests/ within the same framework, can
> we access glibc internals and try to benchmark the locks directly?
>
> Just like we have some tests using internals? If it doesn't work
> out easily, then I think the current direction is fine.

I don't believe there are tests for locking internals - the only tests that exist
are the basic atomics and the pthread tests. Trying to include the locking
headers resulted in lots of errors. The benchmarks are dynamically linked,
so you can only test exported interfaces. I think we'd need to add extra
exported (but not officially supported) functions if we want to benchmark
internal locks directly.

Alternatively we could create a benchmark of standard lock implementations
and run them in various scenarios. This could then be used to answer questions
like whether we should prefer exchange or compare-exchange, whether locks
should always spin for a short while (pthread locks do but internal locks don't)
and further improve the internal locks.

Cheers,
Wilco
  
Carlos O'Donell Aug. 19, 2022, 1:38 a.m. UTC | #3
On 8/18/22 14:56, Wilco Dijkstra wrote:
> Hi Carlos,
> 
>> You are using rand as a proxy for __libc_lock_lock() performance.
>>
>> Given that we build the benchtests/ within the same framework, can
>> we access glibc internals and try to benchmark the locks directly?
>>
>> Just like we have some tests using internals? If it doesn't work
>> out easily, then I think the current direction is fine.
> 
> I don't believe there are tests for locking internals - the only tests that exist
> are the basic atomics and the pthread tests. Trying to include the locking
> headers resulted in lots of errors. The benchmarks are dynamically linked,
> so you can only test exported interfaces. I think we'd need to add extra
> exported (but not officially supported) functions if we want to benchmark
> internal locks directly.

Correct, there are no tests for locking internals, I just wondered if flipping the
module name and including the headers worked, but it sounds like it doesn't work out
of the box. Which is fine, I'm OK with benchmarking random() in a single-threaded and
multi-threaded scenario.
 
> Alternatively we could create a benchmark of standard lock implementations
> and run them in various scenarios. This could then be used to answer questions
> like whether we should prefer exchange or compare-exchange, whether locks
> should always spin for a short while (pthread locks do but internal locks don't)
> and further improve the internal locks.

Let's stick with benchmarking random() for now to move your other patch forward?
  

Patch

diff --git a/benchtests/Makefile b/benchtests/Makefile
index d99771be74b40f8afa3953f61c0721b19658d4b7..1bef5bf9ef477a645c3cadc66b2c25774cefb5e5 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -236,6 +236,7 @@  hash-benchset := \
 stdlib-benchset := \
   arc4random \
   strtod \
+  libc-lock \
   # stdlib-benchset
 
 stdio-common-benchset := sprintf
diff --git a/benchtests/bench-libc-lock.c b/benchtests/bench-libc-lock.c
new file mode 100644
index 0000000000000000000000000000000000000000..0057c1397a3867df7515759a47ab93430f9ce466
--- /dev/null
+++ b/benchtests/bench-libc-lock.c
@@ -0,0 +1,102 @@ 
+/* Benchmark internal libc locking functions.
+   Copyright (C) 2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#define TEST_MAIN
+#define TEST_NAME "libc-locks"
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "bench-timing.h"
+#include "json-lib.h"
+
+json_ctx_t json_ctx;
+
+
+/* Measure the overhead of __libc_lock_lock and __libc_lock_unlock by
+   calling rand () ITERS times; emit the mean time per call as JSON.  */
+static void
+bench_rand_lock (size_t iters)
+{
+  timing_t start, stop, total;
+
+  /* Untimed warm-up.  Indices are size_t to match the type of ITERS.  */
+  srand (0);
+  for (size_t i = 0; i < iters / 4; i++)
+    (void) rand ();
+
+  TIMING_NOW (start);
+
+  for (size_t i = 0; i < iters; i++)
+    (void) rand ();
+
+  TIMING_NOW (stop);
+  TIMING_DIFF (total, start, stop);
+
+  json_element_double (&json_ctx, (double) total / (double) iters);
+}
+
+/* Trivial thread body; it returns its argument unchanged.  */
+static void *
+thread_start (void *p)
+{
+  return p;
+}
+
+/* Time rand () before and after a thread has run; return 0 on success.  */
+int
+do_bench (void)
+{
+  size_t iters = 20000000;
+  pthread_t t;
+  int ret = 0;
+
+  json_init (&json_ctx, 0, stdout);
+  json_document_begin (&json_ctx);
+  json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+  json_attr_object_begin (&json_ctx, "functions");
+  /* Run benchmark single threaded.  */
+  json_attr_object_begin (&json_ctx, "rand");
+  json_attr_string (&json_ctx, "bench-variant", "single-threaded");
+  json_array_begin (&json_ctx, "results");
+  bench_rand_lock (iters);
+  json_array_end (&json_ctx);
+  json_attr_object_end (&json_ctx);
+
+  /* Repeat benchmark now SINGLE_THREAD_P == false; fail if no thread ran.  */
+  if (pthread_create (&t, NULL, thread_start, NULL) != 0
+      || pthread_join (t, NULL) != 0)
+    ret = 1;
+  else
+    {
+      json_attr_object_begin (&json_ctx, "rand");
+      json_attr_string (&json_ctx, "bench-variant", "multi-threaded");
+      json_array_begin (&json_ctx, "results");
+      bench_rand_lock (iters);
+      json_array_end (&json_ctx);
+      json_attr_object_end (&json_ctx);
+    }
+  json_attr_object_end (&json_ctx);
+  json_document_end (&json_ctx);
+  return ret;
+}
+
+#define TEST_FUNCTION do_bench ()
+
+#include "../test-skeleton.c"
+