@@ -31,6 +31,14 @@ bench-string := ffs ffsll
bench := $(bench-math) $(bench-pthread) $(bench-string)
+# Locking function benchmarks
+bench-lock-func := mutex spinlock
+bench-lock-test := randomr
+bench-lock-objs := $(foreach t, $(bench-lock-test), \
+ $(objpfx)bench-lock-tfunc-$(t).o) \
+ $(foreach l, $(bench-lock-func), \
+ $(objpfx)bench-lock-lfunc-$(l).o)
+
# String function benchmarks.
string-benchset := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
mempcpy memset rawmemchr stpcpy stpncpy strcasecmp strcasestr \
@@ -69,6 +77,7 @@ $(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
$(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
$(objpfx)bench-malloc-thread: $(shared-thread-library)
+$(objpfx)bench-lock-common: $(bench-lock-objs) $(shared-thread-library) $(libm)
@@ -84,6 +93,7 @@ include ../Rules
binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
+binaries-bench-lock := $(objpfx)bench-lock-common
# The default duration: 10 seconds.
ifndef BENCH_DURATION
@@ -107,7 +117,8 @@ endif
# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) \
- $(binaries-bench-malloc:=.c)
+ $(binaries-bench-malloc:=.c) $(binaries-bench-lock:=.c) \
+ $(bench-lock-objs:%.o:%.c)
lib := nonlib
include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
@@ -125,9 +136,11 @@ bench-clean:
rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
+ rm -f $(binaries-bench-lock) $(addsuffix .o,$(binaries-bench-lock)) \
+ $(bench-lock-objs)
rm -f $(timing-type) $(addsuffix .o,$(timing-type))
-bench: $(timing-type) $(gen-locales) bench-set bench-func bench-malloc
+bench: $(timing-type) $(gen-locales) bench-lock bench-set bench-func bench-malloc
bench-set: $(binaries-benchset)
for run in $^; do \
@@ -142,6 +155,12 @@ bench-malloc: $(binaries-bench-malloc)
$(run-bench) $${thr} > $${run}-$${thr}.out; \
done
+bench-lock: $(binaries-bench-lock)
+ for run in $^; do \
+ echo "Running $${run}"; \
+ $(run-bench) 32 5 > $${run}.out; \
+ done
+
# Build and execute the benchmark functions. This target generates JSON
# formatted bench.out. Each of the programs produce independent JSON output,
# so one could even execute them individually and process it using any JSON
@@ -168,7 +187,7 @@ bench-func: $(binaries-bench)
scripts/benchout.schema.json
$(timing-type) $(binaries-bench) $(binaries-benchset) \
- $(binaries-bench-malloc): %: %.o $(objpfx)json-lib.o \
+ $(binaries-bench-malloc) $(binaries-bench-lock): %: %.o $(objpfx)json-lib.o \
$(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
$(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
$(+link)
new file mode 100644
@@ -0,0 +1,252 @@
+/* Common implementation for locking benchmarks.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <math.h>
+
+#include "bench-lock-common.h"
+
+/* Some bookkeeping structures. */
+typedef uint64_t timing_t;
+typedef uint64_t result_t;
+#define RFMT "%"PRIu64
+
+/* Minimum cache line alignment. Prevent false sharing for things
+ like elided locks.
+
+ TODO: Is there an existing macro to gather this? */
+#define CALIGN 128
+
+typedef int (*lfunc_t) (void *);
+
+struct tret
+{
+ result_t iterations;
+};
+
+struct targs
+{
+ int thread_id;
+ int num_thread;
+ lfunc_t lfunc;
+ lfunc_t ulfunc;
+ pthread_barrier_t *barrier;
+ void *wdata;
+ void *ldata;
+ struct test_funcs tfuncs;
+ struct tret ret;
+} __attribute__ ((aligned (CALIGN)));
+
+static volatile int global_stop;
+
+static void
+alarm_handler (int signum)
+{
+ global_stop = 1;
+}
+
+/* A small kernel to run on each thread to attempt to run as many
+ iterations of the workload as possible before SIGALRM fires. */
+static void *
+trampoline_func (void *varg)
+{
+ struct targs *arg = (struct targs *) varg;
+
+ arg->ret.iterations = 0;
+
+ /* Do a dry run while running under the new thread. */
+ arg->wdata = arg->tfuncs.wl_init (arg->thread_id, arg->num_thread);
+ arg->lfunc (arg->ldata);
+ arg->tfuncs.wl (arg->wdata, arg->thread_id);
+ arg->ulfunc (arg->ldata);
+ pthread_barrier_wait (arg->barrier);
+
+ while (global_stop == 0)
+ {
+ arg->lfunc (arg->ldata);
+ arg->tfuncs.wl (arg->wdata, arg->thread_id);
+ arg->ulfunc (arg->ldata);
+ arg->ret.iterations++;
+ }
+ arg->tfuncs.wl_deinit (arg->wdata, arg->thread_id);
+ return varg;
+}
+
+/* This test creates a num_threads threads and runs them for time_s seconds.
+ The measurement is throughput. More is better. This should be run long
+ enough to mitigate random events. */
+static result_t
+run_test (int num_threads, struct lock_funcs *lfuncs,
+ const struct test_funcs *tfuncs, int time_s)
+{
+ long i;
+ result_t result = 0;
+ pthread_t thread[num_threads];
+ struct targs thread_arg[num_threads];
+
+ /* Coral all threads until they're created and waiting */
+ pthread_barrier_t barrier;
+ pthread_barrier_init (&barrier, NULL, num_threads + 1);
+
+ global_stop = 0;
+
+ for (i = 0; i < num_threads; i += 1)
+ {
+ thread_arg[i].thread_id = i;
+ thread_arg[i].num_thread = num_threads;
+ thread_arg[i].lfunc = lfuncs->l_lock;
+ thread_arg[i].ulfunc = lfuncs->l_unlock;
+ thread_arg[i].barrier = &barrier;
+ thread_arg[i].tfuncs = *tfuncs;
+ thread_arg[i].ldata = lfuncs->l_data;
+ pthread_create (&thread[i], NULL, trampoline_func, &thread_arg[i]);
+ }
+
+ /* Keep everyone coralled until we're ready to go. */
+ alarm (time_s);
+ pthread_barrier_wait (&barrier);
+ pause ();
+
+ for (i = 0; i < num_threads; i++)
+ {
+ /* Return value is part of targs. */
+ pthread_join (thread[i], NULL);
+ result += thread_arg[i].ret.iterations;
+ }
+
+ return result;
+}
+
+/* Run a test without our locking functions in an attempt to
+ get something resembling ideal throughput.
+ TODO: run this a few times and average? */
+static result_t __attribute__ ((noinline))
+run_sanity (const struct test_funcs *workload, int time_s)
+{
+ int dummy_func (void *arg __attribute__ ((unused)))
+ {
+ return 0;
+ };
+ struct lock_funcs dummy_funcs = {
+ .l_init = &dummy_func,
+ .l_lock = &dummy_func,
+ .l_unlock = &dummy_func,
+ .l_data = NULL,
+ };
+ return run_test (1, &dummy_funcs, workload, time_s);
+}
+
+/* Section variables for all our test data. Defined by linker. */
+extern char __start_TFUNCS;
+extern char __start_LFUNCS;
+extern char __stop_TFUNCS;
+extern char __stop_LFUNCS;
+
+#define NUM_ITER 5
+
+int
+main (int a, char **v)
+{
+ result_t sanity_res;
+ struct lock_funcs **lfuncs = (struct lock_funcs **) &__start_LFUNCS;
+ struct sigaction act;
+
+ memset (&act, 0, sizeof (act));
+ act.sa_handler = &alarm_handler;
+ sigaction (SIGALRM, &act, NULL);
+
+ if (a < 2)
+ {
+ printf ("Usage: %s: [max threads] [time]\n", v[0]);
+ exit (EXIT_FAILURE);
+ }
+
+ int threads = atoi (v[1]);
+ int time = atoi (v[2]);
+
+ for (; lfuncs < (struct lock_funcs **) &__stop_LFUNCS; lfuncs++)
+ {
+ struct test_funcs **tfuncs = (struct test_funcs **) &__start_TFUNCS;
+
+ printf ("Running %s microbenchmarks\n", (*lfuncs)->l_name);
+ if ((*lfuncs)->l_init != NULL)
+ (*lfuncs)->l_init ((*lfuncs)->l_data);
+
+ for (; tfuncs < (struct test_funcs **) &__stop_TFUNCS; tfuncs++)
+ {
+ /* Run a test without any locking, on a single thread. The
+ intent is to provide a sanity measure which requires no
+ locking. */
+ printf ("\tRunning %s workload\n", (*tfuncs)->wl_name);
+ sanity_res = run_sanity (*tfuncs, time);
+ printf ("\tbaseline : " RFMT "\n", sanity_res);
+
+ for (int tcnt = 1; tcnt <= threads;)
+ {
+ result_t result[NUM_ITER];
+
+ printf ("\tthreads %4d: ", tcnt);
+ fflush (stdout);
+
+ for (int t = 0; t < NUM_ITER; t++)
+ {
+ result[t] = run_test (tcnt, *lfuncs, *tfuncs, time);
+ printf (RFMT " ", result[t]);
+ fflush (stdout);
+ }
+
+ /* Compute deviation and average. */
+ result_t avg = 0;
+ double var = 0;
+ for (int i = 0; i < NUM_ITER; i++)
+ avg += result[i];
+ avg /= NUM_ITER;
+ for (int i = 0; i < NUM_ITER; i++)
+ {
+ double tmp = (double) result[i] - (double) avg;
+ var += tmp * tmp;
+ }
+ var /= NUM_ITER;
+ var = sqrt (var);
+
+ printf (" avg: " RFMT " dev: %d\n", avg, (int) var);
+
+ /* Increase the step size as incremental results are
+ less interesting as the thread count grows. */
+ if (tcnt < 4)
+ tcnt += 1;
+ else if (tcnt < 16)
+ tcnt += 2;
+ else if (tcnt < 64)
+ tcnt += 4;
+ else
+ tcnt += 8;
+ }
+ }
+ }
+
+ return EXIT_SUCCESS;
+}
new file mode 100644
@@ -0,0 +1,50 @@
+/* Shared header for locking benchmarks.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _BENCH_LOCK_COMMON_H
+#define _BENCH_LOCK_COMMON_H 1
+
+/* Common header like stuff for tests. */
+struct test_funcs
+{
+ void *(*wl_init) (int id, int nid);
+ void (*wl_deinit) (void *, int id);
+ void (*wl) (void *, int id);
+ const char *wl_name;
+};
+
+struct lock_funcs
+{
+ int (*l_init) (void *);
+ int (*l_lock) (void *);
+ int (*l_unlock) (void *);
+ void *l_data;
+ const char *l_name;
+};
+
+/* Helper macros for declaring test workloads and functions.
+ Pointers are inserted into special sections to simplify
+ adding new tests. */
+#define TO_LFUNC(func) ((int(*)(void*)) func)
+#define DECL_LFUNC(func) static struct lock_funcs *_lfptr_##func \
+ __attribute__ ((section ("LFUNCS"))) __attribute__ ((used)) = &func;
+#define DECL_TFUNC(func) static struct test_funcs *_lfptr_##func \
+ __attribute__ ((section ("TFUNCS"))) __attribute__ ((used)) = &func;
+
+#endif /* bench-lock-common.h */
new file mode 100644
@@ -0,0 +1,34 @@
+/* pthread mutex lock performance hooks.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <pthread.h>
+
+#include "bench-lock-common.h"
+
+/* Define some tests for the mutex. */
+static pthread_mutex_t p_mutex = PTHREAD_MUTEX_INITIALIZER;
+static struct lock_funcs smutex_lock_funcs = {
+ .l_init = NULL,
+ .l_lock = TO_LFUNC (&pthread_mutex_lock),
+ .l_unlock = TO_LFUNC (&pthread_mutex_unlock),
+ .l_data = (void *) &p_mutex,
+ .l_name = "pthread_mutex_lock (standard)",
+};
+
+DECL_LFUNC (smutex_lock_funcs);
new file mode 100644
@@ -0,0 +1,41 @@
+/* pthread spinlock implementation for lock benchmark.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <pthread.h>
+
+#include "bench-lock-common.h"
+
+/* Test structures for the mutex workload. */
+static int
+init_pthread_spin_lock (void *data)
+{
+ return pthread_spin_init ((pthread_spinlock_t *) data, 0);
+}
+
+static pthread_spinlock_t spin_lock;
+
+static struct lock_funcs spin_lock_funcs = {
+ .l_init = &init_pthread_spin_lock,
+ .l_lock = TO_LFUNC (&pthread_spin_lock),
+ .l_unlock = TO_LFUNC (&pthread_spin_unlock),
+ .l_data = (void *) &spin_lock,
+ .l_name = "pthread_spin_lock",
+};
+
+DECL_LFUNC (spin_lock_funcs);
new file mode 100644
@@ -0,0 +1,76 @@
+/* random_r() test driver for locking benchmarks.
+
+ Copyright (C) 2015 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <unistd.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "bench-lock-common.h"
+
+struct rdata
+{
+ char rbuf[64];
+ struct random_data rstate;
+};
+
+static void *
+random_wl_init (int id, int nid)
+{
+ struct rdata *data = calloc (sizeof (*data), 1);
+ if (!data)
+ {
+ perror ("calloc");
+ exit (EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < sizeof (data->rbuf); i++)
+ data->rbuf[i] = i + 1;
+
+ if (initstate_r (1, data->rbuf, sizeof (data->rbuf), &data->rstate))
+ {
+ perror ("initstate_r");
+ exit (EXIT_FAILURE);
+ }
+ return data;
+}
+
+static void
+random_wl (void *vdata, int i)
+{
+ struct rdata *data = (struct rdata *) vdata;
+ int32_t val;
+ for (int i = 0; i < 5; i += 1)
+ random_r (&data->rstate, &val);
+}
+
+static void
+random_wl_deinit (void *data, int id)
+{
+ free (data);
+}
+
+struct test_funcs random_wl_nodep = {
+ .wl_init = &random_wl_init,
+ .wl_deinit = &random_wl_deinit,
+ .wl = &random_wl,
+ .wl_name = "random_no_interdep",
+};
+
+DECL_TFUNC (random_wl_nodep);