@@ -1,5 +1,11 @@
2018-04-24 Kemi Wang <kemi.wang@intel.com>
+ * benchtests/bench-mutex-adaptive-thread.c: Microbenchmark for adaptive
+ spin mutex.
+ * benchtests/Makefile: Add adaptive spin mutex benchmark.
+
+2018-04-24 Kemi Wang <kemi.wang@intel.com>
+
* elf/dl-tunables.list: Add glibc.mutex.spin_count entry.
* manual/tunables.texi: Add glibc.mutex.spin_count description.
* nptl/Makefile: Add pthread_mutex_conf.c for compilation.
@@ -95,10 +95,17 @@ else
bench-malloc := $(filter malloc-%,${BENCHSET})
endif
+ifeq (${BENCHSET},)
+bench-mutex := mutex-adaptive-thread
+else
+bench-mutex := $(filter mutex-%,${BENCHSET})
+endif
+
$(addprefix $(objpfx)bench-,$(bench-math)): $(libm)
$(addprefix $(objpfx)bench-,$(math-benchset)): $(libm)
$(addprefix $(objpfx)bench-,$(bench-pthread)): $(shared-thread-library)
$(objpfx)bench-malloc-thread: $(shared-thread-library)
+$(addprefix $(objpfx)bench-,$(bench-mutex)): $(shared-thread-library)
@@ -119,6 +126,7 @@ include ../Rules
binaries-bench := $(addprefix $(objpfx)bench-,$(bench))
binaries-benchset := $(addprefix $(objpfx)bench-,$(benchset))
binaries-bench-malloc := $(addprefix $(objpfx)bench-,$(bench-malloc))
+binaries-bench-mutex := $(addprefix $(objpfx)bench-,$(bench-mutex))
# The default duration: 10 seconds.
ifndef BENCH_DURATION
@@ -142,7 +150,7 @@ endif
# This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
# for all these modules.
cpp-srcs-left := $(binaries-benchset:=.c) $(binaries-bench:=.c) \
- $(binaries-bench-malloc:=.c)
+ $(binaries-bench-malloc:=.c) $(binaries-bench-mutex:=.c)
lib := nonlib
include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
@@ -158,6 +166,7 @@ bench-clean:
rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
+ rm -f $(binaries-bench-mutex) $(addsuffix .o,$(binaries-bench-mutex))
rm -f $(timing-type) $(addsuffix .o,$(timing-type))
rm -f $(addprefix $(objpfx),$(bench-extra-objs))
@@ -165,7 +174,7 @@ bench-clean:
ifneq ($(strip ${BENCHSET}),)
VALIDBENCHSETNAMES := bench-pthread bench-math bench-string string-benchset \
wcsmbs-benchset stdlib-benchset stdio-common-benchset math-benchset \
- malloc-thread
+ malloc-thread mutex-adaptive-thread
INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
ifneq (${INVALIDBENCHSETNAMES},)
$(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
@@ -176,7 +185,7 @@ endif
# Define the bench target only if the target has a usable python installation.
ifdef PYTHON
-bench: bench-build bench-set bench-func bench-malloc
+bench: bench-build bench-set bench-func bench-malloc bench-mutex
else
bench:
@echo "The bench target needs python to run."
@@ -187,10 +196,10 @@ endif
# only if we're building natively.
ifeq (no,$(cross-compiling))
bench-build: $(gen-locales) $(timing-type) $(binaries-bench) \
- $(binaries-benchset) $(binaries-bench-malloc)
+ $(binaries-benchset) $(binaries-bench-malloc) $(binaries-bench-mutex)
else
bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
- $(binaries-bench-malloc)
+ $(binaries-bench-malloc) $(binaries-bench-mutex)
endif
bench-set: $(binaries-benchset)
@@ -207,6 +216,19 @@ bench-malloc: $(binaries-bench-malloc)
done;\
done
+# Run the benchmark with 1, 2, 3, 4, nproc/4, nproc/2 and nproc threads.
+# A candidate thread count is skipped when it is not larger than the last
+# count actually run (this also drops 0 on machines with fewer than four
+# CPUs, which the benchmark program rejects) or when it exceeds nproc.
+bench-mutex: $(binaries-bench-mutex)
+	nprocs=`nproc`; \
+	for run in $^; do \
+	  prev=0; \
+	  for thr in 1 2 3 4 $$((nprocs / 4)) $$((nprocs / 2)) $$nprocs; do \
+	    if [ $$thr -gt $$prev ] && [ $$thr -le $$nprocs ]; then \
+	      echo "Running $${run} $${thr}"; \
+	      $(run-bench) $${thr} > $${run}-$${thr}.out; \
+	      prev=$$thr; \
+	    fi; \
+	  done; \
+	done
+
# Build and execute the benchmark functions. This target generates JSON
# formatted bench.out. Each of the programs produce independent JSON output,
# so one could even execute them individually and process it using any JSON
@@ -236,8 +258,8 @@ bench-func: $(binaries-bench)
fi
$(timing-type) $(binaries-bench) $(binaries-benchset) \
- $(binaries-bench-malloc): %: %.o $(objpfx)json-lib.o \
- $(link-extra-libs-tests) \
+ $(binaries-bench-malloc) $(binaries-bench-mutex): \
+ %: %.o $(objpfx)json-lib.o $(link-extra-libs-tests) \
$(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
$(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
$(+link-tests)
new file mode 100644
@@ -0,0 +1,230 @@
+/* Benchmark pthread adaptive spin mutex lock and unlock functions.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "bench-timing.h"
+#include "json-lib.h"
+
+/* Benchmark duration in seconds. */
+#define BENCHMARK_DURATION 15
+
+#define TYPE PTHREAD_MUTEX_ADAPTIVE_NP
+
+static unsigned long long val;
+static pthread_mutexattr_t attr;
+static pthread_mutex_t mutex;
+
+#define WORKING_SET_SIZE 4
+int working_set[] = {1, 10, 100, 1000};
+
+struct thread_args
+{
+ unsigned long long iters; /* Lock/unlock rounds this thread completed.  */
+ int working_set; /* Critical section size handed to the benchmark loop.  */
+ timing_t elapsed; /* Time measured around this thread's benchmark loop.  */
+};
+
+/* Initialize the global attribute object and mutex, giving the mutex
+   the type TYPE.  A benchmark run on a mutex that failed to initialize
+   would be meaningless, so any failure is reported on stderr and the
+   program exits with status 1.  */
+static void
+init_mutex (void)
+{
+  int err;
+
+  if ((err = pthread_mutexattr_init (&attr)) != 0
+      || (err = pthread_mutexattr_settype (&attr, TYPE)) != 0
+      || (err = pthread_mutex_init (&mutex, &attr)) != 0)
+    {
+      fprintf (stderr, "cannot initialize mutex: %s\n", strerror (err));
+      exit (1);
+    }
+}
+
+/* Clear the first NUM_THREAD elements of ARGS and record SIZE as the
+   critical section size of each of them.  */
+static void
+init_parameter (int size, struct thread_args *args, int num_thread)
+{
+  for (int idx = 0; idx < num_thread; idx++)
+    {
+      struct thread_args *cur = &args[idx];
+
+      memset (cur, 0, sizeof (*cur));
+      cur->working_set = size;
+    }
+}
+
+/* Set by the SIGALRM handler to stop the benchmark loops.  The type is
+   volatile sig_atomic_t because that is the object type a signal
+   handler may portably write to.  */
+static volatile sig_atomic_t timeout;
+
+/* Signal handler: raise the timeout flag.  SIGNUM is not used.  */
+static void
+alarm_handler (int signum)
+{
+  timeout = 1;
+}
+
+/* Until the timeout flag is raised, repeatedly take the global mutex,
+   increment VAL SIZE times while holding it, and release it again.
+   Return the number of lock/unlock rounds completed.  */
+static unsigned long long
+mutex_benchmark_loop (int size)
+{
+  /* Volatile counter driving the work done while the lock is held.  */
+  volatile int remaining;
+  unsigned long long rounds;
+
+  for (rounds = 0; !timeout; rounds++)
+    {
+      remaining = size;
+      pthread_mutex_lock (&mutex);
+      for (; remaining > 0; remaining--)
+        val++;
+      pthread_mutex_unlock (&mutex);
+    }
+  return rounds;
+}
+
+/* Thread entry point.  ARG points to the caller's struct thread_args:
+   run the benchmark loop with its critical section size, then store the
+   number of rounds and the time measured around the loop back into it.
+   Always returns NULL.  */
+static void *
+benchmark_thread (void *arg)
+{
+  struct thread_args *self = arg;
+  timing_t begin, end;
+  unsigned long long done;
+
+  TIMING_NOW (begin);
+  done = mutex_benchmark_loop (self->working_set);
+  TIMING_NOW (end);
+
+  self->iters = done;
+  TIMING_DIFF (self->elapsed, begin, end);
+
+  return NULL;
+}
+
+/* Run the benchmark loop on NUM_THREAD threads; thread I records its
+   results in ARGS[I].  With a single thread the loop runs directly in
+   the calling thread.  If a thread cannot be created or joined, report
+   the error on stderr and exit with status 1.  */
+static void
+do_benchmark (size_t num_thread, struct thread_args *args)
+{
+  if (num_thread == 1)
+    {
+      /* Same timed loop as the worker threads, minus thread creation.  */
+      benchmark_thread (args);
+      return;
+    }
+
+  pthread_t threads[num_thread];
+  int err;
+
+  for (size_t i = 0; i < num_thread; i++)
+    {
+      err = pthread_create (&threads[i], NULL, benchmark_thread, args + i);
+      if (err != 0)
+        {
+          /* Joining a thread that was never created is undefined, so
+             give up rather than continue with a partial set.  */
+          fprintf (stderr, "pthread_create: %s\n", strerror (err));
+          exit (1);
+        }
+    }
+
+  for (size_t i = 0; i < num_thread; i++)
+    {
+      err = pthread_join (threads[i], NULL);
+      if (err != 0)
+        {
+          fprintf (stderr, "pthread_join: %s\n", strerror (err));
+          exit (1);
+        }
+    }
+}
+
+/* Print a one-line synopsis for the program called NAME on stderr and
+   exit with status 1.  */
+static void
+usage (const char *name)
+{
+  fprintf (stderr, "%s: <num_thread>\n", name);
+  exit (1);
+}
+
+/* Run the benchmark once for every critical section size listed in
+   WORKING_SET, using the thread count given as the optional sole
+   argument (default 1), and print the results as JSON on stdout.
+   Returns 0 on success; invalid arguments end in usage ().
+
+   NOTE(review): every iteration begins and ends its own JSON document,
+   so the combined output is a sequence of documents, not a single one.  */
+int
+main (int argc, char **argv)
+{
+  int i, j, num_thread = 1;
+  json_ctx_t json_ctx;
+  struct sigaction act;
+
+  if (argc == 2)
+    {
+      char *end;
+      long ret;
+
+      errno = 0;
+      ret = strtol (argv[1], &end, 10);
+
+      /* Accept only a complete, positive number that fits in an int.
+         The count sizes variable-length arrays below, so zero, negative
+         or truncated values must never get through.  */
+      if (errno != 0 || end == argv[1] || *end != '\0'
+          || ret <= 0 || ret != (int) ret)
+        usage (argv[0]);
+
+      num_thread = ret;
+    }
+  else if (argc != 1)
+    usage (argv[0]);
+
+  /* Benchmark for different critical section sizes.  */
+  for (i = 0; i < WORKING_SET_SIZE; i++)
+    {
+      int size = working_set[i];
+      struct thread_args args[num_thread];
+      unsigned long long iters = 0, min_iters = -1ULL, max_iters = 0;
+      double d_total_s = 0, d_total_i = 0;
+
+      timeout = false;
+      init_mutex ();
+      init_parameter (size, args, num_thread);
+
+      json_init (&json_ctx, 0, stdout);
+
+      json_document_begin (&json_ctx);
+
+      json_attr_string (&json_ctx, "timing_type", TIMING_TYPE);
+
+      json_attr_object_begin (&json_ctx, "functions");
+
+      json_attr_object_begin (&json_ctx, "mutex");
+
+      json_attr_object_begin (&json_ctx, "");
+
+      /* The SIGALRM handler raises the timeout flag that ends the
+         benchmark loops after BENCHMARK_DURATION seconds.  */
+      memset (&act, 0, sizeof (act));
+      act.sa_handler = &alarm_handler;
+
+      sigaction (SIGALRM, &act, NULL);
+
+      alarm (BENCHMARK_DURATION);
+
+      do_benchmark (num_thread, args);
+
+      /* Fold the per-thread results into totals and extremes.  */
+      for (j = 0; j < num_thread; j++)
+        {
+          iters = args[j].iters;
+          if (iters < min_iters)
+            min_iters = iters;
+          if (iters >= max_iters)
+            max_iters = iters;
+          d_total_i += iters;
+          TIMING_ACCUM (d_total_s, args[j].elapsed);
+        }
+      json_attr_double (&json_ctx, "duration", d_total_s);
+      json_attr_double (&json_ctx, "total_iterations", d_total_i);
+      json_attr_double (&json_ctx, "min_iteration", min_iters);
+      json_attr_double (&json_ctx, "max_iteration", max_iters);
+      json_attr_double (&json_ctx, "time_per_iteration",
+                        d_total_s / d_total_i);
+      json_attr_double (&json_ctx, "threads", num_thread);
+      json_attr_double (&json_ctx, "critical_section_size", size);
+
+      json_attr_object_end (&json_ctx);
+      json_attr_object_end (&json_ctx);
+      json_attr_object_end (&json_ctx);
+
+      json_document_end (&json_ctx);
+      fputs ("\n", json_ctx.fp);
+
+      /* init_mutex initializes these same objects again on the next
+         pass; initializing an already initialized mutex or attribute
+         object is undefined, so destroy them first.  */
+      pthread_mutex_destroy (&mutex);
+      pthread_mutexattr_destroy (&attr);
+    }
+  return 0;
+}