diff --git a/NEWS b/NEWS
index cd29ec7..0764d0d 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Version 2.29
Major new features:
+* NUMA spinlock is added to provide a spinlock implementation optimized
+ for multi-socket NUMA systems.
+
* The getcpu wrapper function has been added, which returns the currently
used CPU and NUMA node. This function is Linux-specific.
diff --git a/manual/examples/numa-spinlock.c b/manual/examples/numa-spinlock.c
new file mode 100644
index 0000000..ca98443
--- /dev/null
+++ b/manual/examples/numa-spinlock.c
@@ -0,0 +1,99 @@
+/* NUMA spinlock example.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <numa-spinlock.h>
+
+#define NUM_THREADS 20
+
+/* The NUMA spinlock shared by all worker threads.  */
+struct numa_spinlock *lock;
+
+/* Argument block handed to the workload function through
+   lock_info.argument.  */
+struct work_todo_argument
+{
+  void *arg;
+};
+
+/* Workload executed while the NUMA spinlock is held.  V points to a
+   struct work_todo_argument.  The return value is stored into
+   lock_info.result by numa_spinlock_apply.  */
+static void *
+work_todo (void *v)
+{
+  /* Do the real work with p->arg. */
+  struct work_todo_argument *p = v;
+  /* Return value is set to lock_info.result. */
+  return NULL;
+}
+
+/* Thread start routine: initialize a per-thread numa_spinlock_info
+   block for the shared LOCK and run work_todo under the NUMA spinlock.
+   ARG is passed through to the workload.  The workload's return value
+   (lock_info.result) is returned to pthread_join.  */
+void *
+work_thread (void *arg)
+{
+  struct work_todo_argument work_todo_arg;
+  struct numa_spinlock_info lock_info;
+
+  /* numa_spinlock_init returns non-zero with errno set on failure,
+     so %m reports the cause.  */
+  if (numa_spinlock_init (lock, &lock_info))
+    {
+      printf ("numa_spinlock_init failure: %m\n");
+      exit (1);
+    }
+
+  work_todo_arg.arg = arg;
+  lock_info.argument = &work_todo_arg;
+  lock_info.workload = work_todo;
+
+  /* Blocks until the workload has been executed, possibly by another
+     thread on the same NUMA node running it on our behalf.  */
+  numa_spinlock_apply (&lock_info);
+
+  return lock_info.result;
+}
+
+/* Create NUM_THREADS threads that contend on one NUMA spinlock, join
+   them, and free their results.  Exits with status 1 if the lock
+   cannot be allocated.  */
+int
+main (int argc, char **argv)
+{
+  pthread_t thr[NUM_THREADS];
+  void *res[NUM_THREADS];
+  int numthreads = NUM_THREADS;
+  int i;
+
+  lock = numa_spinlock_alloc ();
+  /* numa_spinlock_alloc returns NULL with errno set to ENOMEM on
+     failure; check before handing the lock to the threads.  */
+  if (lock == NULL)
+    {
+      printf ("numa_spinlock_alloc failure: %m\n");
+      exit (1);
+    }
+
+  for (i = 0; i < NUM_THREADS; i++)
+    {
+      int err_ret = pthread_create (&thr[i], NULL, work_thread,
+                                    (void *) (intptr_t) i);
+      if (err_ret != 0)
+        {
+          /* Report the pthread_create error code, not the loop
+             index: pthread_create returns the error number instead
+             of setting errno.  */
+          printf ("pthread_create failed: %d, %s\n",
+                  i, strerror (err_ret));
+          /* Only join the threads that were actually created.  */
+          numthreads = i;
+          break;
+        }
+    }
+
+  for (i = 0; i < numthreads; i++)
+    {
+      if (pthread_join (thr[i], &res[i]) == 0)
+        free (res[i]);
+      else
+        printf ("pthread_join failure: %m\n");
+    }
+
+  numa_spinlock_free (lock);
+
+  return 0;
+}
diff --git a/manual/threads.texi b/manual/threads.texi
index 87fda7d..e82ae0d 100644
--- a/manual/threads.texi
+++ b/manual/threads.texi
@@ -625,6 +625,9 @@ the standard.
@menu
* Default Thread Attributes:: Setting default attributes for
threads in a process.
+* NUMA Spinlock:: Spinlock optimized for
+ multi-socket NUMA platform.
+* NUMA Spinlock Example:: A NUMA spinlock example.
@end menu
@node Default Thread Attributes
@@ -669,6 +672,108 @@ The system does not have sufficient memory.
@end table
@end deftypefun
+@node NUMA Spinlock
+@subsubsection Spinlock optimized for multi-node NUMA systems
+
+To improve performance on multi-socket NUMA platforms for serialized
+region protected by spinlock, @theglibc{} implements a NUMA spinlock
+object, which minimizes cross-socket traffic and sends the protected
+serialized region to one core for execution to reduce spinlock contention
+overhead.
+
+The fundamental data types for a NUMA spinlock are
+@code{numa_spinlock} and @code{numa_spinlock_info}:
+
+@deftp {Data Type} {struct numa_spinlock}
+@standards{Linux, numa-spinlock.h}
+This data type is an opaque structure. A @code{numa_spinlock} pointer
+uniquely identifies a NUMA spinlock object.
+@end deftp
+
+@deftp {Data Type} {struct numa_spinlock_info}
+@standards{Linux, numa-spinlock.h}
+
+This data type uniquely identifies a NUMA spinlock information object for
+a thread. It has the following members, and others internal to NUMA
+spinlock implementation:
+
+@table @code
+@item void *(*workload) (void *)
+A function pointer to the workload function serialized by spinlock.
+@item void *argument
+A pointer to argument passed to the @var{workload} function pointer.
+@item void *result
+Return value from the @var{workload} function pointer.
+@end table
+
+@end deftp
+
+The following functions are provided for NUMA spinlock objects:
+
+@deftypefun struct numa_spinlock *numa_spinlock_alloc (void)
+@standards{Linux, numa-spinlock.h}
+@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}}
+
+This function returns a pointer to a newly allocated NUMA spinlock or a
+null pointer if the NUMA spinlock could not be allocated, setting
+@code{errno} to @code{ENOMEM}. Caller should call
+@code{numa_spinlock_free} on the NUMA spinlock pointer to free the
+memory space when it is no longer needed.
+
+This function is Linux-specific and is declared in @file{numa-spinlock.h}.
+@end deftypefun
+
+@deftypefun void numa_spinlock_free (struct numa_spinlock *@var{lock})
+@standards{Linux, numa-spinlock.h}
+@safety{@prelim{}@mtsafe{}@asunsafe{@asulock{}}@acunsafe{@aculock{} @acsfd{} @acsmem{}}}
+
+Free the memory space pointed to by @var{lock}, which must have been
+returned by a previous call to @code{numa_spinlock_alloc}. Otherwise,
+or if @code{numa_spinlock_free (@var{lock})} has already been called
+before, undefined behavior occurs.
+
+This function is Linux-specific and is declared in @file{numa-spinlock.h}.
+@end deftypefun
+
+@deftypefun int numa_spinlock_init (struct numa_spinlock *@var{lock}, struct numa_spinlock_info *@var{info})
+@standards{Linux, numa-spinlock.h}
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
+
+Initialize the NUMA spinlock information block pointed to by @var{info}
+with a NUMA spinlock pointer @var{lock}. The return value is @code{0} on
+success and @code{-1} on failure. The following @code{errno} error
+codes are defined for this function:
+
+@table @code
+@item ENOSYS
+The operating system does not support the @code{getcpu} function.
+@end table
+
+This function is Linux-specific and is declared in @file{numa-spinlock.h}.
+@end deftypefun
+
+@deftypefun void numa_spinlock_apply (struct numa_spinlock_info *@var{info})
+@standards{Linux, numa-spinlock.h}
+@safety{@prelim{}@mtsafe{}@assafe{}@acsafe{}}
+
+Apply for spinlock with a NUMA spinlock information block pointed to by
+@var{info}. When @code{numa_spinlock_apply} returns, the spinlock is
+released and the @var{result} member of @var{info} contains the return
+value of the @var{workload} member.
+
+This function is Linux-specific and is declared in @file{numa-spinlock.h}.
+@end deftypefun
+
+@node NUMA Spinlock Example
+@subsubsection NUMA Spinlock Example
+
+A NUMA spinlock example:
+
+@smallexample
+@include numa-spinlock.c.texi
+@end smallexample
+
@c FIXME these are undocumented:
@c pthread_atfork
@c pthread_attr_destroy
diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
index f827455..3361597 100644
--- a/sysdeps/unix/sysv/linux/Makefile
+++ b/sysdeps/unix/sysv/linux/Makefile
@@ -222,6 +222,8 @@ CFLAGS-gai.c += -DNEED_NETLINK
endif
ifeq ($(subdir),nptl)
+libpthread-sysdep_routines += numa_spinlock_alloc numa-spinlock
+sysdep_headers += numa-spinlock.h
tests += tst-align-clone tst-getpid1 \
tst-thread-affinity-pthread tst-thread-affinity-pthread2 \
tst-thread-affinity-sched
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index f1e12d9..7ce7e2b 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -185,3 +185,12 @@ libc {
__netlink_assert_response;
}
}
+
+libpthread {
+ GLIBC_2.29 {
+    numa_spinlock_alloc;
+    numa_spinlock_apply;
+    numa_spinlock_free;
+    numa_spinlock_init;
+ }
+}
diff --git a/sysdeps/unix/sysv/linux/aarch64/libpthread.abilist b/sysdeps/unix/sysv/linux/aarch64/libpthread.abilist
index 9a9e4ce..eb54a83 100644
--- a/sysdeps/unix/sysv/linux/aarch64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libpthread.abilist
@@ -243,3 +243,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
diff --git a/sysdeps/unix/sysv/linux/alpha/libpthread.abilist b/sysdeps/unix/sysv/linux/alpha/libpthread.abilist
index b413007..dd08796 100644
--- a/sysdeps/unix/sysv/linux/alpha/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/alpha/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/arm/libpthread.abilist b/sysdeps/unix/sysv/linux/arm/libpthread.abilist
index af82a4c..45a5c5a 100644
--- a/sysdeps/unix/sysv/linux/arm/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/arm/libpthread.abilist
@@ -27,6 +27,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.4 _IO_flockfile F
GLIBC_2.4 _IO_ftrylockfile F
GLIBC_2.4 _IO_funlockfile F
diff --git a/sysdeps/unix/sysv/linux/csky/libpthread.abilist b/sysdeps/unix/sysv/linux/csky/libpthread.abilist
index ea4b79a..cf65f72 100644
--- a/sysdeps/unix/sysv/linux/csky/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/csky/libpthread.abilist
@@ -73,6 +73,10 @@ GLIBC_2.29 mtx_timedlock F
GLIBC_2.29 mtx_trylock F
GLIBC_2.29 mtx_unlock F
GLIBC_2.29 nanosleep F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.29 open F
GLIBC_2.29 open64 F
GLIBC_2.29 pause F
diff --git a/sysdeps/unix/sysv/linux/hppa/libpthread.abilist b/sysdeps/unix/sysv/linux/hppa/libpthread.abilist
index bcba07f..a80475f 100644
--- a/sysdeps/unix/sysv/linux/hppa/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/hppa/libpthread.abilist
@@ -219,6 +219,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/i386/libpthread.abilist b/sysdeps/unix/sysv/linux/i386/libpthread.abilist
index bece86d..40ac05a 100644
--- a/sysdeps/unix/sysv/linux/i386/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/i386/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/ia64/libpthread.abilist b/sysdeps/unix/sysv/linux/ia64/libpthread.abilist
index ccc9449..5b190f6 100644
--- a/sysdeps/unix/sysv/linux/ia64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/ia64/libpthread.abilist
@@ -219,6 +219,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/m68k/coldfire/libpthread.abilist b/sysdeps/unix/sysv/linux/m68k/coldfire/libpthread.abilist
index af82a4c..45a5c5a 100644
--- a/sysdeps/unix/sysv/linux/m68k/coldfire/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/coldfire/libpthread.abilist
@@ -27,6 +27,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.4 _IO_flockfile F
GLIBC_2.4 _IO_ftrylockfile F
GLIBC_2.4 _IO_funlockfile F
diff --git a/sysdeps/unix/sysv/linux/m68k/m680x0/libpthread.abilist b/sysdeps/unix/sysv/linux/m68k/m680x0/libpthread.abilist
index bece86d..40ac05a 100644
--- a/sysdeps/unix/sysv/linux/m68k/m680x0/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/m68k/m680x0/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/microblaze/libpthread.abilist b/sysdeps/unix/sysv/linux/microblaze/libpthread.abilist
index 5067375..e6539bf 100644
--- a/sysdeps/unix/sysv/linux/microblaze/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/microblaze/libpthread.abilist
@@ -243,3 +243,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
diff --git a/sysdeps/unix/sysv/linux/mips/mips32/libpthread.abilist b/sysdeps/unix/sysv/linux/mips/mips32/libpthread.abilist
index 0214496..76edcb8 100644
--- a/sysdeps/unix/sysv/linux/mips/mips32/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips32/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/mips/mips64/libpthread.abilist b/sysdeps/unix/sysv/linux/mips/mips64/libpthread.abilist
index 0214496..76edcb8 100644
--- a/sysdeps/unix/sysv/linux/mips/mips64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/mips/mips64/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/nios2/libpthread.abilist b/sysdeps/unix/sysv/linux/nios2/libpthread.abilist
index 78cac2a..3141d08 100644
--- a/sysdeps/unix/sysv/linux/nios2/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/nios2/libpthread.abilist
@@ -241,3 +241,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
diff --git a/sysdeps/unix/sysv/linux/numa-spinlock-private.h b/sysdeps/unix/sysv/linux/numa-spinlock-private.h
new file mode 100644
index 0000000..0f7a3be
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/numa-spinlock-private.h
@@ -0,0 +1,38 @@
+/* Internal definitions and declarations for NUMA spinlock.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#include "numa-spinlock.h"
+
+/* The global NUMA spinlock.  Allocated by numa_spinlock_alloc with
+   trailing storage (lists[]) for one per-node thread-list head.  */
+struct numa_spinlock
+{
+  /* List of threads who own the global NUMA spinlock. */
+  struct numa_spinlock_info *owner;
+  /* The maximum NUMA node number. */
+  unsigned int max_node;
+  /* Non-zero for single node system. */
+  unsigned int single_node;
+  /* The maximum CPU number.  Used only when NUMA is disabled. */
+  unsigned int max_cpu;
+  /* Array of physical_package_id of each core if it isn't NULL.  Used
+     only when NUMA is disabled. */
+  unsigned int *physical_package_id_p;
+  /* Arrays of lists of threads who are spinning for the local NUMA lock
+     on NUMA nodes indexed by NUMA node number. */
+  struct numa_spinlock_info *lists[];
+};
diff --git a/sysdeps/unix/sysv/linux/numa-spinlock.c b/sysdeps/unix/sysv/linux/numa-spinlock.c
new file mode 100644
index 0000000..a141e7d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/numa-spinlock.c
@@ -0,0 +1,327 @@
+/* NUMA spinlock
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#include <sched.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#ifndef HAVE_GETCPU
+#include <unistd.h>
+#include <sys/syscall.h>
+#endif
+#include <atomic.h>
+#include <sys/types.h>
+#include "numa-spinlock-private.h"
+
+#if !defined HAVE_GETCPU && defined _LIBC
+# define HAVE_GETCPU
+#endif
+
+/* On multi-socket systems, memory is shared across the entire system.
+ Data access to the local socket is much faster than the remote socket
+ and data access to the local core is faster than sibling cores on the
+ same socket. For serialized workloads with conventional spinlock,
+ when there is high spinlock contention between threads, lock ping-pong
+ among sockets becomes the bottleneck and threads spend majority of
+ their time in spinlock overhead.
+
+ On multi-socket systems, the keys to our NUMA spinlock performance
+ are to minimize cross-socket traffic as well as localize the serialized
+ workload to one core for execution. The basic principles of NUMA
+ spinlock mainly consist of the following approaches, which reduce
+ data movement and accelerate critical section, eventually give us
+ significant performance improvement.
+
+ 1. MCS spinlock
+ MCS spinlock help us to reduce the useless lock movement in the
+ spinning state. This paper provides a good description for this
+ kind of lock:
+
+
+ 2. Critical Section Integration (CSI)
+ Essentially spinlock is similar to that one core complete critical
+ sections one by one. So when contention happen, the serialized works
+ are sent to the core who is the lock owner and responsible to execute
+ them, that can save much time and power, because all shared data are
+ located in private cache of the lock owner.
+
+ We implemented this mechanism based on queued spinlock in kernel, that
+ speeds up critical section, and reduces the probability of contention.
+ The paper provides a good description for this kind of lock:
+
+
+ 3. NUMA Aware Spinlock (NAS)
+ Currently multi-socket systems give us better performance per watt,
+ however that also involves more complex synchronization requirement,
+ because off-chip data movement is much slower. We use distributed
+ synchronization mechanism to decrease Lock cache line to and from
+ different nodes. The paper provides a good description for this kind
+ of lock:
+
+
+ 4. Yield Schedule
+ When threads are applying for Critical Section Integration(CSI) with
+ known contention, they will delegate work to the thread who is the
+ lock owner, and wait for work to be completed. The resources which
+ they are using should be transferred to other threads. In order to
+ accelerate the scenario, we introduce yield_sched function during
+ spinning stage.
+
+ 5. Optimization when NUMA is ON or OFF.
+ Although programs can access memory with lower latency when NUMA is
+ enabled, some programs may need more memory bandwidth for computation
+ with NUMA disabled. We also optimize multi-socket systems with NUMA
+ disabled.
+
+ NUMA spinlock flow chart (assuming there are 2 CPU nodes):
+
+ 1. Threads from node_0/node_1 acquire local lock for node_0/1
+ respectively. If the thread succeeds in acquiring local lock, it
+ goes to step 2, otherwise pushes critical function into current
+ local work queue, and enters into spinning stage with MCS mode.
+
+ 2. Threads from node_0/node_1 acquire the global lock. If it succeeds
+ in acquiring the global lock as the lock owner, it goes to step 3,
+ otherwise waits until the lock owner thread releases the global lock.
+
+ 3. The lock owner thread from node_0/1 enters into critical section,
+ cleans up work queue by performing all local critical functions
+ pushed at step 1 with CSI on behalf of other threads and informs
+ those spinning threads that their works have been done. It then
+ releases the local lock.
+
+ 4. The lock owner thread frees global lock. If another thread is
+ waiting at step 2, the lock owner thread passes the global lock to
+ the waiting thread and returns. The new lock owner thread enters
+ into step 3. If no threads are waiting, the lock owner thread
+ releases the global lock and returns. The whole critical section
+ process is completed.
+
+ Steps 1 and 2 mitigate global lock contention. Only one thread
+ from different nodes will compete for the global lock in step 2.
+ Step 3 reduces the global lock & shared data movement because they
+ are located in the same node as well as the same core. Our data
+ shows that Critical Section Integration (CSI) improves data locality
+ and NUMA-aware spinlock (NAS) helps CSI balance the workload.
+
+ NUMA spinlock can greatly speed up critical section on multi-socket
+ systems. It should improve spinlock performance on all multi-socket
+ systems.
+
+ NOTE: LiTL <https://github.com/multicore-locks/litl>, is an open-source
+ project that provides implementations of dozens of various locks,
+ including several state-of-the-art NUMA-aware spinlocks. Among them
+
+ 1. Hierarchical MCS (HMCS) spinlock. Milind Chabbi, Michael Fagan,
+ and John Mellor-Crummey. High Performance Locks for Multi-level NUMA
+ Systems. In Proceedings of the ACM SIGPLAN Symposium on Principles
+ and Practice of Parallel Programming (PPoPP), pages 215–226, 2015.
+
+ 2. Cohort-MCS (C-MCS) spinlock. Dave Dice, Virendra J. Marathe, and
+ Nir Shavit. Lock Cohorting: A General Technique for Designing NUMA
+ Locks. ACM Trans. Parallel Comput., 1(2):13:1–13:42, 2015.
+ */
+
+/* Get the next thread pointed to by *NEXT_P.  NB: We must use a while
+   spin loop to load NEXT_P since there is a small window between a
+   thread prepending itself to the list and publishing its next pointer
+   (see numa_spinlock_apply), during which *NEXT_P is still NULL.  */
+
+static inline struct numa_spinlock_info *
+get_numa_spinlock_info_next (struct numa_spinlock_info **next_p)
+{
+  struct numa_spinlock_info *next;
+  /* Spin (with a CPU-relax hint) until the pointer becomes non-NULL.  */
+  while (!(next = atomic_load_relaxed (next_p)))
+    atomic_spin_nop ();
+  return next;
+}
+
+/* While holding the global NUMA spinlock, run the workload of the
+   thread pointed to by SELF first, then run the workload for each
+   thread on the thread list pointed to by HEAD_P and wake up the
+   thread so that all workloads run on a single processor.  */
+
+static inline void
+run_numa_spinlock (struct numa_spinlock_info *self,
+                   struct numa_spinlock_info **head_p)
+{
+  struct numa_spinlock_info *next, *current;
+
+  /* Run the SELF's workload. */
+  self->result = self->workload (self->argument);
+
+  /* Process workloads for the rest of threads on the thread list.
+     NB: The thread list may be prepended by other threads at the
+     same time. */
+
+retry:
+  /* If SELF is the first thread of the thread list pointed to by
+     HEAD_P, clear the thread list. */
+  current = atomic_compare_and_exchange_val_acq (head_p, NULL, self);
+  if (current == self)
+    {
+      /* Since SELF is the only thread on the list, clear SELF's pending
+         field and return.  The release store pairs with the spinning
+         relaxed load in numa_spinlock_apply. */
+      atomic_store_release (&current->pending, 0);
+      return;
+    }
+
+  /* CURRENT will have the previous first thread of the thread list
+     pointed to by HEAD_P and *HEAD_P will point to SELF. */
+  current = atomic_exchange_acquire (head_p, self);
+
+  /* NB: No need to check if CURRENT == SELF here since SELF can never
+     be CURRENT. */
+
+repeat:
+  /* Get the next thread. */
+  next = get_numa_spinlock_info_next (&current->next);
+
+  /* Run the CURRENT's workload and clear CURRENT's pending field,
+     releasing the thread spinning in numa_spinlock_apply. */
+  current->result = current->workload (current->argument);
+  current->pending = 0;
+
+  /* Process the workload for each thread from CURRENT to SELF on the
+     thread list.  Don't pass beyond SELF since SELF is the last thread
+     on the list. */
+  if (next == self)
+    goto retry;
+  current = next;
+  goto repeat;
+}
+
+/* Apply for the NUMA spinlock with the NUMA spinlock info data pointed
+   to by SELF.  On return SELF's workload has been executed (possibly
+   by another thread on the same node acting as lock owner) and
+   self->result holds its return value.  */
+
+void
+numa_spinlock_apply (struct numa_spinlock_info *self)
+{
+  struct numa_spinlock *lock = self->lock;
+  struct numa_spinlock_info *first, *next;
+  struct numa_spinlock_info **head_p;
+
+  self->next = NULL;
+  /* We want the global NUMA spinlock. */
+  self->pending = 1;
+  /* Select the local NUMA spinlock list by the NUMA node number. */
+  head_p = &lock->lists[self->node];
+  /* FIRST will have the previous first thread of the local NUMA spinlock
+     list and *HEAD_P will point to SELF. */
+  first = atomic_exchange_acquire (head_p, self);
+  if (first)
+    {
+      /* SELF has been prepended to the thread list pointed to by
+         HEAD_P.  NB: There is a small window between updating
+         *HEAD_P and self->next; get_numa_spinlock_info_next spins
+         over that window. */
+      atomic_store_release (&self->next, first);
+      /* Let other threads run first since another thread will run our
+         workload for us. */
+      sched_yield ();
+      /* Spin until our PENDING is cleared, i.e. the lock owner has run
+         our workload (see run_numa_spinlock). */
+      while (atomic_load_relaxed (&self->pending))
+        atomic_spin_nop ();
+      return;
+    }
+
+  /* NB: Now SELF must be the only thread on the thread list pointed
+     to by HEAD_P.  Since thread is always prepended to HEAD_P, we
+     can use *HEAD_P == SELF to check if SELF is the only thread on
+     the thread list. */
+
+  if (__glibc_unlikely (lock->single_node))
+    {
+      /* If there is only one node, there is no need for the global
+         NUMA spinlock. */
+      run_numa_spinlock (self, head_p);
+      return;
+    }
+
+  /* FIRST will have the previous first thread of the local NUMA spinlock
+     list of threads which holds the global NUMA spinlock, which will
+     point to SELF. */
+  first = atomic_exchange_acquire (&lock->owner, self);
+  if (first)
+    {
+      /* SELF has been prepended to the thread list pointed to by
+         lock->owner.  NB: There is a small window between updating
+         *HEAD_P and first->next. */
+      atomic_store_release (&first->next, self);
+      /* Spin until the list of threads which holds the global NUMA
+         spinlock clears our PENDING. */
+      while (atomic_load_relaxed (&self->pending))
+        atomic_spin_nop ();
+    }
+
+  /* We get the global NUMA spinlock now.  Run our workload. */
+  run_numa_spinlock (self, head_p);
+
+  /* SELF is the only thread on the list if SELF is the first thread
+     of the thread list pointed to by lock->owner.  In this case, we
+     simply return. */
+  if (!atomic_compare_and_exchange_bool_acq (&lock->owner, NULL, self))
+    return;
+
+  /* Wake up the next thread, handing over the global lock. */
+  next = get_numa_spinlock_info_next (&self->next);
+  atomic_store_release (&next->pending, 0);
+}
+
+/* Initialize the NUMA spinlock info data pointed to by INFO from a
+   pointer to the NUMA spinlock, LOCK.  Return 0 on success; otherwise
+   return the getcpu/syscall failure value (non-zero, with errno set,
+   e.g. ENOSYS when the kernel lacks getcpu).  */
+
+int
+numa_spinlock_init (struct numa_spinlock *lock,
+                    struct numa_spinlock_info *info)
+{
+  memset (info, 0, sizeof (*info));
+  info->lock = lock;
+  /* For single node system, use 0 as the NUMA node number. */
+  if (lock->single_node)
+    return 0;
+  /* NB: Use the NUMA node number from getcpu to select the local NUMA
+     spinlock list. */
+  unsigned int cpu;
+  unsigned int node;
+#ifdef HAVE_GETCPU
+  int err_ret = getcpu (&cpu, &node);
+#else
+  int err_ret = syscall (SYS_getcpu, &cpu, &node, NULL);
+#endif
+  if (err_ret)
+    return err_ret;
+  if (lock->physical_package_id_p)
+    {
+      /* Defensive clamp; CPU should never exceed max_cpu. */
+      if (cpu > lock->max_cpu)
+        cpu = lock->max_cpu;
+      /* NB: If NUMA is disabled, use physical_package_id as the node
+         index so threads on the same socket share one list. */
+      node = lock->physical_package_id_p[cpu];
+    }
+  /* Defensive clamp; NODE indexes lock->lists[], so it must never
+     exceed max_node. */
+  if (node > lock->max_node)
+    node = lock->max_node;
+  info->node = node;
+  return err_ret;
+}
+
+/* Free the memory space pointed to by LOCK, which must have been
+   returned by numa_spinlock_alloc.  */
+void
+numa_spinlock_free (struct numa_spinlock *lock)
+{
+  /* free (NULL) is a no-op, so the optional physical_package_id_p
+     array needs no NULL guard.  */
+  free (lock->physical_package_id_p);
+  free (lock);
+}
diff --git a/sysdeps/unix/sysv/linux/numa-spinlock.h b/sysdeps/unix/sysv/linux/numa-spinlock.h
new file mode 100644
index 0000000..b17bda5
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/numa-spinlock.h
@@ -0,0 +1,64 @@
+/* Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#ifndef _NUMA_SPINLOCK_H
+#define _NUMA_SPINLOCK_H
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+
+/* The NUMA spinlock. */
+struct numa_spinlock;
+
+/* The NUMA spinlock information for each thread.  The first three
+   members (workload, argument, result) are the public interface;
+   the remaining members are internal to the implementation.  */
+struct numa_spinlock_info
+{
+  /* The workload function of this thread, run while the spinlock is
+     held. */
+  void *(*workload) (void *);
+  /* The argument pointer passed to the workload function. */
+  void *argument;
+  /* The return value of the workload function. */
+  void *result;
+  /* The pointer to the NUMA spinlock. */
+  struct numa_spinlock *lock;
+  /* The next thread on the local NUMA spinlock thread list. */
+  struct numa_spinlock_info *next;
+  /* The NUMA node number. */
+  unsigned int node;
+  /* Non-zero to indicate that the thread wants the NUMA spinlock. */
+  int pending;
+  /* Reserved for future use. */
+  void *__reserved[4];
+};
+
+/* Return a pointer to a newly allocated NUMA spinlock. */
+extern struct numa_spinlock *numa_spinlock_alloc (void);
+
+/* Free the memory space of the NUMA spinlock. */
+extern void numa_spinlock_free (struct numa_spinlock *);
+
+/* Initialize the NUMA spinlock information block. */
+extern int numa_spinlock_init (struct numa_spinlock *,
+ struct numa_spinlock_info *);
+
+/* Apply for spinlock with a NUMA spinlock information block. */
+extern void numa_spinlock_apply (struct numa_spinlock_info *);
+
+__END_DECLS
+
+#endif /* numa-spinlock.h */
diff --git a/sysdeps/unix/sysv/linux/numa_spinlock_alloc.c b/sysdeps/unix/sysv/linux/numa_spinlock_alloc.c
new file mode 100644
index 0000000..8ff4e1a
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/numa_spinlock_alloc.c
@@ -0,0 +1,304 @@
+/* Initialization of NUMA spinlock.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef _LIBC
+# include
+#else
+# include
+# include
+# include
+# define __open_nocancel open
+# define __close_nocancel_nostatus close
+# define __read_nocancel read
+#endif
+
+#include "numa-spinlock-private.h"
+
+static char *
+next_line (int fd, char *const buffer, char **cp, char **re,
+ char *const buffer_end)
+{
+ char *res = *cp;
+ char *nl = memchr (*cp, '\n', *re - *cp);
+ if (nl == NULL)
+ {
+ if (*cp != buffer)
+ {
+ if (*re == buffer_end)
+ {
+ memmove (buffer, *cp, *re - *cp);
+ *re = buffer + (*re - *cp);
+ *cp = buffer;
+
+ ssize_t n = __read_nocancel (fd, *re, buffer_end - *re);
+ if (n < 0)
+ return NULL;
+
+ *re += n;
+
+ nl = memchr (*cp, '\n', *re - *cp);
+ while (nl == NULL && *re == buffer_end)
+ {
+ /* Truncate too long lines. */
+ *re = buffer + 3 * (buffer_end - buffer) / 4;
+ n = __read_nocancel (fd, *re, buffer_end - *re);
+ if (n < 0)
+ return NULL;
+
+ nl = memchr (*re, '\n', n);
+ **re = '\n';
+ *re += n;
+ }
+ }
+ else
+ nl = memchr (*cp, '\n', *re - *cp);
+
+ res = *cp;
+ }
+
+ if (nl == NULL)
+ nl = *re - 1;
+ }
+
+ *cp = nl + 1;
+ assert (*cp <= *re);
+
+ return res == *re ? NULL : res;
+}
+
+static int
+select_cpu (const struct dirent *d)
+{
+ /* Return 1 for "cpuXXX" where XXX are digits. */
+ if (strncmp (d->d_name, "cpu", sizeof ("cpu") - 1) == 0)
+ {
+ const char *p = d->d_name + 3;
+
+ if (*p == '\0')
+ return 0;
+
+ do
+ {
+ if (!isdigit (*p))
+ return 0;
+ p++;
+ }
+ while (*p != '\0');
+
+ return 1;
+ }
+ return 0;
+}
+
+/* Allocate a NUMA spinlock and return a pointer to it. Caller should
+ call numa_spinlock_free on the NUMA spinlock pointer to free the
+ memory when it is no longer needed. */
+
+struct numa_spinlock *
+numa_spinlock_alloc (void)
+{
+ const size_t buffer_size = 1024;
+ char buffer[buffer_size];
+ char *buffer_end = buffer + buffer_size;
+ char *cp = buffer_end;
+ char *re = buffer_end;
+
+ const int flags = O_RDONLY | O_CLOEXEC;
+ int fd = __open_nocancel ("/sys/devices/system/node/online", flags);
+ char *l;
+ unsigned int max_node = 0;
+ unsigned int node_count = 0;
+ if (fd != -1)
+ {
+ l = next_line (fd, buffer, &cp, &re, buffer_end);
+ if (l != NULL)
+ do
+ {
+ char *endp;
+ unsigned long int n = strtoul (l, &endp, 10);
+ if (l == endp)
+ {
+ node_count = 1;
+ break;
+ }
+
+ unsigned long int m = n;
+ if (*endp == '-')
+ {
+ l = endp + 1;
+ m = strtoul (l, &endp, 10);
+ if (l == endp)
+ {
+ node_count = 1;
+ break;
+ }
+ }
+
+ node_count += m - n + 1;
+
+ if (m >= max_node)
+ max_node = m;
+
+ l = endp;
+ while (l < re && isspace (*l))
+ ++l;
+ }
+ while (l < re);
+
+ __close_nocancel_nostatus (fd);
+ }
+
+ /* NB: Some NUMA nodes may not be available.  If the number of NUMA
+ nodes is 1, set the maximum NUMA node number to 0. */
+ if (node_count == 1)
+ max_node = 0;
+
+ unsigned int max_cpu = 0;
+ unsigned int *physical_package_id_p = NULL;
+
+ if (max_node == 0)
+ {
+ /* There is at least 1 node. */
+ node_count = 1;
+
+ /* If NUMA is disabled, use physical_package_id instead. */
+ struct dirent **cpu_list;
+ int nprocs = scandir ("/sys/devices/system/cpu", &cpu_list,
+ select_cpu, NULL);
+ if (nprocs > 0)
+ {
+ int i;
+ unsigned int *cpu_id_p = NULL;
+
+ /* Find the maximum CPU number. */
+ if (posix_memalign ((void **) &cpu_id_p,
+ __alignof__ (void *),
+ nprocs * sizeof (unsigned int)) == 0)
+ {
+ for (i = 0; i < nprocs; i++)
+ {
+ unsigned int cpu_id
+ = strtoul (cpu_list[i]->d_name + 3, NULL, 10);
+ cpu_id_p[i] = cpu_id;
+ if (cpu_id > max_cpu)
+ max_cpu = cpu_id;
+ }
+
+ if (posix_memalign ((void **) &physical_package_id_p,
+ __alignof__ (void *),
+ ((max_cpu + 1)
+ * sizeof (unsigned int))) == 0)
+ {
+ memset (physical_package_id_p, 0,
+ ((max_cpu + 1) * sizeof (unsigned int)));
+
+ max_node = UINT_MAX;
+
+ /* Get physical_package_id. */
+ char path[(sizeof ("/sys/devices/system/cpu")
+ + 3 * sizeof (unsigned long int)
+ + sizeof ("/topology/physical_package_id"))];
+ for (i = 0; i < nprocs; i++)
+ {
+ struct dirent *d = cpu_list[i];
+ if (snprintf (path, sizeof (path),
+ "/sys/devices/system/cpu/%s/topology/physical_package_id",
+ d->d_name) > 0)
+ {
+ fd = __open_nocancel (path, flags);
+ if (fd != -1)
+ {
+ if (__read_nocancel (fd, buffer,
+ buffer_size) > 0)
+ {
+ char *endp;
+ unsigned long int package_id
+ = strtoul (buffer, &endp, 10);
+ if (package_id != ULONG_MAX
+ && *buffer != '\0'
+ && (*endp == '\0' || *endp == '\n'))
+ {
+ physical_package_id_p[cpu_id_p[i]]
+ = package_id;
+ if (max_node == UINT_MAX)
+ {
+ /* This is the first node. */
+ max_node = package_id;
+ }
+ else if (package_id != max_node)
+ {
+ /* NB: We only need to know if
+ NODE_COUNT > 1. */
+ node_count = 2;
+ if (package_id > max_node)
+ max_node = package_id;
+ }
+ }
+ }
+ __close_nocancel_nostatus (fd);
+ }
+ }
+
+ free (d);
+ }
+ }
+
+ free (cpu_id_p);
+ }
+ else
+ {
+ for (i = 0; i < nprocs; i++)
+ free (cpu_list[i]);
+ }
+
+ free (cpu_list);
+ }
+ }
+
+ if (physical_package_id_p != NULL && node_count == 1)
+ {
+ /* There is only one node. No need for physical_package_id_p. */
+ free (physical_package_id_p);
+ physical_package_id_p = NULL;
+ max_cpu = 0;
+ }
+
+ /* Allocate an array of struct numa_spinlock_info pointers to hold info
+ for all NUMA nodes with NUMA node number from getcpu () as index. */
+ size_t size = (sizeof (struct numa_spinlock)
+ + ((max_node + 1)
+ * sizeof (struct numa_spinlock_info *)));
+ struct numa_spinlock *lock;
+ if (posix_memalign ((void **) &lock,
+ __alignof__ (struct numa_spinlock_info *), size))
+ return NULL;
+ memset (lock, 0, size);
+
+ lock->max_node = max_node;
+ lock->single_node = node_count == 1;
+ lock->max_cpu = max_cpu;
+ lock->physical_package_id_p = physical_package_id_p;
+
+ return lock;
+}
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc32/libpthread.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc32/libpthread.abilist
index 09e8447..dba7df6 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc32/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc32/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libpthread.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libpthread.abilist
index 8300958..a763c0a 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/be/libpthread.abilist
@@ -27,6 +27,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3 _IO_flockfile F
GLIBC_2.3 _IO_ftrylockfile F
GLIBC_2.3 _IO_funlockfile F
diff --git a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libpthread.abilist b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libpthread.abilist
index 9a9e4ce..eb54a83 100644
--- a/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/powerpc/powerpc64/le/libpthread.abilist
@@ -243,3 +243,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
diff --git a/sysdeps/unix/sysv/linux/riscv/rv64/libpthread.abilist b/sysdeps/unix/sysv/linux/riscv/rv64/libpthread.abilist
index c370fda..366fcac 100644
--- a/sysdeps/unix/sysv/linux/riscv/rv64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/riscv/rv64/libpthread.abilist
@@ -235,3 +235,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/libpthread.abilist b/sysdeps/unix/sysv/linux/s390/s390-32/libpthread.abilist
index d05468f..786d8e1 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-32/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-32/libpthread.abilist
@@ -229,6 +229,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/libpthread.abilist b/sysdeps/unix/sysv/linux/s390/s390-64/libpthread.abilist
index e8161aa..dd7c52f 100644
--- a/sysdeps/unix/sysv/linux/s390/s390-64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/s390/s390-64/libpthread.abilist
@@ -221,6 +221,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/sh/libpthread.abilist b/sysdeps/unix/sysv/linux/sh/libpthread.abilist
index bcba07f..a80475f 100644
--- a/sysdeps/unix/sysv/linux/sh/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/sh/libpthread.abilist
@@ -219,6 +219,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc32/libpthread.abilist b/sysdeps/unix/sysv/linux/sparc/sparc32/libpthread.abilist
index b413007..dd08796 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc32/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc32/libpthread.abilist
@@ -227,6 +227,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/sparc/sparc64/libpthread.abilist b/sysdeps/unix/sysv/linux/sparc/sparc64/libpthread.abilist
index ccc9449..5b190f6 100644
--- a/sysdeps/unix/sysv/linux/sparc/sparc64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/sparc/sparc64/libpthread.abilist
@@ -219,6 +219,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/x86/Makefile b/sysdeps/unix/sysv/linux/x86/Makefile
index 02ca36c..29d41ad 100644
--- a/sysdeps/unix/sysv/linux/x86/Makefile
+++ b/sysdeps/unix/sysv/linux/x86/Makefile
@@ -14,6 +14,7 @@ endif
ifeq ($(subdir),nptl)
libpthread-sysdep_routines += elision-lock elision-unlock elision-timed \
elision-trylock
+xtests += tst-variable-overhead tst-numa-variable-overhead
CFLAGS-elision-lock.c += -mrtm
CFLAGS-elision-unlock.c += -mrtm
CFLAGS-elision-timed.c += -mrtm
diff --git a/sysdeps/unix/sysv/linux/x86/tst-numa-variable-overhead.c b/sysdeps/unix/sysv/linux/x86/tst-numa-variable-overhead.c
new file mode 100644
index 0000000..7cb8542
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/tst-numa-variable-overhead.c
@@ -0,0 +1,53 @@
+/* Test case for NUMA spinlock overhead.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include "numa-spinlock.h"
+
+struct numa_spinlock *lock;
+
+struct work_todo_argument
+{
+ unsigned long *v1;
+ unsigned long *v2;
+ unsigned long *v3;
+ unsigned long *v4;
+};
+
+static void *
+work_todo (void *v)
+{
+ struct work_todo_argument *p = v;
+ unsigned long ret;
+ *p->v1 = *p->v1 + 1;
+ *p->v2 = *p->v2 + 1;
+ ret = __sync_val_compare_and_swap (p->v4, 0, 1);
+ *p->v3 = *p->v3 + ret;
+ return (void *) 2;
+}
+
+static inline void
+do_work (struct numa_spinlock_info *lock_info)
+{
+ numa_spinlock_apply (lock_info);
+}
+
+#define USE_NUMA_SPINLOCK
+#include "tst-variable-overhead-skeleton.c"
diff --git a/sysdeps/unix/sysv/linux/x86/tst-variable-overhead-skeleton.c b/sysdeps/unix/sysv/linux/x86/tst-variable-overhead-skeleton.c
new file mode 100644
index 0000000..4b83dfb
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/tst-variable-overhead-skeleton.c
@@ -0,0 +1,384 @@
+/* Test case skeleton for spinlock overhead.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+/* Check spinlock overhead with a large number of threads.  The critical
+   region is very small.  Critical region + spinlock overhead aren't
+   noticeable when the number of threads is small.  When the thread
+   number increases, spinlock overhead becomes the bottleneck.  It shows
+   up in wall time of thread execution. */
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#ifdef MODULE_NAME
+# include
+# include
+#endif
+
+#ifndef USE_PTHREAD_ATTR_SETAFFINITY_NP
+# define USE_PTHREAD_ATTR_SETAFFINITY_NP 1
+#endif
+
+#define memory_barrier() __asm ("" ::: "memory")
+#define pause() __asm ("rep ; nop" ::: "memory")
+
+#define CACHELINE_SIZE 64
+#define CACHE_ALIGNED __attribute__((aligned(CACHELINE_SIZE)))
+
+#define constant_time 5
+unsigned long g_val CACHE_ALIGNED;
+unsigned long g_val2 CACHE_ALIGNED;
+unsigned long g_val3 CACHE_ALIGNED;
+unsigned long cmplock CACHE_ALIGNED;
+struct count
+{
+ unsigned long long total;
+ unsigned long long spinlock;
+ unsigned long long wall;
+} __attribute__((aligned(128)));
+
+struct count *gcount;
+
+/* The time consumed by one update is about 200 TSCs. */
+static int delay_time_unlocked = 400;
+
+struct ops
+{
+ void *(*test) (void *arg);
+ void (*print_thread) (void *res, int);
+} *ops;
+
+struct stats_result
+{
+ unsigned long num;
+};
+
+void *work_thread (void *arg);
+
+#define iterations (10000 * 5)
+
+static volatile int start_thread;
+
+/* Delay some fixed time */
+static void
+delay_tsc (unsigned n)
+{
+ unsigned long long start, current, diff;
+ unsigned int aux;
+ start = __builtin_ia32_rdtscp (&aux);
+ while (1)
+ {
+ current = __builtin_ia32_rdtscp (&aux);
+ diff = current - start;
+ if (diff < n)
+ pause ();
+ else
+ break;
+ }
+}
+
+static void
+wait_a_bit (int delay_time)
+{
+ if (delay_time > 0)
+ delay_tsc (delay_time);
+}
+
+#ifndef USE_NUMA_SPINLOCK
+static inline void
+work_todo (void)
+{
+ unsigned long ret;
+ g_val = g_val + 1;
+ g_val2 = g_val2 + 1;
+ ret = __sync_val_compare_and_swap (&cmplock, 0, 1);
+ g_val3 = g_val3 + 1 + ret;
+}
+#endif
+
+void *
+work_thread (void *arg)
+{
+ long i;
+ unsigned long pid = (unsigned long) arg;
+ struct stats_result *res;
+ unsigned long long start, end;
+ int err_ret = posix_memalign ((void **)&res, CACHELINE_SIZE,
+ roundup (sizeof (*res), CACHELINE_SIZE));
+ if (err_ret)
+ {
+ printf ("posix_memalign failure: %s\n", strerror (err_ret));
+ exit (err_ret);
+ }
+ long num = 0;
+
+#ifdef USE_NUMA_SPINLOCK
+ struct work_todo_argument work_todo_arg;
+ struct numa_spinlock_info lock_info;
+
+ if (numa_spinlock_init (lock, &lock_info))
+ {
+ printf ("numa_spinlock_init failure: %m\n");
+ exit (1);
+ }
+
+ work_todo_arg.v1 = &g_val;
+ work_todo_arg.v2 = &g_val2;
+ work_todo_arg.v3 = &g_val3;
+ work_todo_arg.v4 = &cmplock;
+ lock_info.argument = &work_todo_arg;
+ lock_info.workload = work_todo;
+#endif
+
+ while (!start_thread)
+ pause ();
+
+ unsigned int aux;
+ start = __builtin_ia32_rdtscp (&aux);
+ for (i = 0; i < iterations; i++)
+ {
+#ifdef USE_NUMA_SPINLOCK
+ do_work (&lock_info);
+#else
+ do_work ();
+#endif
+ wait_a_bit (delay_time_unlocked);
+ num++;
+ }
+ end = __builtin_ia32_rdtscp (&aux);
+ gcount[pid].total = end - start;
+ res->num = num;
+
+ return res;
+}
+
+void
+init_global_data(void)
+{
+ g_val = 0;
+ g_val2 = 0;
+ g_val3 = 0;
+ cmplock = 0;
+}
+
+void
+test_threads (int numthreads, int numprocs, unsigned long time)
+{
+ start_thread = 0;
+
+#ifdef USE_NUMA_SPINLOCK
+ lock = numa_spinlock_alloc ();
+#endif
+
+ memory_barrier ();
+
+ pthread_t thr[numthreads];
+ void *res[numthreads];
+ int i;
+
+ init_global_data ();
+ for (i = 0; i < numthreads; i++)
+ {
+ pthread_attr_t attr;
+ const pthread_attr_t *attrp = NULL;
+ if (USE_PTHREAD_ATTR_SETAFFINITY_NP)
+ {
+ attrp = &attr;
+ pthread_attr_init (&attr);
+ cpu_set_t set;
+ CPU_ZERO (&set);
+ int cpu = i % numprocs;
+ (void) CPU_SET (cpu, &set);
+ pthread_attr_setaffinity_np (&attr, sizeof (cpu_set_t), &set);
+ }
+ int err_ret = pthread_create (&thr[i], attrp, ops->test,
+ (void *)(uintptr_t) i);
+ if (err_ret != 0)
+ {
+ printf ("pthread_create failed: %d, %s\n",
+ i, strerror (err_ret));
+ numthreads = i;
+ break;
+ }
+ }
+
+ memory_barrier ();
+ start_thread = 1;
+ memory_barrier ();
+ sched_yield ();
+
+ if (time)
+ {
+ struct timespec ts =
+ {
+ .tv_sec = time,
+ .tv_nsec = 0
+ };
+ clock_nanosleep (CLOCK_MONOTONIC, 0, &ts, NULL);
+ memory_barrier ();
+ }
+
+ for (i = 0; i < numthreads; i++)
+ {
+ if (pthread_join (thr[i], (void *) &res[i]) == 0)
+ free (res[i]);
+ else
+ printf ("pthread_join failure: %m\n");
+ }
+
+#ifdef USE_NUMA_SPINLOCK
+ numa_spinlock_free (lock);
+#endif
+}
+
+struct ops hashwork_ops =
+{
+ .test = work_thread,
+};
+
+struct ops *ops;
+
+static struct count
+total_cost (int numthreads, int numprocs)
+{
+ int i;
+ unsigned long long total = 0;
+ unsigned long long spinlock = 0;
+
+ memset (gcount, 0, sizeof(gcount[0]) * numthreads);
+
+ unsigned long long start, end, diff;
+ unsigned int aux;
+
+ start = __builtin_ia32_rdtscp (&aux);
+ test_threads (numthreads, numprocs, constant_time);
+ end = __builtin_ia32_rdtscp (&aux);
+ diff = end - start;
+
+ for (i = 0; i < numthreads; i++)
+ {
+ total += gcount[i].total;
+ spinlock += gcount[i].spinlock;
+ }
+
+ struct count cost = { total, spinlock, diff };
+ return cost;
+}
+
+#ifdef MODULE_NAME
+static int
+do_test (void)
+{
+ if (!CPU_FEATURE_USABLE (RDTSCP))
+ return EXIT_UNSUPPORTED;
+#else
+int
+main (void)
+{
+#endif
+ int numprocs = sysconf (_SC_NPROCESSORS_ONLN);
+
+ /* Oversubscribe CPU. */
+ int numthreads = 4 * numprocs;
+
+ ops = &hashwork_ops;
+
+ int err_ret = posix_memalign ((void **)&gcount, 4096,
+ sizeof(gcount[0]) * numthreads);
+ if (err_ret)
+ {
+ printf ("posix_memalign failure: %s\n", strerror (err_ret));
+ exit (err_ret);
+ }
+
+ struct count cost, cost1;
+ double overhead;
+ int i, last;
+ int last_increment = numprocs < 16 ? 16 : numprocs;
+ int numprocs_done = 0;
+ int numprocs_reset = 0;
+ cost1 = total_cost (1, numprocs);
+
+ printf ("Number of processors: %d, Single thread time %llu\n\n",
+ numprocs, cost1.total);
+
+ for (last = i = 2; i <= numthreads;)
+ {
+ last = i;
+ cost = total_cost (i, numprocs);
+ overhead = cost.total;
+ overhead /= i;
+ overhead /= cost1.total;
+ printf ("Number of threads: %4d, Total time %14llu, Overhead: %.2f\n",
+ i, cost.total, overhead);
+ if ((i * 2) < numprocs)
+ i = i * 2;
+ else if (numprocs_done)
+ {
+ if (numprocs_reset)
+ {
+ i = numprocs_reset;
+ numprocs_reset = 0;
+ }
+ else
+ {
+ if ((i * 2) < numthreads)
+ i = i * 2;
+ else
+ i = i + last_increment;
+ }
+ }
+ else
+ {
+ if (numprocs != 2 * i)
+ numprocs_reset = 2 * i;
+ i = numprocs;
+ numprocs_done = 1;
+ }
+ }
+
+ if (last != numthreads)
+ {
+ i = numthreads;
+ cost = total_cost (i, numprocs);
+ overhead = cost.total;
+ overhead /= i;
+ overhead /= cost1.total;
+ printf ("Number of threads: %4d, Total time %14llu, Overhead: %.2f\n",
+ i, cost.total, overhead);
+ }
+
+ free (gcount);
+ return 0;
+}
+
+#ifdef MODULE_NAME
+# define TIMEOUT 900
+# include
+#endif
diff --git a/sysdeps/unix/sysv/linux/x86/tst-variable-overhead.c b/sysdeps/unix/sysv/linux/x86/tst-variable-overhead.c
new file mode 100644
index 0000000..b3ce567
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86/tst-variable-overhead.c
@@ -0,0 +1,47 @@
+/* Test case for spinlock overhead.
+ Copyright (C) 2018 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ . */
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include
+
+struct
+{
+ pthread_spinlock_t testlock;
+ char pad[64 - sizeof (pthread_spinlock_t)];
+} test __attribute__((aligned(64)));
+
+static void
+__attribute__((constructor))
+init_spin (void)
+{
+ pthread_spin_init (&test.testlock, 0);
+}
+
+static void work_todo (void);
+
+static inline void
+do_work (void)
+{
+ pthread_spin_lock(&test.testlock);
+ work_todo ();
+ pthread_spin_unlock(&test.testlock);
+}
+
+#include "tst-variable-overhead-skeleton.c"
diff --git a/sysdeps/unix/sysv/linux/x86_64/64/libpthread.abilist b/sysdeps/unix/sysv/linux/x86_64/64/libpthread.abilist
index 931c827..e90532e 100644
--- a/sysdeps/unix/sysv/linux/x86_64/64/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/64/libpthread.abilist
@@ -219,6 +219,10 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F
GLIBC_2.3.2 pthread_cond_broadcast F
GLIBC_2.3.2 pthread_cond_destroy F
GLIBC_2.3.2 pthread_cond_init F
diff --git a/sysdeps/unix/sysv/linux/x86_64/x32/libpthread.abilist b/sysdeps/unix/sysv/linux/x86_64/x32/libpthread.abilist
index c09c9b0..c74febb 100644
--- a/sysdeps/unix/sysv/linux/x86_64/x32/libpthread.abilist
+++ b/sysdeps/unix/sysv/linux/x86_64/x32/libpthread.abilist
@@ -243,3 +243,7 @@ GLIBC_2.28 tss_create F
GLIBC_2.28 tss_delete F
GLIBC_2.28 tss_get F
GLIBC_2.28 tss_set F
+GLIBC_2.29 numa_spinlock_alloc F
+GLIBC_2.29 numa_spinlock_apply F
+GLIBC_2.29 numa_spinlock_free F
+GLIBC_2.29 numa_spinlock_init F