[v3,32/37] nptl: pthread_mutex_lock, pthread_mutex_unock single-threaded optimization
Commit Message
This is optimization is similar in spirit to the SINGLE_THREAD_P check
in the malloc implementation. Doing this in generic code allows us
to prioritize those cases which are likely to occur in single-threaded
programs (normal and recursive mutexes).
---
nptl/pthread_mutex_cond_lock.c | 1 +
nptl/pthread_mutex_lock.c | 25 ++++++++++++++++++++++---
nptl/pthread_mutex_unlock.c | 17 ++++++++++++++++-
3 files changed, 39 insertions(+), 4 deletions(-)
Comments
On 16/03/2021 14:31, Florian Weimer via Libc-alpha wrote:
> This is optimization is similar in spirit to the SINGLE_THREAD_P check
> in the malloc implementation. Doing this in generic code allows us
> to prioritize those cases which are likely to occur in single-threaded
> programs (normal and recursive mutexes).
At first I though about suggesting to move it on generic lowlevellock.h,
but it is used quite only in some specific places and once libpthread is
moved we can use libc_lock_lock (which uses pthread_mutex) instead.
The only issue with the approach of moving this optimization to
pthread_mutex_lock (instead of custom lock as lll_lock) is to use a
slight more memory due pthread_mutex_t object, but I think the sparse
usage within glibc should be ok.
It would be good if we disentangle and consolidate both lowlevellock.h
and libc-lock.h so we have only header and definitions.
LGTM, thanks.
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
> ---
> nptl/pthread_mutex_cond_lock.c | 1 +
> nptl/pthread_mutex_lock.c | 25 ++++++++++++++++++++++---
> nptl/pthread_mutex_unlock.c | 17 ++++++++++++++++-
> 3 files changed, 39 insertions(+), 4 deletions(-)
>
> diff --git a/nptl/pthread_mutex_cond_lock.c b/nptl/pthread_mutex_cond_lock.c
> index 2f0771302f..3386bd689b 100644
> --- a/nptl/pthread_mutex_cond_lock.c
> +++ b/nptl/pthread_mutex_cond_lock.c
> @@ -2,6 +2,7 @@
>
> #define LLL_MUTEX_LOCK(mutex) \
> lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
> +#define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex)
>
> /* Not actually elided so far. Needed? */
> #define LLL_MUTEX_LOCK_ELISION(mutex) \
> diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
> index f0de7b7fd6..8649a92ffb 100644
> --- a/nptl/pthread_mutex_lock.c
> +++ b/nptl/pthread_mutex_lock.c
> @@ -30,8 +30,27 @@
> /* Some of the following definitions differ when pthread_mutex_cond_lock.c
> includes this file. */
> #ifndef LLL_MUTEX_LOCK
> -# define LLL_MUTEX_LOCK(mutex) \
> +/* lll_lock with single-thread optimization. */
> +static inline void
> +lll_mutex_lock_optimized (pthread_mutex_t *mutex)
> +{
> + /* The single-threaded optimization is only valid for private
> + mutexes. For process-shared mutexes, the mutex could be in a
> + shared mapping, so synchronization with another process is needed
> + even without any threads. If the lock is already marked as
> + acquired, POSIX requires that pthread_mutex_lock deadlocks for
> + normal mutexes, so skip the optimization in that case as
> + well. */
> + int private = PTHREAD_MUTEX_PSHARED (mutex);
> + if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0)
> + mutex->__data.__lock = 1;
> + else
> + lll_lock (mutex->__data.__lock, private);
> +}
> +
> +# define LLL_MUTEX_LOCK(mutex) \
> lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
Ok.
> +# define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex)
> # define LLL_MUTEX_TRYLOCK(mutex) \
> lll_trylock ((mutex)->__data.__lock)
> # define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
> @@ -64,7 +83,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
> FORCE_ELISION (mutex, goto elision);
> simple:
> /* Normal mutex. */
> - LLL_MUTEX_LOCK (mutex);
> + LLL_MUTEX_LOCK_OPTIMIZED (mutex);
> assert (mutex->__data.__owner == 0);
> }
> #if ENABLE_ELISION_SUPPORT
Ok.
> @@ -99,7 +118,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
> }
>
> /* We have to get the mutex. */
> - LLL_MUTEX_LOCK (mutex);
> + LLL_MUTEX_LOCK_OPTIMIZED (mutex);
>
> assert (mutex->__data.__owner == 0);
> mutex->__data.__count = 1;
Ok.
> diff --git a/nptl/pthread_mutex_unlock.c b/nptl/pthread_mutex_unlock.c
> index 3b5ccdacf9..655093ee9a 100644
> --- a/nptl/pthread_mutex_unlock.c
> +++ b/nptl/pthread_mutex_unlock.c
> @@ -28,6 +28,21 @@ static int
> __pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
> __attribute_noinline__;
>
> +/* lll_lock with single-thread optimization. */
> +static inline void
> +lll_mutex_unlock_optimized (pthread_mutex_t *mutex)
> +{
> + /* The single-threaded optimization is only valid for private
> + mutexes. For process-shared mutexes, the mutex could be in a
> + shared mapping, so synchronization with another process is needed
> + even without any threads. */
> + int private = PTHREAD_MUTEX_PSHARED (mutex);
> + if (private == LLL_PRIVATE && SINGLE_THREAD_P)
> + mutex->__data.__lock = 0;
> + else
> + lll_unlock (mutex->__data.__lock, private);
> +}
> +
> int
> attribute_hidden
> __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
> @@ -51,7 +66,7 @@ __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
> --mutex->__data.__nusers;
>
> /* Unlock. */
> - lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex));
> + lll_mutex_unlock_optimized (mutex);
>
> LIBC_PROBE (mutex_release, 1, mutex);
>
>
Ok.
@@ -2,6 +2,7 @@
#define LLL_MUTEX_LOCK(mutex) \
lll_cond_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
+#define LLL_MUTEX_LOCK_OPTIMIZED(mutex) LLL_MUTEX_LOCK (mutex)
/* Not actually elided so far. Needed? */
#define LLL_MUTEX_LOCK_ELISION(mutex) \
@@ -30,8 +30,27 @@
/* Some of the following definitions differ when pthread_mutex_cond_lock.c
includes this file. */
#ifndef LLL_MUTEX_LOCK
-# define LLL_MUTEX_LOCK(mutex) \
+/* lll_lock with single-thread optimization. */
+static inline void
+lll_mutex_lock_optimized (pthread_mutex_t *mutex)
+{
+ /* The single-threaded optimization is only valid for private
+ mutexes. For process-shared mutexes, the mutex could be in a
+ shared mapping, so synchronization with another process is needed
+ even without any threads. If the lock is already marked as
+ acquired, POSIX requires that pthread_mutex_lock deadlocks for
+ normal mutexes, so skip the optimization in that case as
+ well. */
+ int private = PTHREAD_MUTEX_PSHARED (mutex);
+ if (private == LLL_PRIVATE && SINGLE_THREAD_P && mutex->__data.__lock == 0)
+ mutex->__data.__lock = 1;
+ else
+ lll_lock (mutex->__data.__lock, private);
+}
+
+# define LLL_MUTEX_LOCK(mutex) \
lll_lock ((mutex)->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex))
+# define LLL_MUTEX_LOCK_OPTIMIZED(mutex) lll_mutex_lock_optimized (mutex)
# define LLL_MUTEX_TRYLOCK(mutex) \
lll_trylock ((mutex)->__data.__lock)
# define LLL_ROBUST_MUTEX_LOCK_MODIFIER 0
@@ -64,7 +83,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
FORCE_ELISION (mutex, goto elision);
simple:
/* Normal mutex. */
- LLL_MUTEX_LOCK (mutex);
+ LLL_MUTEX_LOCK_OPTIMIZED (mutex);
assert (mutex->__data.__owner == 0);
}
#if ENABLE_ELISION_SUPPORT
@@ -99,7 +118,7 @@ __pthread_mutex_lock (pthread_mutex_t *mutex)
}
/* We have to get the mutex. */
- LLL_MUTEX_LOCK (mutex);
+ LLL_MUTEX_LOCK_OPTIMIZED (mutex);
assert (mutex->__data.__owner == 0);
mutex->__data.__count = 1;
@@ -28,6 +28,21 @@ static int
__pthread_mutex_unlock_full (pthread_mutex_t *mutex, int decr)
__attribute_noinline__;
+/* lll_lock with single-thread optimization. */
+static inline void
+lll_mutex_unlock_optimized (pthread_mutex_t *mutex)
+{
+ /* The single-threaded optimization is only valid for private
+ mutexes. For process-shared mutexes, the mutex could be in a
+ shared mapping, so synchronization with another process is needed
+ even without any threads. */
+ int private = PTHREAD_MUTEX_PSHARED (mutex);
+ if (private == LLL_PRIVATE && SINGLE_THREAD_P)
+ mutex->__data.__lock = 0;
+ else
+ lll_unlock (mutex->__data.__lock, private);
+}
+
int
attribute_hidden
__pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
@@ -51,7 +66,7 @@ __pthread_mutex_unlock_usercnt (pthread_mutex_t *mutex, int decr)
--mutex->__data.__nusers;
/* Unlock. */
- lll_unlock (mutex->__data.__lock, PTHREAD_MUTEX_PSHARED (mutex));
+ lll_mutex_unlock_optimized (mutex);
LIBC_PROBE (mutex_release, 1, mutex);