nptl: Use out-of-line wake function in __libc_lock_unlock slow path

Message ID 87lf6i5g7m.fsf@oldenburg.str.redhat.com
State Committed
Commit 508ee037a30f1d4f6a1f8de2bd0d9866d4684625
Headers
Series nptl: Use out-of-line wake function in __libc_lock_unlock slow path |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit success Build for i686

Commit Message

Florian Weimer July 7, 2021, 2:52 p.m. UTC
  This slightly reduces code size, as can be seen below.
__libc_lock_unlock is usually used along with __libc_lock_lock in
the same function.  __libc_lock_lock already has an out-of-line
slow path, so this change should not introduce many additional
non-leaf functions.

This change also fixes a link failure in 32-bit Arm thumb mode
because commit 1f9c804fbd699104adefbce9e56d2c8aa711b6b9
("nptl: Use internal low-level lock type for !IS_IN (libc)")
introduced __libc_do_syscall calls outside of libc.

Before x86-64:

   text	   data	    bss	    dec	    hex	filename
1937748	  20456	  54896	2013100	 1eb7ac	libc.so.6
  25601	    856	  12768	  39225	   9939	nss/libnss_db.so.2
  40310	    952	  25144	  66406	  10366	nss/libnss_files.so.2

After x86-64:
   text	   data	    bss	    dec	    hex	filename
1935312	  20456	  54896	2010664	 1eae28	libc.so.6
  25559	    864	  12768	  39191	   9917	nss/libnss_db.so.2
  39764	    960	  25144	  65868	  1014c	nss/libnss_files.so.2

Before i686:

2110961	  11272	  39144	2161377	 20fae1	libc.so.6
  27243	    428	  12652	  40323	   9d83	nss/libnss_db.so.2
  43062	    476	  25028	  68566	  10bd6	nss/libnss_files.so.2

After i686:

2107347	  11272	  39144	2157763	 20ecc3	libc.so.6
  26929	    432	  12652	  40013	   9c4d	nss/libnss_db.so.2
  43132	    480	  25028	  68640	  10c20	nss/libnss_files.so.2

---
 nptl/Versions               |  1 +
 nptl/lowlevellock.c         | 14 ++++++++++++++
 sysdeps/nptl/lowlevellock.h | 26 ++++++++++++++++++--------
 3 files changed, 33 insertions(+), 8 deletions(-)
  

Comments

Adhemerval Zanella July 8, 2021, 6:17 p.m. UTC | #1
On 07/07/2021 11:52, Florian Weimer via Libc-alpha wrote:
> This slightly reduces code size, as can be seen below.
> __libc_lock_unlock is usually used along with __libc_lock_lock in
> the same function.  __libc_lock_lock already has an out-of-line
> slow path, so this change should not introduce many additional
> non-leaf functions.
> 
> This change also fixes a link failure in 32-bit Arm thumb mode
> because commit 1f9c804fbd699104adefbce9e56d2c8aa711b6b9
> ("nptl: Use internal low-level lock type for !IS_IN (libc)")
> introduced __libc_do_syscall calls outside of libc.
> 
> Before x86-64:
> 
>    text	   data	    bss	    dec	    hex	filename
> 1937748	  20456	  54896	2013100	 1eb7ac	libc.so.6
>   25601	    856	  12768	  39225	   9939	nss/libnss_db.so.2
>   40310	    952	  25144	  66406	  10366	nss/libnss_files.so.2
> 
> After x86-64:
>    text	   data	    bss	    dec	    hex	filename
> 1935312	  20456	  54896	2010664	 1eae28	libc.so.6
>   25559	    864	  12768	  39191	   9917	nss/libnss_db.so.2
>   39764	    960	  25144	  65868	  1014c	nss/libnss_files.so.2
> 
> Before i686:
> 
> 2110961	  11272	  39144	2161377	 20fae1	libc.so.6
>   27243	    428	  12652	  40323	   9d83	nss/libnss_db.so.2
>   43062	    476	  25028	  68566	  10bd6	nss/libnss_files.so.2
> 
> After i686:
> 
> 2107347	  11272	  39144	2157763	 20ecc3	libc.so.6
>   26929	    432	  12652	  40013	   9c4d	nss/libnss_db.so.2
>   43132	    480	  25028	  68640	  10c20	nss/libnss_files.so.2
> 

I check a build for some -march variants (armv5te, armv6, armv6t2, armv7-a,
armv7 -mthumb) and I saw no regression on armv7 and armv7 thumb.

The patch looks ok in general, only one question below.

> ---
>  nptl/Versions               |  1 +
>  nptl/lowlevellock.c         | 14 ++++++++++++++
>  sysdeps/nptl/lowlevellock.h | 26 ++++++++++++++++++--------
>  3 files changed, 33 insertions(+), 8 deletions(-)
> 
> diff --git a/nptl/Versions b/nptl/Versions
> index 2a75f013f2..3221de89d1 100644
> --- a/nptl/Versions
> +++ b/nptl/Versions
> @@ -380,6 +380,7 @@ libc {
>    }
>    GLIBC_PRIVATE {
>      __libc_alloca_cutoff;
> +    __lll_lock_wake_private;
>      __lll_lock_wait_private;
>      __nptl_create_event;
>      __nptl_death_event;
> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
> index 2d077d8694..4f88178964 100644
> --- a/nptl/lowlevellock.c
> +++ b/nptl/lowlevellock.c
> @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private)
>  }
>  libc_hidden_def (__lll_lock_wait)
>  
> +void
> +__lll_lock_wake_private (int *futex)
> +{
> +  lll_futex_wake (futex, 1, LLL_PRIVATE);
> +}
> +libc_hidden_def (__lll_lock_wake_private)
> +
> +void
> +__lll_lock_wake (int *futex, int private)
> +{
> +  lll_futex_wake (futex, 1, private);
> +}
> +libc_hidden_def (__lll_lock_wake)
> +
>  #if ENABLE_ELISION_SUPPORT
>  int __pthread_force_elision __attribute__ ((nocommon));
>  libc_hidden_data_def (__pthread_force_elision)

Do we really need two functions for this?  

> diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
> index be60c9ac28..4d95114ed3 100644
> --- a/sysdeps/nptl/lowlevellock.h
> +++ b/sysdeps/nptl/lowlevellock.h
> @@ -125,6 +125,11 @@ libc_hidden_proto (__lll_lock_wait)
>  #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
>  
>  
> +extern void __lll_lock_wake_private (int *futex);
> +libc_hidden_proto (__lll_lock_wake_private)
> +extern void __lll_lock_wake (int *futex, int private);
> +libc_hidden_proto (__lll_lock_wake)
> +
>  /* This is an expression rather than a statement even though its value is
>     void, so that it can be used in a comma expression or as an expression
>     that's cast to void.  */

Ok.

> @@ -137,14 +142,19 @@ libc_hidden_proto (__lll_lock_wait)
>     acquires the lock and when there will be no further lock acquisitions;
>     thus, we must not access the lock after releasing it, or those accesses
>     could be concurrent with mutex destruction or reuse of the memory.  */
> -#define __lll_unlock(futex, private)                    \
> -  ((void)                                               \
> -   ({                                                   \
> -     int *__futex = (futex);                            \
> -     int __private = (private);                         \
> -     int __oldval = atomic_exchange_rel (__futex, 0);   \
> -     if (__glibc_unlikely (__oldval > 1))               \
> -       lll_futex_wake (__futex, 1, __private);          \
> +#define __lll_unlock(futex, private)					\
> +  ((void)								\
> +  ({									\
> +     int *__futex = (futex);						\
> +     int __private = (private);						\
> +     int __oldval = atomic_exchange_rel (__futex, 0);			\
> +     if (__glibc_unlikely (__oldval > 1))				\
> +       {								\
> +         if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
> +           __lll_lock_wake_private (__futex);                           \
> +         else                                                           \
> +           __lll_lock_wake (__futex, __private);			\
> +       }								\
>     }))
>  #define lll_unlock(futex, private)	\
>    __lll_unlock (&(futex), private)
>
  
Florian Weimer July 8, 2021, 6:24 p.m. UTC | #2
* Adhemerval Zanella:

>> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
>> index 2d077d8694..4f88178964 100644
>> --- a/nptl/lowlevellock.c
>> +++ b/nptl/lowlevellock.c
>> @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private)
>>  }
>>  libc_hidden_def (__lll_lock_wait)
>>  
>> +void
>> +__lll_lock_wake_private (int *futex)
>> +{
>> +  lll_futex_wake (futex, 1, LLL_PRIVATE);
>> +}
>> +libc_hidden_def (__lll_lock_wake_private)
>> +
>> +void
>> +__lll_lock_wake (int *futex, int private)
>> +{
>> +  lll_futex_wake (futex, 1, private);
>> +}
>> +libc_hidden_def (__lll_lock_wake)
>> +
>>  #if ENABLE_ELISION_SUPPORT
>>  int __pthread_force_elision __attribute__ ((nocommon));
>>  libc_hidden_data_def (__pthread_force_elision)
>
> Do we really need two functions for this?

It mirrors the wait side, which has two functions as well.  We probably
only need __lll_lock_wake_private these days, but I didn't want to start
cleaning up the libc-lock implementation with its deep macro nesting.

Thanks,
Florian
  
Adhemerval Zanella July 8, 2021, 6:27 p.m. UTC | #3
On 08/07/2021 15:24, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>> diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
>>> index 2d077d8694..4f88178964 100644
>>> --- a/nptl/lowlevellock.c
>>> +++ b/nptl/lowlevellock.c
>>> @@ -52,6 +52,20 @@ __lll_lock_wait (int *futex, int private)
>>>  }
>>>  libc_hidden_def (__lll_lock_wait)
>>>  
>>> +void
>>> +__lll_lock_wake_private (int *futex)
>>> +{
>>> +  lll_futex_wake (futex, 1, LLL_PRIVATE);
>>> +}
>>> +libc_hidden_def (__lll_lock_wake_private)
>>> +
>>> +void
>>> +__lll_lock_wake (int *futex, int private)
>>> +{
>>> +  lll_futex_wake (futex, 1, private);
>>> +}
>>> +libc_hidden_def (__lll_lock_wake)
>>> +
>>>  #if ENABLE_ELISION_SUPPORT
>>>  int __pthread_force_elision __attribute__ ((nocommon));
>>>  libc_hidden_data_def (__pthread_force_elision)
>>
>> Do we really need two functions for this?
> 
> It mirrors the wait side, which has two functions as well.  We probably
> only need __lll_lock_wake_private these days, but I didn't want to start
> cleaning up the libc-lock implementation with its deep macro nesting.

Fair enough.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
  

Patch

diff --git a/nptl/Versions b/nptl/Versions
index 2a75f013f2..3221de89d1 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -380,6 +380,7 @@  libc {
   }
   GLIBC_PRIVATE {
     __libc_alloca_cutoff;
+    __lll_lock_wake_private;
     __lll_lock_wait_private;
     __nptl_create_event;
     __nptl_death_event;
diff --git a/nptl/lowlevellock.c b/nptl/lowlevellock.c
index 2d077d8694..4f88178964 100644
--- a/nptl/lowlevellock.c
+++ b/nptl/lowlevellock.c
@@ -52,6 +52,20 @@  __lll_lock_wait (int *futex, int private)
 }
 libc_hidden_def (__lll_lock_wait)
 
+void
+__lll_lock_wake_private (int *futex)
+{
+  lll_futex_wake (futex, 1, LLL_PRIVATE);
+}
+libc_hidden_def (__lll_lock_wake_private)
+
+void
+__lll_lock_wake (int *futex, int private)
+{
+  lll_futex_wake (futex, 1, private);
+}
+libc_hidden_def (__lll_lock_wake)
+
 #if ENABLE_ELISION_SUPPORT
 int __pthread_force_elision __attribute__ ((nocommon));
 libc_hidden_data_def (__pthread_force_elision)
diff --git a/sysdeps/nptl/lowlevellock.h b/sysdeps/nptl/lowlevellock.h
index be60c9ac28..4d95114ed3 100644
--- a/sysdeps/nptl/lowlevellock.h
+++ b/sysdeps/nptl/lowlevellock.h
@@ -125,6 +125,11 @@  libc_hidden_proto (__lll_lock_wait)
 #define lll_cond_lock(futex, private) __lll_cond_lock (&(futex), private)
 
 
+extern void __lll_lock_wake_private (int *futex);
+libc_hidden_proto (__lll_lock_wake_private)
+extern void __lll_lock_wake (int *futex, int private);
+libc_hidden_proto (__lll_lock_wake)
+
 /* This is an expression rather than a statement even though its value is
    void, so that it can be used in a comma expression or as an expression
    that's cast to void.  */
@@ -137,14 +142,19 @@  libc_hidden_proto (__lll_lock_wait)
    acquires the lock and when there will be no further lock acquisitions;
    thus, we must not access the lock after releasing it, or those accesses
    could be concurrent with mutex destruction or reuse of the memory.  */
-#define __lll_unlock(futex, private)                    \
-  ((void)                                               \
-   ({                                                   \
-     int *__futex = (futex);                            \
-     int __private = (private);                         \
-     int __oldval = atomic_exchange_rel (__futex, 0);   \
-     if (__glibc_unlikely (__oldval > 1))               \
-       lll_futex_wake (__futex, 1, __private);          \
+#define __lll_unlock(futex, private)					\
+  ((void)								\
+  ({									\
+     int *__futex = (futex);						\
+     int __private = (private);						\
+     int __oldval = atomic_exchange_rel (__futex, 0);			\
+     if (__glibc_unlikely (__oldval > 1))				\
+       {								\
+         if (__builtin_constant_p (private) && (private) == LLL_PRIVATE) \
+           __lll_lock_wake_private (__futex);                           \
+         else                                                           \
+           __lll_lock_wake (__futex, __private);			\
+       }								\
    }))
 #define lll_unlock(futex, private)	\
   __lll_unlock (&(futex), private)