diff mbox series

[v2,02/13] elf, nptl: Resolve recursive lock implementation early

Message ID 32df0df2b98f923c651203bd20e3df950a889d87.1620323953.git.fweimer@redhat.com
State Committed
Commit d6163dfd3831cf48b69f430f37b4c099059a9db5
Headers show
Series Linux: Move most stack management out of libpthread | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 6, 2021, 6:09 p.m. UTC
If libpthread is included in libc, it is not necessary to delay
initialization of the lock/unlock function pointers until libpthread
is loaded.  This eliminates two unprotected function pointers
from _rtld_global and removes some initialization code from
libpthread.
---
v2: Rename dl-lock.c into dl-mutex.c and use a sysdeps override instead
    of a preprocessor conditional.

 elf/Makefile               |  3 ++-
 elf/dl-mutex.c             | 19 ++++++++++++++
 elf/rtld.c                 | 18 +++++++++++++
 nptl/nptl-init.c           |  9 -------
 sysdeps/generic/ldsodefs.h | 25 +++++++++++++++++-
 sysdeps/nptl/dl-mutex.c    | 53 ++++++++++++++++++++++++++++++++++++++
 sysdeps/nptl/libc-lockP.h  | 17 +++---------
 7 files changed, 120 insertions(+), 24 deletions(-)
 create mode 100644 elf/dl-mutex.c
 create mode 100644 sysdeps/nptl/dl-mutex.c

Comments

Carlos O'Donell May 9, 2021, 9:42 p.m. UTC | #1
On 5/6/21 2:09 PM, Florian Weimer via Libc-alpha wrote:
> If libpthread is included in libc, it is not necessary to delay
> initialization of the lock/unlock function pointers until libpthread
> is loaded.  This eliminates two unprotected function pointers
> from _rtld_global and removes some initialization code from
> libpthread.

This version looks good to me, and the early initialization makes it
logically easier to follow when reading the code. Despite the removal
of the unprotected function pointers in _rtld_global, we still need
some function pointer in order to look up the function symbols from libc.so
and remember their values, but data placement is harder to discover than
the fixed offset from a public symbol.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> ---
> v2: Rename dl-lock.c into dl-mutex.c and use a sysdeps override instead
>     of a preprocessor conditional.
> 
>  elf/Makefile               |  3 ++-
>  elf/dl-mutex.c             | 19 ++++++++++++++
>  elf/rtld.c                 | 18 +++++++++++++
>  nptl/nptl-init.c           |  9 -------
>  sysdeps/generic/ldsodefs.h | 25 +++++++++++++++++-
>  sysdeps/nptl/dl-mutex.c    | 53 ++++++++++++++++++++++++++++++++++++++
>  sysdeps/nptl/libc-lockP.h  | 17 +++---------
>  7 files changed, 120 insertions(+), 24 deletions(-)
>  create mode 100644 elf/dl-mutex.c
>  create mode 100644 sysdeps/nptl/dl-mutex.c
> 
> diff --git a/elf/Makefile b/elf/Makefile
> index 4f99af626f..d3e909637a 100644
> --- a/elf/Makefile
> +++ b/elf/Makefile
> @@ -66,7 +66,8 @@ elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
>  # interpreter and operating independent of libc.
>  rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
>    dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
> -  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu
> +  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \
> +  dl-mutex
>  all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
>  
>  CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
> diff --git a/elf/dl-mutex.c b/elf/dl-mutex.c
> new file mode 100644
> index 0000000000..2cd9d49c2e
> --- /dev/null
> +++ b/elf/dl-mutex.c
> @@ -0,0 +1,19 @@
> +/* Recursive locking implementation for the dynamic loader.  Generic version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* The generic version initialization happens in dl_main.  */
> diff --git a/elf/rtld.c b/elf/rtld.c
> index ad325d4c10..a359167f8a 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -857,6 +857,14 @@ rtld_lock_default_unlock_recursive (void *lock)
>    __rtld_lock_default_unlock_recursive (lock);
>  }
>  #endif
> +#if PTHREAD_IN_LIBC
> +/* Dummy implementation.  See __rtld_mutex_init.  */
> +static int
> +rtld_mutex_dummy (pthread_mutex_t *lock)
> +{
> +  return 0;
> +}
> +#endif
>  
>  
>  static void
> @@ -1148,6 +1156,10 @@ dl_main (const ElfW(Phdr) *phdr,
>    GL(dl_rtld_lock_recursive) = rtld_lock_default_lock_recursive;
>    GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive;
>  #endif
> +#if PTHREAD_IN_LIBC
> +  ___rtld_mutex_lock = rtld_mutex_dummy;
> +  ___rtld_mutex_unlock = rtld_mutex_dummy;
> +#endif
>  
>    /* The explicit initialization here is cheaper than processing the reloc
>       in the _rtld_local definition's initializer.  */
> @@ -2363,6 +2375,9 @@ dl_main (const ElfW(Phdr) *phdr,
>  	 loader.  */
>        __rtld_malloc_init_real (main_map);
>  
> +      /* Likewise for the locking implementation.  */
> +      __rtld_mutex_init ();
> +
>        /* Mark all the objects so we know they have been already relocated.  */
>        for (struct link_map *l = main_map; l != NULL; l = l->l_next)
>  	{
> @@ -2468,6 +2483,9 @@ dl_main (const ElfW(Phdr) *phdr,
>  	 at this point.  */
>        __rtld_malloc_init_real (main_map);
>  
> +      /* Likewise for the locking implementation.  */
> +      __rtld_mutex_init ();
> +
>        RTLD_TIMING_VAR (start);
>        rtld_timer_start (&start);
>  
> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
> index fcab5a0904..2724770533 100644
> --- a/nptl/nptl-init.c
> +++ b/nptl/nptl-init.c
> @@ -179,15 +179,6 @@ __pthread_initialize_minimal_internal (void)
>    lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
>  
>  #ifdef SHARED
> -  /* Make __rtld_lock_{,un}lock_recursive use pthread_mutex_{,un}lock,
> -     keep the lock count from the ld.so implementation.  */
> -  GL(dl_rtld_lock_recursive) = (void *) __pthread_mutex_lock;
> -  GL(dl_rtld_unlock_recursive) = (void *) __pthread_mutex_unlock;
> -  unsigned int rtld_lock_count = GL(dl_load_lock).mutex.__data.__count;
> -  GL(dl_load_lock).mutex.__data.__count = 0;
> -  while (rtld_lock_count-- > 0)
> -    __pthread_mutex_lock (&GL(dl_load_lock).mutex);
> -
>    GL(dl_make_stack_executable_hook) = &__make_stacks_executable;
>  #endif
>  
> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 1b064c5894..6d590d1335 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -403,7 +403,7 @@ struct rtld_global
>    struct auditstate _dl_rtld_auditstate[DL_NNS];
>  #endif
>  
> -#if defined SHARED && defined _LIBC_REENTRANT \
> +#if !PTHREAD_IN_LIBC && defined SHARED \
>      && defined __rtld_lock_default_lock_recursive
>    EXTERN void (*_dl_rtld_lock_recursive) (void *);
>    EXTERN void (*_dl_rtld_unlock_recursive) (void *);
> @@ -1318,6 +1318,29 @@ link_map_audit_state (struct link_map *l, size_t index)
>  }
>  #endif /* SHARED */
>  
> +#if PTHREAD_IN_LIBC && defined SHARED
> +/* Recursive locking implementation for use within the dynamic loader.
> +   Used to define the __rtld_lock_lock_recursive and
> +   __rtld_lock_unlock_recursive via <libc-lock.h>.  Initialized to a
> +   no-op dummy implementation early.  Similar
> +   to GL (dl_rtld_lock_recursive) and GL (dl_rtld_unlock_recursive)
> +   in !PTHREAD_IN_LIBC builds.  */
> +extern int (*___rtld_mutex_lock) (pthread_mutex_t *) attribute_hidden;
> +extern int (*___rtld_mutex_unlock) (pthread_mutex_t *lock) attribute_hidden;
> +
> +/* Called after libc has been loaded, but before RELRO is activated.
> +   Used to initialize the function pointers to the actual
> +   implementations.  */
> +void __rtld_mutex_init (void) attribute_hidden;
> +#else /* !PTHREAD_IN_LIBC */
> +static inline void
> +__rtld_mutex_init (void)
> +{
> +  /* The initialization happens later (!PTHREAD_IN_LIBC) or is not
> +     needed at all (!SHARED).  */
> +}
> +#endif /* !PTHREAD_IN_LIBC */
> +
>  #if THREAD_GSCOPE_IN_TCB
>  void __thread_gscope_wait (void) attribute_hidden;
>  # define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()
> diff --git a/sysdeps/nptl/dl-mutex.c b/sysdeps/nptl/dl-mutex.c
> new file mode 100644
> index 0000000000..08b71dc21b
> --- /dev/null
> +++ b/sysdeps/nptl/dl-mutex.c
> @@ -0,0 +1,53 @@
> +/* Recursive locking implementation for the dynamic loader.  NPTL version.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* Use the mutex implementation in libc (assuming PTHREAD_IN_LIBC).  */
> +
> +#include <assert.h>
> +#include <first-versions.h>
> +#include <ldsodefs.h>
> +
> +__typeof (pthread_mutex_lock) *___rtld_mutex_lock attribute_relro;
> +__typeof (pthread_mutex_unlock) *___rtld_mutex_unlock attribute_relro;
> +
> +void
> +__rtld_mutex_init (void)
> +{
> +  /* There is an implicit assumption here that the lock counters are
> +     zero and this function is called while nothing is locked.  For
> +     early initialization of the mutex functions this is true because
> +     it happens directly in dl_main in elf/rtld.c, and not some ELF
> +     constructor while holding loader locks.  */
> +
> +  struct link_map *libc_map = GL (dl_ns)[LM_ID_BASE].libc_map;
> +
> +  const ElfW(Sym) *sym
> +    = _dl_lookup_direct (libc_map, "pthread_mutex_lock",
> +                         0x4f152227, /* dl_new_hash output.  */
> +                         FIRST_VERSION_libc_pthread_mutex_lock_STRING,
> +                         FIRST_VERSION_libc_pthread_mutex_lock_HASH);
> +  assert (sym != NULL);
> +  ___rtld_mutex_lock = DL_SYMBOL_ADDRESS (libc_map, sym);
> +
> +  sym = _dl_lookup_direct (libc_map, "pthread_mutex_unlock",
> +                           0x7dd7aaaa, /* dl_new_hash output.  */
> +                           FIRST_VERSION_libc_pthread_mutex_unlock_STRING,
> +                           FIRST_VERSION_libc_pthread_mutex_unlock_HASH);
> +  assert (sym != NULL);
> +  ___rtld_mutex_unlock = DL_SYMBOL_ADDRESS (libc_map, sym);
> +}
> diff --git a/sysdeps/nptl/libc-lockP.h b/sysdeps/nptl/libc-lockP.h
> index ae9691d40e..ec7b02bbdd 100644
> --- a/sysdeps/nptl/libc-lockP.h
> +++ b/sysdeps/nptl/libc-lockP.h
> @@ -151,9 +151,6 @@ _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
>    __libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)
>  #endif
>  
> -#define __rtld_lock_trylock_recursive(NAME) \
> -  __libc_maybe_call (__pthread_mutex_trylock, (&(NAME).mutex), 0)
> -
>  /* Unlock the named lock variable.  */
>  #if IS_IN (libc) || IS_IN (libpthread)
>  # define __libc_lock_unlock(NAME) \
> @@ -163,19 +160,13 @@ _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
>  #endif
>  #define __libc_rwlock_unlock(NAME) __pthread_rwlock_unlock (&(NAME))
>  
> -#ifdef SHARED
> -# define __rtld_lock_default_lock_recursive(lock) \
> -  ++((pthread_mutex_t *)(lock))->__data.__count;
> -
> -# define __rtld_lock_default_unlock_recursive(lock) \
> -  --((pthread_mutex_t *)(lock))->__data.__count;
> -
> +#if IS_IN (rtld)
>  # define __rtld_lock_lock_recursive(NAME) \
> -  GL(dl_rtld_lock_recursive) (&(NAME).mutex)
> +  ___rtld_mutex_lock (&(NAME).mutex)
>  
>  # define __rtld_lock_unlock_recursive(NAME) \
> -  GL(dl_rtld_unlock_recursive) (&(NAME).mutex)
> -#else
> +  ___rtld_mutex_unlock (&(NAME).mutex)
> +#else /* Not in the dynamic loader.  */
>  # define __rtld_lock_lock_recursive(NAME) \
>    __pthread_mutex_lock (&(NAME).mutex)
>  
>
Florian Weimer May 10, 2021, 5:54 a.m. UTC | #2
* Carlos O'Donell:

> On 5/6/21 2:09 PM, Florian Weimer via Libc-alpha wrote:
>> If libpthread is included in libc, it is not necessary to delay
>> initialization of the lock/unlock function pointers until libpthread
>> is loaded.  This eliminates two unprotected function pointers
>> from _rtld_global and removes some initialization code from
>> libpthread.
>
> This version looks good to me, and the early initialization makes it
> logically easier to follow when reading the code. Despite the removal
> of the unprotected function pointers in _rtld_global, we still need
> some function pointer in order to look up the function symbols from libc.so
> and remember their values, but data placement is harder to discover than
> the fixed offset from a public symbol.

Eh, the lookup happens before any user code runs, so it really ought to
be safe. 8-)

Thanks,
Florian
diff mbox series

Patch

diff --git a/elf/Makefile b/elf/Makefile
index 4f99af626f..d3e909637a 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -66,7 +66,8 @@  elide-routines.os = $(all-dl-routines) dl-support enbl-secure dl-origin \
 # interpreter and operating independent of libc.
 rtld-routines	= rtld $(all-dl-routines) dl-sysdep dl-environ dl-minimal \
   dl-error-minimal dl-conflict dl-hwcaps dl-hwcaps_split dl-hwcaps-subdirs \
-  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu
+  dl-usage dl-diagnostics dl-diagnostics-kernel dl-diagnostics-cpu \
+  dl-mutex
 all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
 
 CFLAGS-dl-runtime.c += -fexceptions -fasynchronous-unwind-tables
diff --git a/elf/dl-mutex.c b/elf/dl-mutex.c
new file mode 100644
index 0000000000..2cd9d49c2e
--- /dev/null
+++ b/elf/dl-mutex.c
@@ -0,0 +1,19 @@ 
+/* Recursive locking implementation for the dynamic loader.  Generic version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* The generic version initialization happens in dl_main.  */
diff --git a/elf/rtld.c b/elf/rtld.c
index ad325d4c10..a359167f8a 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -857,6 +857,14 @@  rtld_lock_default_unlock_recursive (void *lock)
   __rtld_lock_default_unlock_recursive (lock);
 }
 #endif
+#if PTHREAD_IN_LIBC
+/* Dummy implementation.  See __rtld_mutex_init.  */
+static int
+rtld_mutex_dummy (pthread_mutex_t *lock)
+{
+  return 0;
+}
+#endif
 
 
 static void
@@ -1148,6 +1156,10 @@  dl_main (const ElfW(Phdr) *phdr,
   GL(dl_rtld_lock_recursive) = rtld_lock_default_lock_recursive;
   GL(dl_rtld_unlock_recursive) = rtld_lock_default_unlock_recursive;
 #endif
+#if PTHREAD_IN_LIBC
+  ___rtld_mutex_lock = rtld_mutex_dummy;
+  ___rtld_mutex_unlock = rtld_mutex_dummy;
+#endif
 
   /* The explicit initialization here is cheaper than processing the reloc
      in the _rtld_local definition's initializer.  */
@@ -2363,6 +2375,9 @@  dl_main (const ElfW(Phdr) *phdr,
 	 loader.  */
       __rtld_malloc_init_real (main_map);
 
+      /* Likewise for the locking implementation.  */
+      __rtld_mutex_init ();
+
       /* Mark all the objects so we know they have been already relocated.  */
       for (struct link_map *l = main_map; l != NULL; l = l->l_next)
 	{
@@ -2468,6 +2483,9 @@  dl_main (const ElfW(Phdr) *phdr,
 	 at this point.  */
       __rtld_malloc_init_real (main_map);
 
+      /* Likewise for the locking implementation.  */
+      __rtld_mutex_init ();
+
       RTLD_TIMING_VAR (start);
       rtld_timer_start (&start);
 
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index fcab5a0904..2724770533 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -179,15 +179,6 @@  __pthread_initialize_minimal_internal (void)
   lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 
 #ifdef SHARED
-  /* Make __rtld_lock_{,un}lock_recursive use pthread_mutex_{,un}lock,
-     keep the lock count from the ld.so implementation.  */
-  GL(dl_rtld_lock_recursive) = (void *) __pthread_mutex_lock;
-  GL(dl_rtld_unlock_recursive) = (void *) __pthread_mutex_unlock;
-  unsigned int rtld_lock_count = GL(dl_load_lock).mutex.__data.__count;
-  GL(dl_load_lock).mutex.__data.__count = 0;
-  while (rtld_lock_count-- > 0)
-    __pthread_mutex_lock (&GL(dl_load_lock).mutex);
-
   GL(dl_make_stack_executable_hook) = &__make_stacks_executable;
 #endif
 
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 1b064c5894..6d590d1335 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -403,7 +403,7 @@  struct rtld_global
   struct auditstate _dl_rtld_auditstate[DL_NNS];
 #endif
 
-#if defined SHARED && defined _LIBC_REENTRANT \
+#if !PTHREAD_IN_LIBC && defined SHARED \
     && defined __rtld_lock_default_lock_recursive
   EXTERN void (*_dl_rtld_lock_recursive) (void *);
   EXTERN void (*_dl_rtld_unlock_recursive) (void *);
@@ -1318,6 +1318,29 @@  link_map_audit_state (struct link_map *l, size_t index)
 }
 #endif /* SHARED */
 
+#if PTHREAD_IN_LIBC && defined SHARED
+/* Recursive locking implementation for use within the dynamic loader.
+   Used to define the __rtld_lock_lock_recursive and
+   __rtld_lock_unlock_recursive via <libc-lock.h>.  Initialized to a
+   no-op dummy implementation early.  Similar
+   to GL (dl_rtld_lock_recursive) and GL (dl_rtld_unlock_recursive)
+   in !PTHREAD_IN_LIBC builds.  */
+extern int (*___rtld_mutex_lock) (pthread_mutex_t *) attribute_hidden;
+extern int (*___rtld_mutex_unlock) (pthread_mutex_t *lock) attribute_hidden;
+
+/* Called after libc has been loaded, but before RELRO is activated.
+   Used to initialize the function pointers to the actual
+   implementations.  */
+void __rtld_mutex_init (void) attribute_hidden;
+#else /* !PTHREAD_IN_LIBC */
+static inline void
+__rtld_mutex_init (void)
+{
+  /* The initialization happens later (!PTHREAD_IN_LIBC) or is not
+     needed at all (!SHARED).  */
+}
+#endif /* !PTHREAD_IN_LIBC */
+
 #if THREAD_GSCOPE_IN_TCB
 void __thread_gscope_wait (void) attribute_hidden;
 # define THREAD_GSCOPE_WAIT() __thread_gscope_wait ()
diff --git a/sysdeps/nptl/dl-mutex.c b/sysdeps/nptl/dl-mutex.c
new file mode 100644
index 0000000000..08b71dc21b
--- /dev/null
+++ b/sysdeps/nptl/dl-mutex.c
@@ -0,0 +1,53 @@ 
+/* Recursive locking implementation for the dynamic loader.  NPTL version.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Use the mutex implementation in libc (assuming PTHREAD_IN_LIBC).  */
+
+#include <assert.h>
+#include <first-versions.h>
+#include <ldsodefs.h>
+
+__typeof (pthread_mutex_lock) *___rtld_mutex_lock attribute_relro;
+__typeof (pthread_mutex_unlock) *___rtld_mutex_unlock attribute_relro;
+
+void
+__rtld_mutex_init (void)
+{
+  /* There is an implicit assumption here that the lock counters are
+     zero and this function is called while nothing is locked.  For
+     early initialization of the mutex functions this is true because
+     it happens directly in dl_main in elf/rtld.c, and not some ELF
+     constructor while holding loader locks.  */
+
+  struct link_map *libc_map = GL (dl_ns)[LM_ID_BASE].libc_map;
+
+  const ElfW(Sym) *sym
+    = _dl_lookup_direct (libc_map, "pthread_mutex_lock",
+                         0x4f152227, /* dl_new_hash output.  */
+                         FIRST_VERSION_libc_pthread_mutex_lock_STRING,
+                         FIRST_VERSION_libc_pthread_mutex_lock_HASH);
+  assert (sym != NULL);
+  ___rtld_mutex_lock = DL_SYMBOL_ADDRESS (libc_map, sym);
+
+  sym = _dl_lookup_direct (libc_map, "pthread_mutex_unlock",
+                           0x7dd7aaaa, /* dl_new_hash output.  */
+                           FIRST_VERSION_libc_pthread_mutex_unlock_STRING,
+                           FIRST_VERSION_libc_pthread_mutex_unlock_HASH);
+  assert (sym != NULL);
+  ___rtld_mutex_unlock = DL_SYMBOL_ADDRESS (libc_map, sym);
+}
diff --git a/sysdeps/nptl/libc-lockP.h b/sysdeps/nptl/libc-lockP.h
index ae9691d40e..ec7b02bbdd 100644
--- a/sysdeps/nptl/libc-lockP.h
+++ b/sysdeps/nptl/libc-lockP.h
@@ -151,9 +151,6 @@  _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
   __libc_maybe_call (__pthread_mutex_trylock, (&(NAME)), 0)
 #endif
 
-#define __rtld_lock_trylock_recursive(NAME) \
-  __libc_maybe_call (__pthread_mutex_trylock, (&(NAME).mutex), 0)
-
 /* Unlock the named lock variable.  */
 #if IS_IN (libc) || IS_IN (libpthread)
 # define __libc_lock_unlock(NAME) \
@@ -163,19 +160,13 @@  _Static_assert (LLL_LOCK_INITIALIZER == 0, "LLL_LOCK_INITIALIZER != 0");
 #endif
 #define __libc_rwlock_unlock(NAME) __pthread_rwlock_unlock (&(NAME))
 
-#ifdef SHARED
-# define __rtld_lock_default_lock_recursive(lock) \
-  ++((pthread_mutex_t *)(lock))->__data.__count;
-
-# define __rtld_lock_default_unlock_recursive(lock) \
-  --((pthread_mutex_t *)(lock))->__data.__count;
-
+#if IS_IN (rtld)
 # define __rtld_lock_lock_recursive(NAME) \
-  GL(dl_rtld_lock_recursive) (&(NAME).mutex)
+  ___rtld_mutex_lock (&(NAME).mutex)
 
 # define __rtld_lock_unlock_recursive(NAME) \
-  GL(dl_rtld_unlock_recursive) (&(NAME).mutex)
-#else
+  ___rtld_mutex_unlock (&(NAME).mutex)
+#else /* Not in the dynamic loader.  */
 # define __rtld_lock_lock_recursive(NAME) \
   __pthread_mutex_lock (&(NAME).mutex)