[v2,3/3] nptl: Only initialize robust list at mutex usage

Message ID 20260505190809.3898686-4-adhemerval.zanella@linaro.org (mailing list archive)
State New
Headers
Series nptl: Fix robust mutex support detection and defer robust list initialization |

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
redhat-pt-bot/TryBot-32bit success Build for i686

Commit Message

Adhemerval Zanella Netto May 5, 2026, 7:06 p.m. UTC
  The set_robust_list syscall is currently called unconditionally at
process startup (__tls_init_tp) and on every thread creation
(pthread_create), even in programs that never use robust mutexes.

This patch defers the call until the first time a robust mutex is
locked, or until a robust mutex is initialized with
PTHREAD_PROCESS_SHARED, whichever happens first.  The new helper
robust_list_setup() performs the deferred registration and is called
from pthread_mutex_init (only for pshared+robust), pthread_mutex_lock,
pthread_mutex_trylock, and pthread_mutex_timedlock.

The field robust_head.futex_offset serves as a sentinel: zero means the
list has not been registered with the kernel; non-zero means it has.
robust_list_setup() sets futex_offset to its correct value before
calling set_robust_list, so the kernel always reads a valid offset when
it walks the list on thread death.  If the syscall fails, futex_offset
is reset to zero, so the list is again treated as unregistered.

_Fork is updated to re-register the list with the kernel only when the
parent had already initialized it (futex_offset != 0).  If the parent
never used robust mutexes, the child's first robust mutex lock calls
robust_list_setup() itself.

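The __nptl_set_robust_list_avail variable moves from ld.so to libc.so,
since it no longer needs to be set during pthread startup.  It also
changes from a bool that was set on a successful startup call to an int
that is initialized to 1 and cleared the first time set_robust_list
fails.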

Checked on x86_64-linux-gnu and aarch64-linux-gnu.
---
 nptl/Makefile                  |  2 +
 nptl/Versions                  |  1 -
 nptl/allocatestack.c           |  9 +---
 nptl/descr.h                   | 31 ++++++++++++
 nptl/nptl_robust_setup.c       | 38 +++++++++++++++
 nptl/pthread_create.c          |  8 ---
 nptl/pthread_mutex_init.c      |  2 +-
 nptl/pthread_mutex_lock.c      |  2 +
 nptl/pthread_mutex_timedlock.c |  2 +
 nptl/pthread_mutex_trylock.c   |  2 +
 nptl/tst-robust-pshared.c      | 89 ++++++++++++++++++++++++++++++++++
 sysdeps/nptl/_Fork.c           | 24 ++++-----
 sysdeps/nptl/dl-tls_init_tp.c  | 17 +------
 sysdeps/nptl/pthreadP.h        |  6 +--
 14 files changed, 183 insertions(+), 50 deletions(-)
 create mode 100644 nptl/nptl_robust_setup.c
 create mode 100644 nptl/tst-robust-pshared.c
  

Patch

diff --git a/nptl/Makefile b/nptl/Makefile
index 02862d1c04b..c9c7fc00f55 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -50,6 +50,7 @@  routines = \
   nptl_deallocate_tsd \
   nptl_free_tcb \
   nptl_nthreads \
+  nptl_robust_setup \
   nptl_setxid \
   nptlfreeres \
   old_pthread_cond_broadcast \
@@ -326,6 +327,7 @@  tests = \
   tst-pthread_exit-nothreads-static \
   tst-pthread_gettid_np \
   tst-robust-fork \
+  tst-robust-pshared \
   tst-robustpi1 \
   tst-robustpi2 \
   tst-robustpi3 \
diff --git a/nptl/Versions b/nptl/Versions
index b813b675b91..94a567bd609 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -535,6 +535,5 @@  libpthread {
 ld {
   GLIBC_PRIVATE {
      __nptl_initial_report_events;
-     __nptl_set_robust_list_avail;
   }
 }
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index b2ecb001136..fcccc75ddb8 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -631,14 +631,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   /* The robust mutex lists also need to be initialized
      unconditionally because the cleanup for the previous stack owner
      might have happened in the kernel.  */
-  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
-				  - offsetof (pthread_mutex_t,
-					      __data.__list.__next));
-  pd->robust_head.list_op_pending = NULL;
-#if __PTHREAD_MUTEX_HAVE_PREV
-  pd->robust_prev = &pd->robust_head;
-#endif
-  pd->robust_head.list = &pd->robust_head;
+  robust_list_init (pd);
 
   /* We place the thread descriptor at the end of the stack.  */
   *pdp = pd;
diff --git a/nptl/descr.h b/nptl/descr.h
index 627cc3980f0..308100c0c30 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -458,6 +458,37 @@  cancel_enabled_and_canceled_and_async (int value)
     == (CANCELTYPE_BITMASK | CANCELED_BITMASK);
 }
 
+static inline void
+robust_list_init (struct pthread *pd)
+{
+  pd->robust_head.list_op_pending = NULL;
+#if __PTHREAD_MUTEX_HAVE_PREV
+  pd->robust_prev = &pd->robust_head;
+#endif
+  pd->robust_head.list = &pd->robust_head;
+  pd->robust_head.futex_offset = 0;
+}
+
+extern bool __nptl_robust_setup (struct robust_list_head *robust_head)
+     attribute_hidden;
+
+static inline bool
+robust_list_setup (struct pthread *pd)
+{
+  /* The current thread was already initialized.  */
+  if (pd->robust_head.futex_offset != 0)
+    return true;
+
+  pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
+				  - offsetof (pthread_mutex_t,
+					      __data.__list.__next));
+  if (__nptl_robust_setup (&pd->robust_head))
+    return true;
+
+  pd->robust_head.futex_offset = 0;
+  return false;
+}
+
 /* This yields the pointer that TLS support code calls the thread pointer.  */
 #if TLS_TCB_AT_TP
 # define TLS_TPADJ(pd) (pd)
diff --git a/nptl/nptl_robust_setup.c b/nptl/nptl_robust_setup.c
new file mode 100644
index 00000000000..0b95de22009
--- /dev/null
+++ b/nptl/nptl_robust_setup.c
@@ -0,0 +1,38 @@ 
+/* Linux robust mutex setup.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <descr.h>
+#include <atomic.h>
+#include <pthreadP.h>
+
+int __nptl_set_robust_list_avail = 1;
+
+bool
+__nptl_robust_setup (struct robust_list_head *robust_head)
+{
+  if (atomic_load_relaxed (&__nptl_set_robust_list_avail))
+    {
+      int res = INTERNAL_SYSCALL_CALL (set_robust_list, robust_head,
+				       sizeof (struct robust_list_head));
+      if (!INTERNAL_SYSCALL_ERROR_P (res))
+        return true;
+
+      atomic_store_relaxed (&__nptl_set_robust_list_avail, 0);
+    }
+  return false;
+}
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 72f77d1914e..d7aed06efda 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -386,14 +386,6 @@  start_thread (void *arg)
       __libc_fatal ("Fatal glibc error: rseq registration failed\n");
   }
 
-  if (__nptl_set_robust_list_avail)
-    {
-      /* This call should never fail because the initial call in init.c
-	 succeeded.  */
-      INTERNAL_SYSCALL_CALL (set_robust_list, &pd->robust_head,
-			     sizeof (struct robust_list_head));
-    }
-
   /* This is where the try/finally block should be created.  For
      compilers without that support we do use setjmp.  */
   struct pthread_unwind_buf unwind_buf;
diff --git a/nptl/pthread_mutex_init.c b/nptl/pthread_mutex_init.c
index 9be08332f1f..2f0bb84c557 100644
--- a/nptl/pthread_mutex_init.c
+++ b/nptl/pthread_mutex_init.c
@@ -94,7 +94,7 @@  ___pthread_mutex_init (pthread_mutex_t *mutex,
   if ((imutexattr->mutexkind & PTHREAD_MUTEXATTR_FLAG_ROBUST) != 0)
     {
       if ((imutexattr->mutexkind & PTHREAD_MUTEXATTR_FLAG_PSHARED) != 0
-	  && !__nptl_set_robust_list_avail)
+	  && !robust_list_setup (THREAD_SELF))
 	return ENOTSUP;
       mutex_kind |= PTHREAD_MUTEX_ROBUST_NORMAL_NP;
     }
diff --git a/nptl/pthread_mutex_lock.c b/nptl/pthread_mutex_lock.c
index a697f2b6ca8..f649036d954 100644
--- a/nptl/pthread_mutex_lock.c
+++ b/nptl/pthread_mutex_lock.c
@@ -177,6 +177,7 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
+      robust_list_setup (THREAD_SELF);
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
       /* We need to set op_pending before starting the operation.  Also
@@ -361,6 +362,7 @@  __pthread_mutex_lock_full (pthread_mutex_t *mutex)
 
 	if (robust)
 	  {
+	    robust_list_setup (THREAD_SELF);
 	    /* Note: robust PI futexes are signaled by setting bit 0.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
diff --git a/nptl/pthread_mutex_timedlock.c b/nptl/pthread_mutex_timedlock.c
index 9efca2c7791..a52ed795df0 100644
--- a/nptl/pthread_mutex_timedlock.c
+++ b/nptl/pthread_mutex_timedlock.c
@@ -111,6 +111,7 @@  __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
+      robust_list_setup (THREAD_SELF);
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
       /* We need to set op_pending before starting the operation.  Also
@@ -295,6 +296,7 @@  __pthread_mutex_clocklock_common (pthread_mutex_t *mutex,
 
 	if (robust)
 	  {
+	    robust_list_setup (THREAD_SELF);
 	    /* Note: robust PI futexes are signaled by setting bit 0.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
diff --git a/nptl/pthread_mutex_trylock.c b/nptl/pthread_mutex_trylock.c
index 236b3228ddb..09c5dc983b2 100644
--- a/nptl/pthread_mutex_trylock.c
+++ b/nptl/pthread_mutex_trylock.c
@@ -77,6 +77,7 @@  ___pthread_mutex_trylock (pthread_mutex_t *mutex)
     case PTHREAD_MUTEX_ROBUST_ERRORCHECK_NP:
     case PTHREAD_MUTEX_ROBUST_NORMAL_NP:
     case PTHREAD_MUTEX_ROBUST_ADAPTIVE_NP:
+      robust_list_setup (THREAD_SELF);
       THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 		     &mutex->__data.__list.__next);
       /* We need to set op_pending before starting the operation.  Also
@@ -219,6 +220,7 @@  ___pthread_mutex_trylock (pthread_mutex_t *mutex)
 
 	if (robust)
 	  {
+	    robust_list_setup (THREAD_SELF);
 	    /* Note: robust PI futexes are signaled by setting bit 0.  */
 	    THREAD_SETMEM (THREAD_SELF, robust_head.list_op_pending,
 			   (void *) (((uintptr_t) &mutex->__data.__list.__next)
diff --git a/nptl/tst-robust-pshared.c b/nptl/tst-robust-pshared.c
new file mode 100644
index 00000000000..4074ffa494b
--- /dev/null
+++ b/nptl/tst-robust-pshared.c
@@ -0,0 +1,89 @@ 
+/* Test process-shared robust mutex support and lazy initialization (BZ 33225).
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Verify that pthread_mutex_init returns ENOTSUP for a process-shared robust
+   mutex when the set_robust_list syscall is not available (e.g., qemu-user),
+   and that the lazy robust list initialization works correctly when the
+   syscall is available.  */
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <support/check.h>
+#include <support/test-driver.h>
+#include <support/xthread.h>
+
+/* Lock the mutex and exit without unlocking to trigger EOWNERDEAD.  */
+static void *
+owner_thread (void *arg)
+{
+  pthread_mutex_t *mutex = arg;
+  TEST_COMPARE (pthread_mutex_lock (mutex), 0);
+  /* Thread exits here without unlocking.  The kernel walks the robust
+     list registered via set_robust_list and marks the mutex owner-dead.
+     This verifies that lazy robust_list_setup correctly set futex_offset
+     and called set_robust_list before the first lock.  */
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  bool robust_support = support_process_shared_robust_mutex ();
+  if (test_verbose)
+    printf ("info: process-shared robust mutex support: %d\n", robust_support);
+
+  pthread_mutexattr_t attr;
+  TEST_COMPARE (pthread_mutexattr_init (&attr), 0);
+  TEST_COMPARE (pthread_mutexattr_setpshared (&attr, PTHREAD_PROCESS_SHARED),
+		0);
+  TEST_COMPARE (pthread_mutexattr_setrobust (&attr, PTHREAD_MUTEX_ROBUST), 0);
+
+  pthread_mutex_t mutex;
+  int ret = pthread_mutex_init (&mutex, &attr);
+  TEST_COMPARE (pthread_mutexattr_destroy (&attr), 0);
+
+  if (!robust_support)
+    {
+      /* When set_robust_list is unavailable, pshared+robust mutex init
+	 must fail with ENOTSUP rather than silently succeeding.  */
+      TEST_COMPARE (ret, ENOTSUP);
+      return 0;
+    }
+
+  TEST_COMPARE (ret, 0);
+
+  /* Have a thread lock the mutex and exit without unlocking.
+     This exercises the lazy robust_list_setup path in
+     pthread_mutex_lock: futex_offset must be set before set_robust_list
+     is called so the kernel can correctly compute the lock address on
+     thread death.  */
+  pthread_t thread;
+  TEST_COMPARE (pthread_create (&thread, NULL, owner_thread, &mutex), 0);
+  TEST_COMPARE (pthread_join (thread, NULL), 0);
+
+  TEST_COMPARE (pthread_mutex_lock (&mutex), EOWNERDEAD);
+  TEST_COMPARE (pthread_mutex_consistent (&mutex), 0);
+  TEST_COMPARE (pthread_mutex_unlock (&mutex), 0);
+  TEST_COMPARE (pthread_mutex_destroy (&mutex), 0);
+
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/nptl/_Fork.c b/sysdeps/nptl/_Fork.c
index 907b3fef876..ba6912aa346 100644
--- a/sysdeps/nptl/_Fork.c
+++ b/sysdeps/nptl/_Fork.c
@@ -35,22 +35,22 @@  _Fork (void)
     {
       struct pthread *self = THREAD_SELF;
 
-      /* Initialize the robust mutex list setting in the kernel which has
-	 been reset during the fork.  We do not check for errors because if
-	 it fails here, it must have failed at process startup as well and
-	 nobody could have used robust mutexes.
-	 Before we do that, we have to clear the list of robust mutexes
-	 because we do not inherit ownership of mutexes from the parent.
-	 We do not have to set self->robust_head.futex_offset since we do
-	 inherit the correct value from the parent.  We do not need to clear
-	 the pending operation because it must have been zero when fork was
-	 called.  */
+      /* Clear the list of robust mutexes because we do not inherit ownership
+	 of mutexes from the parent.  We do not need to clear the pending
+	 operation because it must have been zero when fork was called.
+	 futex_offset is inherited from the parent unchanged.  */
 #if __PTHREAD_MUTEX_HAVE_PREV
       self->robust_prev = &self->robust_head;
 #endif
       self->robust_head.list = &self->robust_head;
-      INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
-			     sizeof (struct robust_list_head));
+      /* Re-register the robust list with the kernel only if the parent had
+	 already initialized it.  futex_offset is the sentinel: zero means
+	 lazy initialization has not happened yet, so there is nothing to
+	 re-register and the first robust mutex lock in the child will call
+	 set_robust_list itself.  */
+      if (self->robust_head.futex_offset != 0)
+	INTERNAL_SYSCALL_CALL (set_robust_list, &self->robust_head,
+			       sizeof (struct robust_list_head));
       call_function_static_weak (__getrandom_fork_subprocess);
     }
 
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index 75e3712a6d8..77f2e341822 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -28,9 +28,6 @@ 
 #define TUNABLE_NAMESPACE pthread
 #include <dl-tunables.h>
 
-bool __nptl_set_robust_list_avail;
-rtld_hidden_data_def (__nptl_set_robust_list_avail)
-
 bool __nptl_initial_report_events;
 rtld_hidden_def (__nptl_initial_report_events)
 
@@ -82,19 +79,7 @@  __tls_init_tp (void)
   THREAD_SETMEM (pd, report_events, __nptl_initial_report_events);
 
   /* Initialize the robust mutex data.  */
-  {
-#if __PTHREAD_MUTEX_HAVE_PREV
-    pd->robust_prev = &pd->robust_head;
-#endif
-    pd->robust_head.list = &pd->robust_head;
-    pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
-                                    - offsetof (pthread_mutex_t,
-                                                __data.__list.__next));
-    int res = INTERNAL_SYSCALL_CALL (set_robust_list, &pd->robust_head,
-                                     sizeof (struct robust_list_head));
-    if (!INTERNAL_SYSCALL_ERROR_P (res))
-      __nptl_set_robust_list_avail = true;
-  }
+  robust_list_init (pd);
 
   {
     /* If the registration fails or is disabled by tunable, the public
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index c62c8982905..38a1383c3fe 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -192,10 +192,8 @@  libc_hidden_proto (__pthread_keys)
 extern unsigned int __nptl_nthreads;
 libc_hidden_proto (__nptl_nthreads)
 
-/* True if the set_robust_list system call works.  Initialized in
-   __tls_init_tp.  */
-extern bool __nptl_set_robust_list_avail;
-rtld_hidden_proto (__nptl_set_robust_list_avail)
+/* Set if the set_robust_list system call works.  */
+extern int __nptl_set_robust_list_avail attribute_hidden;
 
 /* Thread Priority Protection.  */
 extern int __sched_fifo_min_prio;