[13/13] Linux: Move __reclaim_stacks into the fork implementation in libc

Message ID 55e7367c90178cd8fe4cddb84ace7e31b6648f52.1620323953.git.fweimer@redhat.com
State Committed
Commit 732139dabeda7ecce0d56200bc176251e759ccde
Series Linux: Move most stack management out of libpthread

Checks

Context               | Check   | Description
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 6, 2021, 6:11 p.m. UTC
  As a result, __libc_pthread_init is no longer needed.
---
 nptl/Versions        |   1 -
 nptl/allocatestack.c | 108 ------------------------------------------
 nptl/nptl-init.c     |   3 --
 nptl/pthreadP.h      |   7 ---
 sysdeps/nptl/fork.c  | 110 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 110 insertions(+), 119 deletions(-)
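
For context (an illustration, not part of the patch): POSIX fork() leaves only
the calling thread running in the child, yet the child's copied address space
still contains the stacks of every other thread that existed in the parent.
Those are the stacks the reclaim_stacks logic recycles. A minimal standalone
demo of the situation (compile with -pthread):

#include <pthread.h>
#include <sys/wait.h>
#include <unistd.h>

static void *
idle_thread (void *arg)
{
  (void) arg;
  pause ();   /* Sleep forever; exists only to own a stack.  */
  return NULL;
}

int
main (void)
{
  pthread_t thr;
  if (pthread_create (&thr, NULL, idle_thread, NULL) != 0)
    return 1;

  pid_t pid = fork ();
  if (pid == 0)
    {
      /* Only the forking thread survives here; idle_thread's stack is
	 dead weight in the child until libc reclaims or reuses it.  */
      const char msg[] = "child: single-threaded again\n";
      write (STDOUT_FILENO, msg, sizeof msg - 1);
      _exit (0);
    }
  waitpid (pid, NULL, 0);
  return 0;
}

Note the child uses write rather than printf: after forking a multithreaded
process, only async-signal-safe functions may be called in the child.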
  

Comments

Carlos O'Donell May 9, 2021, 9:41 p.m. UTC | #1
On 5/6/21 2:11 PM, Florian Weimer via Libc-alpha wrote:
> As a result, __libc_pthread_init is no longer needed.

Yay! :-)

LGTM.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> ---
>  nptl/Versions        |   1 -
>  nptl/allocatestack.c | 108 ------------------------------------------
>  nptl/nptl-init.c     |   3 --
>  nptl/pthreadP.h      |   7 ---
>  sysdeps/nptl/fork.c  | 110 +++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 110 insertions(+), 119 deletions(-)
> 
> diff --git a/nptl/Versions b/nptl/Versions
> index d439a023b7..4c1c4ee0a7 100644
> --- a/nptl/Versions
> +++ b/nptl/Versions
> @@ -308,7 +308,6 @@ libc {
>      __libc_cleanup_push_defer;
>      __libc_dl_error_tsd;
>      __libc_multiple_threads;
> -    __libc_pthread_init;

OK.

> [... remainder of patch quoted unchanged; see the full patch below ...]
  

Patch

diff --git a/nptl/Versions b/nptl/Versions
index d439a023b7..4c1c4ee0a7 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -308,7 +308,6 @@  libc {
     __libc_cleanup_push_defer;
     __libc_dl_error_tsd;
     __libc_multiple_threads;
-    __libc_pthread_init;
     __lll_clocklock_elision;
     __lll_lock_elision;
     __lll_lock_wait;
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 076cffd35b..8672e89e75 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -754,111 +754,3 @@  __deallocate_stack (struct pthread *pd)
 
   lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 }
-
-/* In case of a fork() call the memory allocation in the child will be
-   the same but only one thread is running.  All stacks except that of
-   the one running thread are not used anymore.  We have to recycle
-   them.  */
-void
-__reclaim_stacks (void)
-{
-  struct pthread *self = (struct pthread *) THREAD_SELF;
-
-  /* No locking necessary.  The caller is the only stack in use.  But
-     we have to be aware that we might have interrupted a list
-     operation.  */
-
-  if (GL (dl_in_flight_stack) != 0)
-    {
-      bool add_p = GL (dl_in_flight_stack) & 1;
-      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
-
-      if (add_p)
-	{
-	  /* We always add at the beginning of the list.  So in this case we
-	     only need to check the beginning of these lists to see if the
-	     pointers at the head of the list are inconsistent.  */
-	  list_t *l = NULL;
-
-	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
-	    l = &GL (dl_stack_used);
-	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
-	    l = &GL (dl_stack_cache);
-
-	  if (l != NULL)
-	    {
-	      assert (l->next->prev == elem);
-	      elem->next = l->next;
-	      elem->prev = l;
-	      l->next = elem;
-	    }
-	}
-      else
-	{
-	  /* We can simply always replay the delete operation.  */
-	  elem->next->prev = elem->prev;
-	  elem->prev->next = elem->next;
-	}
-
-      GL (dl_in_flight_stack) = 0;
-    }
-
-  /* Mark all stacks except the still running one as free.  */
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      struct pthread *curp = list_entry (runp, struct pthread, list);
-      if (curp != self)
-	{
-	  /* This marks the stack as free.  */
-	  curp->tid = 0;
-
-	  /* Account for the size of the stack.  */
-	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
-
-	  if (curp->specific_used)
-	    {
-	      /* Clear the thread-specific data.  */
-	      memset (curp->specific_1stblock, '\0',
-		      sizeof (curp->specific_1stblock));
-
-	      curp->specific_used = false;
-
-	      for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
-		if (curp->specific[cnt] != NULL)
-		  {
-		    memset (curp->specific[cnt], '\0',
-			    sizeof (curp->specific_1stblock));
-
-		    /* We have allocated the block which we do not
-		       free here so re-set the bit.  */
-		    curp->specific_used = true;
-		  }
-	    }
-	}
-    }
-
-  /* Add the stack of all running threads to the cache.  */
-  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
-
-  /* Remove the entry for the current thread to from the cache list
-     and add it to the list of running threads.  Which of the two
-     lists is decided by the user_stack flag.  */
-  list_del (&self->list);
-
-  /* Re-initialize the lists for all the threads.  */
-  INIT_LIST_HEAD (&GL (dl_stack_used));
-  INIT_LIST_HEAD (&GL (dl_stack_user));
-
-  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
-    list_add (&self->list, &GL (dl_stack_user));
-  else
-    list_add (&self->list, &GL (dl_stack_used));
-
-  /* There is one thread running.  */
-  __nptl_nthreads = 1;
-
-  /* Initialize locks.  */
-  GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
-  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
-}
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index 4c89e7a792..16fb66bdf5 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -172,9 +172,6 @@  __pthread_initialize_minimal_internal (void)
   __default_pthread_attr.internal.stacksize = limit.rlim_cur;
   __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
   lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
-
-  /* Register the fork generation counter with the libc.  */
-  __libc_pthread_init (__reclaim_stacks);
 }
 strong_alias (__pthread_initialize_minimal_internal,
 	      __pthread_initialize_minimal)
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 6b912f053b..d9b97c814a 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -333,10 +333,6 @@  extern void __free_tcb (struct pthread *pd) attribute_hidden;
 /* Free allocated stack.  */
 extern void __deallocate_stack (struct pthread *pd) attribute_hidden;
 
-/* Mark all the stacks except for the current one as available.  This
-   function also re-initializes the lock for the stack cache.  */
-extern void __reclaim_stacks (void) attribute_hidden;
-
 /* Change the permissions of a thread stack.  Called from
    _dl_make_stacks_executable and pthread_create.  */
 int
@@ -372,9 +368,6 @@  extern unsigned long int __fork_generation attribute_hidden;
 /* Pointer to the fork generation counter in the thread library.  */
 extern unsigned long int *__fork_generation_pointer attribute_hidden;
 
-/* Register the generation counter in the libpthread with the libc.  */
-extern void __libc_pthread_init (void (*reclaim) (void));
-
 extern size_t __pthread_get_minstack (const pthread_attr_t *attr);
 
 /* Namespace save aliases.  */
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
index f41c40fca0..062b01265a 100644
--- a/sysdeps/nptl/fork.c
+++ b/sysdeps/nptl/fork.c
@@ -35,6 +35,7 @@ 
 #include <nss/nss_database.h>
 #include <unwind-link.h>
 #include <sys/single_threaded.h>
+#include <list.h>
 
 static void
 fresetlockfiles (void)
@@ -46,6 +47,106 @@  fresetlockfiles (void)
       _IO_lock_init (*((_IO_lock_t *) _IO_iter_file(i)->_lock));
 }
 
+/* In case of a fork() call the memory allocation in the child will be
+   the same but only one thread is running.  All stacks except that of
+   the one running thread are not used anymore.  We have to recycle
+   them.  */
+static void
+reclaim_stacks (void)
+{
+  struct pthread *self = (struct pthread *) THREAD_SELF;
+
+  /* No locking necessary.  The caller is the only stack in use.  But
+     we have to be aware that we might have interrupted a list
+     operation.  */
+
+  if (GL (dl_in_flight_stack) != 0)
+    {
+      bool add_p = GL (dl_in_flight_stack) & 1;
+      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
+
+      if (add_p)
+	{
+	  /* We always add at the beginning of the list.  So in this case we
+	     only need to check the beginning of these lists to see if the
+	     pointers at the head of the list are inconsistent.  */
+	  list_t *l = NULL;
+
+	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
+	    l = &GL (dl_stack_used);
+	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
+	    l = &GL (dl_stack_cache);
+
+	  if (l != NULL)
+	    {
+	      assert (l->next->prev == elem);
+	      elem->next = l->next;
+	      elem->prev = l;
+	      l->next = elem;
+	    }
+	}
+      else
+	{
+	  /* We can simply always replay the delete operation.  */
+	  elem->next->prev = elem->prev;
+	  elem->prev->next = elem->next;
+	}
+
+      GL (dl_in_flight_stack) = 0;
+    }
+
+  /* Mark all stacks except the still running one as free.  */
+  list_t *runp;
+  list_for_each (runp, &GL (dl_stack_used))
+    {
+      struct pthread *curp = list_entry (runp, struct pthread, list);
+      if (curp != self)
+	{
+	  /* This marks the stack as free.  */
+	  curp->tid = 0;
+
+	  /* Account for the size of the stack.  */
+	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
+
+	  if (curp->specific_used)
+	    {
+	      /* Clear the thread-specific data.  */
+	      memset (curp->specific_1stblock, '\0',
+		      sizeof (curp->specific_1stblock));
+
+	      curp->specific_used = false;
+
+	      for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
+		if (curp->specific[cnt] != NULL)
+		  {
+		    memset (curp->specific[cnt], '\0',
+			    sizeof (curp->specific_1stblock));
+
+		    /* We have allocated the block which we do not
+		       free here so re-set the bit.  */
+		    curp->specific_used = true;
+		  }
+	    }
+	}
+    }
+
+  /* Add the stack of all running threads to the cache.  */
+  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
+
+  /* Remove the entry for the current thread to from the cache list
+     and add it to the list of running threads.  Which of the two
+     lists is decided by the user_stack flag.  */
+  list_del (&self->list);
+
+  /* Re-initialize the lists for all the threads.  */
+  INIT_LIST_HEAD (&GL (dl_stack_used));
+  INIT_LIST_HEAD (&GL (dl_stack_user));
+
+  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
+    list_add (&self->list, &GL (dl_stack_user));
+  else
+    list_add (&self->list, &GL (dl_stack_used));
+}
 
 pid_t
 __libc_fork (void)
@@ -112,6 +213,13 @@  __libc_fork (void)
 	{
 	  __libc_unwind_link_after_fork ();
 
+	  /* There is one thread running.  */
+	  __nptl_nthreads = 1;
+
+	  /* Initialize thread library locks.  */
+	  GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
+	  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
+
 	  /* Release malloc locks.  */
 	  call_function_static_weak (__malloc_fork_unlock_child);
 
@@ -128,6 +236,8 @@  __libc_fork (void)
       /* Reset the lock the dynamic loader uses to protect its data.  */
       __rtld_lock_initialize (GL(dl_load_lock));
 
+      reclaim_stacks ();
+
       /* Run the handlers registered for the child.  */
       __run_fork_handlers (atfork_run_child, multiple_threads);
     }
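
The subtlest part of the moved code is the in-flight replay at the top of
reclaim_stacks: before mutating one of the stack lists, a thread publishes the
element's address in GL (dl_in_flight_stack), using the low pointer bit as an
add/delete tag, so a child forked in the middle of the update can detect the
half-finished operation and finish or replay it. Below is a standalone sketch
of that idea with hypothetical names (in_flight, list_add_in_flight,
replay_in_flight); glibc's atomic write barriers are omitted for brevity.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct list { struct list *next, *prev; } list_t;

/* Element address | 1 for a pending add; | 0 for a pending delete.  */
static uintptr_t in_flight;

static void
list_add_in_flight (list_t *elem, list_t *head)
{
  in_flight = (uintptr_t) elem | 1;   /* Publish before touching the list.  */
  elem->next = head->next;
  elem->prev = head;
  head->next->prev = elem;            /* A fork here leaves head->next stale.  */
  head->next = elem;
  in_flight = 0;
}

/* Called in the fork child while it is the only running thread.  */
static void
replay_in_flight (list_t *head)
{
  if (in_flight == 0)
    return;
  bool add_p = in_flight & 1;
  list_t *elem = (list_t *) (in_flight & ~(uintptr_t) 1);

  if (add_p)
    {
      /* Inconsistent head: the back link was written but the head's
	 forward link was not.  Finish the insertion.  */
      if (head->next->prev != head)
	{
	  assert (head->next->prev == elem);
	  elem->next = head->next;
	  elem->prev = head;
	  head->next = elem;
	}
    }
  else
    {
      /* Unlinking is idempotent, so simply replay it.  */
      elem->next->prev = elem->prev;
      elem->prev->next = elem->next;
    }
  in_flight = 0;
}

int
main (void)
{
  list_t head = { &head, &head };
  list_t a;
  list_add_in_flight (&a, &head);
  replay_in_flight (&head);           /* Nothing pending; a no-op.  */
  assert (head.next == &a && a.prev == &head);
  return 0;
}

If the fork happens before the back link is written, the list is still fully
consistent and the dropped insertion is harmless in this context, since every
stack other than the forking thread's gets reclaimed anyway.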