[08/13] nptl: Move more stack management variables into _rtld_global

Message ID 452b4e33920e6d9e1dd7fa1699af443dbb83ee88.1620323953.git.fweimer@redhat.com
State Committed
Commit 9d124d81f09d2097bfde444b2a6228a725a00efd
Headers
Series Linux: Move most stack management out of libpthread |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 6, 2021, 6:10 p.m. UTC
  Permissions of the cached stacks may have to be updated if an object
is loaded that requires executable stacks, so the dynamic loader
needs to know about these cached stacks.

The move of in_flight_stack and stack_cache_actsize is a requirement for
merging __reclaim_stacks into the fork implementation in libc.
---
 elf/dl-support.c              |  3 +++
 nptl/allocatestack.c          | 51 +++++++++++++++--------------------
 sysdeps/generic/ldsodefs.h    | 11 ++++++++
 sysdeps/nptl/dl-tls_init_tp.c |  1 +
 4 files changed, 36 insertions(+), 30 deletions(-)
  

Comments

Carlos O'Donell May 9, 2021, 9:42 p.m. UTC | #1
On 5/6/21 2:10 PM, Florian Weimer via Libc-alpha wrote:
> Permissions of the cached stacks may have to be updated if an object
> is loaded that requires executable stacks, so the dynamic loader
> needs to know about these cached stacks.
> 
> The move of in_flight_stack and stack_cache_actsize is a requirement for
> merging __reclaim_stacks into the fork implementation in libc.

LGTM and makes logical sense to move the data.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> ---
>  elf/dl-support.c              |  3 +++
>  nptl/allocatestack.c          | 51 +++++++++++++++--------------------
>  sysdeps/generic/ldsodefs.h    | 11 ++++++++
>  sysdeps/nptl/dl-tls_init_tp.c |  1 +
>  4 files changed, 36 insertions(+), 30 deletions(-)
> 
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index f966a2e7cd..580b0202ad 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -192,6 +192,9 @@ int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
>  #if THREAD_GSCOPE_IN_TCB
>  list_t _dl_stack_used;
>  list_t _dl_stack_user;
> +list_t _dl_stack_cache;
> +size_t _dl_stack_cache_actsize;
> +uintptr_t _dl_in_flight_stack;
>  int _dl_stack_cache_lock;
>  #else
>  int _dl_thread_gscope_count;
> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index 88c49f8154..71cfa874d1 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -103,15 +103,6 @@
>  
>  /* Maximum size in kB of cache.  */
>  static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default.  */
> -static size_t stack_cache_actsize;
> -
> -/* List of queued stack frames.  */
> -static LIST_HEAD (stack_cache);
> -
> -/* We need to record what list operations we are going to do so that,
> -   in case of an asynchronous interruption due to a fork() call, we
> -   can correct for the work.  */
> -static uintptr_t in_flight_stack;
>  
>  /* Check whether the stack is still used or not.  */
>  #define FREE_P(descr) ((descr)->tid <= 0)
> @@ -120,7 +111,7 @@ static uintptr_t in_flight_stack;
>  static void
>  stack_list_del (list_t *elem)
>  {
> -  in_flight_stack = (uintptr_t) elem;
> +  GL (dl_in_flight_stack) = (uintptr_t) elem;
>  
>    atomic_write_barrier ();
>  
> @@ -128,14 +119,14 @@ stack_list_del (list_t *elem)
>  
>    atomic_write_barrier ();
>  
> -  in_flight_stack = 0;
> +  GL (dl_in_flight_stack) = 0;
>  }
>  
>  
>  static void
>  stack_list_add (list_t *elem, list_t *list)
>  {
> -  in_flight_stack = (uintptr_t) elem | 1;
> +  GL (dl_in_flight_stack) = (uintptr_t) elem | 1;
>  
>    atomic_write_barrier ();
>  
> @@ -143,7 +134,7 @@ stack_list_add (list_t *elem, list_t *list)
>  
>    atomic_write_barrier ();
>  
> -  in_flight_stack = 0;
> +  GL (dl_in_flight_stack) = 0;
>  }
>  
>  
> @@ -168,7 +159,7 @@ get_cached_stack (size_t *sizep, void **memp)
>       same.  As the very least there are only a few different sizes.
>       Therefore this loop will exit early most of the time with an
>       exact match.  */
> -  list_for_each (entry, &stack_cache)
> +  list_for_each (entry, &GL (dl_stack_cache))
>      {
>        struct pthread *curr;
>  
> @@ -208,7 +199,7 @@ get_cached_stack (size_t *sizep, void **memp)
>    stack_list_add (&result->list, &GL (dl_stack_used));
>  
>    /* And decrease the cache size.  */
> -  stack_cache_actsize -= result->stackblock_size;
> +  GL (dl_stack_cache_actsize) -= result->stackblock_size;
>  
>    /* Release the lock early.  */
>    lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
> @@ -249,7 +240,7 @@ free_stacks (size_t limit)
>    list_t *prev;
>  
>    /* Search from the end of the list.  */
> -  list_for_each_prev_safe (entry, prev, &stack_cache)
> +  list_for_each_prev_safe (entry, prev, &GL (dl_stack_cache))
>      {
>        struct pthread *curr;
>  
> @@ -260,7 +251,7 @@ free_stacks (size_t limit)
>  	  stack_list_del (entry);
>  
>  	  /* Account for the freed memory.  */
> -	  stack_cache_actsize -= curr->stackblock_size;
> +	  GL (dl_stack_cache_actsize) -= curr->stackblock_size;
>  
>  	  /* Free the memory associated with the ELF TLS.  */
>  	  _dl_deallocate_tls (TLS_TPADJ (curr), false);
> @@ -271,7 +262,7 @@ free_stacks (size_t limit)
>  	    abort ();
>  
>  	  /* Maybe we have freed enough.  */
> -	  if (stack_cache_actsize <= limit)
> +	  if (GL (dl_stack_cache_actsize) <= limit)
>  	    break;
>  	}
>      }
> @@ -293,10 +284,10 @@ queue_stack (struct pthread *stack)
>    /* We unconditionally add the stack to the list.  The memory may
>       still be in use but it will not be reused until the kernel marks
>       the stack as not used anymore.  */
> -  stack_list_add (&stack->list, &stack_cache);
> +  stack_list_add (&stack->list, &GL (dl_stack_cache));
>  
> -  stack_cache_actsize += stack->stackblock_size;
> -  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
> +  GL (dl_stack_cache_actsize) += stack->stackblock_size;
> +  if (__glibc_unlikely (GL (dl_stack_cache_actsize) > stack_cache_maxsize))
>      free_stacks (stack_cache_maxsize);
>  }
>  
> @@ -827,7 +818,7 @@ __make_stacks_executable (void **stack_endp)
>       might be wasted time but better spend it here than adding a check
>       in the fast path.  */
>    if (err == 0)
> -    list_for_each (runp, &stack_cache)
> +    list_for_each (runp, &GL (dl_stack_cache))
>        {
>  	err = change_stack_perm (list_entry (runp, struct pthread, list)
>  #ifdef NEED_SEPARATE_REGISTER_STACK
> @@ -857,10 +848,10 @@ __reclaim_stacks (void)
>       we have to be aware that we might have interrupted a list
>       operation.  */
>  
> -  if (in_flight_stack != 0)
> +  if (GL (dl_in_flight_stack) != 0)
>      {
> -      bool add_p = in_flight_stack & 1;
> -      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
> +      bool add_p = GL (dl_in_flight_stack) & 1;
> +      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
>  
>        if (add_p)
>  	{
> @@ -871,8 +862,8 @@ __reclaim_stacks (void)
>  
>  	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
>  	    l = &GL (dl_stack_used);
> -	  else if (stack_cache.next->prev != &stack_cache)
> -	    l = &stack_cache;
> +	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
> +	    l = &GL (dl_stack_cache);
>  
>  	  if (l != NULL)
>  	    {
> @@ -901,7 +892,7 @@ __reclaim_stacks (void)
>  	  curp->tid = 0;
>  
>  	  /* Account for the size of the stack.  */
> -	  stack_cache_actsize += curp->stackblock_size;
> +	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
>  
>  	  if (curp->specific_used)
>  	    {
> @@ -926,7 +917,7 @@ __reclaim_stacks (void)
>      }
>  
>    /* Add the stack of all running threads to the cache.  */
> -  list_splice (&GL (dl_stack_used), &stack_cache);
> +  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
>  
>    /* Remove the entry for the current thread to from the cache list
>       and add it to the list of running threads.  Which of the two
> @@ -945,7 +936,7 @@ __reclaim_stacks (void)
>    /* There is one thread running.  */
>    __nptl_nthreads = 1;
>  
> -  in_flight_stack = 0;
> +  GL (dl_in_flight_stack) = 0;
>  
>    /* Initialize locks.  */
>    GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index ee851ac789..81cce2e4d5 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -481,6 +481,17 @@ struct rtld_global
>    /* List of thread stacks that were allocated by the application.  */
>    EXTERN list_t _dl_stack_user;
>  
> +  /* List of queued thread stacks.  */
> +  EXTERN list_t _dl_stack_cache;
> +
> +  /* Total size of all stacks in the cache (sum over stackblock_size).  */
> +  EXTERN size_t _dl_stack_cache_actsize;
> +
> +  /* We need to record what list operations we are going to do so
> +     that, in case of an asynchronous interruption due to a fork()
> +     call, we can correct for the work.  */
> +  EXTERN uintptr_t _dl_in_flight_stack;
> +
>    /* Mutex protecting the stack lists.  */
>    EXTERN int _dl_stack_cache_lock;
>  #else
> diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
> index cb29222727..f1aaa5aa9d 100644
> --- a/sysdeps/nptl/dl-tls_init_tp.c
> +++ b/sysdeps/nptl/dl-tls_init_tp.c
> @@ -43,6 +43,7 @@ __tls_pre_init_tp (void)
>       initialized.  */
>    INIT_LIST_HEAD (&GL (dl_stack_used));
>    INIT_LIST_HEAD (&GL (dl_stack_user));
> +  INIT_LIST_HEAD (&GL (dl_stack_cache));
>  
>  #ifdef SHARED
>    ___rtld_mutex_lock = rtld_mutex_dummy;
>
  

Patch

diff --git a/elf/dl-support.c b/elf/dl-support.c
index f966a2e7cd..580b0202ad 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -192,6 +192,9 @@  int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
 #if THREAD_GSCOPE_IN_TCB
 list_t _dl_stack_used;
 list_t _dl_stack_user;
+list_t _dl_stack_cache;
+size_t _dl_stack_cache_actsize;
+uintptr_t _dl_in_flight_stack;
 int _dl_stack_cache_lock;
 #else
 int _dl_thread_gscope_count;
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 88c49f8154..71cfa874d1 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -103,15 +103,6 @@ 
 
 /* Maximum size in kB of cache.  */
 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40MiBi by default.  */
-static size_t stack_cache_actsize;
-
-/* List of queued stack frames.  */
-static LIST_HEAD (stack_cache);
-
-/* We need to record what list operations we are going to do so that,
-   in case of an asynchronous interruption due to a fork() call, we
-   can correct for the work.  */
-static uintptr_t in_flight_stack;
 
 /* Check whether the stack is still used or not.  */
 #define FREE_P(descr) ((descr)->tid <= 0)
@@ -120,7 +111,7 @@  static uintptr_t in_flight_stack;
 static void
 stack_list_del (list_t *elem)
 {
-  in_flight_stack = (uintptr_t) elem;
+  GL (dl_in_flight_stack) = (uintptr_t) elem;
 
   atomic_write_barrier ();
 
@@ -128,14 +119,14 @@  stack_list_del (list_t *elem)
 
   atomic_write_barrier ();
 
-  in_flight_stack = 0;
+  GL (dl_in_flight_stack) = 0;
 }
 
 
 static void
 stack_list_add (list_t *elem, list_t *list)
 {
-  in_flight_stack = (uintptr_t) elem | 1;
+  GL (dl_in_flight_stack) = (uintptr_t) elem | 1;
 
   atomic_write_barrier ();
 
@@ -143,7 +134,7 @@  stack_list_add (list_t *elem, list_t *list)
 
   atomic_write_barrier ();
 
-  in_flight_stack = 0;
+  GL (dl_in_flight_stack) = 0;
 }
 
 
@@ -168,7 +159,7 @@  get_cached_stack (size_t *sizep, void **memp)
      same.  As the very least there are only a few different sizes.
      Therefore this loop will exit early most of the time with an
      exact match.  */
-  list_for_each (entry, &stack_cache)
+  list_for_each (entry, &GL (dl_stack_cache))
     {
       struct pthread *curr;
 
@@ -208,7 +199,7 @@  get_cached_stack (size_t *sizep, void **memp)
   stack_list_add (&result->list, &GL (dl_stack_used));
 
   /* And decrease the cache size.  */
-  stack_cache_actsize -= result->stackblock_size;
+  GL (dl_stack_cache_actsize) -= result->stackblock_size;
 
   /* Release the lock early.  */
   lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
@@ -249,7 +240,7 @@  free_stacks (size_t limit)
   list_t *prev;
 
   /* Search from the end of the list.  */
-  list_for_each_prev_safe (entry, prev, &stack_cache)
+  list_for_each_prev_safe (entry, prev, &GL (dl_stack_cache))
     {
       struct pthread *curr;
 
@@ -260,7 +251,7 @@  free_stacks (size_t limit)
 	  stack_list_del (entry);
 
 	  /* Account for the freed memory.  */
-	  stack_cache_actsize -= curr->stackblock_size;
+	  GL (dl_stack_cache_actsize) -= curr->stackblock_size;
 
 	  /* Free the memory associated with the ELF TLS.  */
 	  _dl_deallocate_tls (TLS_TPADJ (curr), false);
@@ -271,7 +262,7 @@  free_stacks (size_t limit)
 	    abort ();
 
 	  /* Maybe we have freed enough.  */
-	  if (stack_cache_actsize <= limit)
+	  if (GL (dl_stack_cache_actsize) <= limit)
 	    break;
 	}
     }
@@ -293,10 +284,10 @@  queue_stack (struct pthread *stack)
   /* We unconditionally add the stack to the list.  The memory may
      still be in use but it will not be reused until the kernel marks
      the stack as not used anymore.  */
-  stack_list_add (&stack->list, &stack_cache);
+  stack_list_add (&stack->list, &GL (dl_stack_cache));
 
-  stack_cache_actsize += stack->stackblock_size;
-  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
+  GL (dl_stack_cache_actsize) += stack->stackblock_size;
+  if (__glibc_unlikely (GL (dl_stack_cache_actsize) > stack_cache_maxsize))
     free_stacks (stack_cache_maxsize);
 }
 
@@ -827,7 +818,7 @@  __make_stacks_executable (void **stack_endp)
      might be wasted time but better spend it here than adding a check
      in the fast path.  */
   if (err == 0)
-    list_for_each (runp, &stack_cache)
+    list_for_each (runp, &GL (dl_stack_cache))
       {
 	err = change_stack_perm (list_entry (runp, struct pthread, list)
 #ifdef NEED_SEPARATE_REGISTER_STACK
@@ -857,10 +848,10 @@  __reclaim_stacks (void)
      we have to be aware that we might have interrupted a list
      operation.  */
 
-  if (in_flight_stack != 0)
+  if (GL (dl_in_flight_stack) != 0)
     {
-      bool add_p = in_flight_stack & 1;
-      list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
+      bool add_p = GL (dl_in_flight_stack) & 1;
+      list_t *elem = (list_t *) (GL (dl_in_flight_stack) & ~(uintptr_t) 1);
 
       if (add_p)
 	{
@@ -871,8 +862,8 @@  __reclaim_stacks (void)
 
 	  if (GL (dl_stack_used).next->prev != &GL (dl_stack_used))
 	    l = &GL (dl_stack_used);
-	  else if (stack_cache.next->prev != &stack_cache)
-	    l = &stack_cache;
+	  else if (GL (dl_stack_cache).next->prev != &GL (dl_stack_cache))
+	    l = &GL (dl_stack_cache);
 
 	  if (l != NULL)
 	    {
@@ -901,7 +892,7 @@  __reclaim_stacks (void)
 	  curp->tid = 0;
 
 	  /* Account for the size of the stack.  */
-	  stack_cache_actsize += curp->stackblock_size;
+	  GL (dl_stack_cache_actsize) += curp->stackblock_size;
 
 	  if (curp->specific_used)
 	    {
@@ -926,7 +917,7 @@  __reclaim_stacks (void)
     }
 
   /* Add the stack of all running threads to the cache.  */
-  list_splice (&GL (dl_stack_used), &stack_cache);
+  list_splice (&GL (dl_stack_used), &GL (dl_stack_cache));
 
   /* Remove the entry for the current thread to from the cache list
      and add it to the list of running threads.  Which of the two
@@ -945,7 +936,7 @@  __reclaim_stacks (void)
   /* There is one thread running.  */
   __nptl_nthreads = 1;
 
-  in_flight_stack = 0;
+  GL (dl_in_flight_stack) = 0;
 
   /* Initialize locks.  */
   GL (dl_stack_cache_lock) = LLL_LOCK_INITIALIZER;
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index ee851ac789..81cce2e4d5 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -481,6 +481,17 @@  struct rtld_global
   /* List of thread stacks that were allocated by the application.  */
   EXTERN list_t _dl_stack_user;
 
+  /* List of queued thread stacks.  */
+  EXTERN list_t _dl_stack_cache;
+
+  /* Total size of all stacks in the cache (sum over stackblock_size).  */
+  EXTERN size_t _dl_stack_cache_actsize;
+
+  /* We need to record what list operations we are going to do so
+     that, in case of an asynchronous interruption due to a fork()
+     call, we can correct for the work.  */
+  EXTERN uintptr_t _dl_in_flight_stack;
+
   /* Mutex protecting the stack lists.  */
   EXTERN int _dl_stack_cache_lock;
 #else
diff --git a/sysdeps/nptl/dl-tls_init_tp.c b/sysdeps/nptl/dl-tls_init_tp.c
index cb29222727..f1aaa5aa9d 100644
--- a/sysdeps/nptl/dl-tls_init_tp.c
+++ b/sysdeps/nptl/dl-tls_init_tp.c
@@ -43,6 +43,7 @@  __tls_pre_init_tp (void)
      initialized.  */
   INIT_LIST_HEAD (&GL (dl_stack_used));
   INIT_LIST_HEAD (&GL (dl_stack_user));
+  INIT_LIST_HEAD (&GL (dl_stack_cache));
 
 #ifdef SHARED
   ___rtld_mutex_lock = rtld_mutex_dummy;