[10/13] nptl: Move changing of stack permissions into ld.so

Message ID fbec7584eb17918dbae61ed5e6fdd2fd3d49cff7.1620323953.git.fweimer@redhat.com
State Committed
Commit 2dd87703d4386f2776c5b5f375a494c91d7f9fe4
Headers
Series Linux: Move most stack management out of libpthread |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 6, 2021, 6:11 p.m. UTC
  All the stack lists are now in _rtld_global, so it is possible
to change stack permissions directly from there, instead of
calling into libpthread to do the change.
---
 elf/dl-load.c                          |  4 ++
 elf/dl-support.c                       | 10 ++--
 elf/rtld.c                             |  2 +
 nptl/allocatestack.c                   | 63 +--------------------
 nptl/nptl-init.c                       |  4 --
 nptl/pthreadP.h                        |  7 ++-
 sysdeps/generic/ldsodefs.h             | 11 +++-
 sysdeps/unix/sysv/linux/Versions       |  6 ++
 sysdeps/unix/sysv/linux/dl-execstack.c | 76 +++++++++++++++++++++++---
 9 files changed, 100 insertions(+), 83 deletions(-)
  

Comments

Carlos O'Donell May 9, 2021, 9:42 p.m. UTC | #1
On 5/6/21 2:11 PM, Florian Weimer via Libc-alpha wrote:
> All the stack lists are now in _rtld_global, so it is possible
> to change stack permissions directly from there, instead of
> calling into libpthread to do the change.

LGTM.

Tested on x86_64 and i686 without regression.

Tested-by: Carlos O'Donell <carlos@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>


> ---
>  elf/dl-load.c                          |  4 ++
>  elf/dl-support.c                       | 10 ++--
>  elf/rtld.c                             |  2 +
>  nptl/allocatestack.c                   | 63 +--------------------
>  nptl/nptl-init.c                       |  4 --
>  nptl/pthreadP.h                        |  7 ++-
>  sysdeps/generic/ldsodefs.h             | 11 +++-
>  sysdeps/unix/sysv/linux/Versions       |  6 ++
>  sysdeps/unix/sysv/linux/dl-execstack.c | 76 +++++++++++++++++++++++---
>  9 files changed, 100 insertions(+), 83 deletions(-)
> 
> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index 2832ab3540..918ec7546c 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -1368,7 +1368,11 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
>        check_consistency ();
>  #endif
>  
> +#if PTHREAD_IN_LIBC
> +      errval = _dl_make_stacks_executable (stack_endp);
> +#else
>        errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
> +#endif
>        if (errval)
>  	{
>  	  errstring = N_("\
> diff --git a/elf/dl-support.c b/elf/dl-support.c
> index 580b0202ad..dfc9ab760e 100644
> --- a/elf/dl-support.c
> +++ b/elf/dl-support.c
> @@ -183,12 +183,6 @@ uint64_t _dl_hwcap_mask __attribute__ ((nocommon));
>   * executable but this isn't true for all platforms.  */
>  ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS;
>  
> -/* If loading a shared object requires that we make the stack executable
> -   when it was not, we do it by calling this function.
> -   It returns an errno code or zero on success.  */
> -int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
> -
> -
>  #if THREAD_GSCOPE_IN_TCB
>  list_t _dl_stack_used;
>  list_t _dl_stack_user;
> @@ -197,6 +191,10 @@ size_t _dl_stack_cache_actsize;
>  uintptr_t _dl_in_flight_stack;
>  int _dl_stack_cache_lock;
>  #else
> +/* If loading a shared object requires that we make the stack executable
> +   when it was not, we do it by calling this function.
> +   It returns an errno code or zero on success.  */
> +int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
>  int _dl_thread_gscope_count;
>  void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
>  #endif
> diff --git a/elf/rtld.c b/elf/rtld.c
> index 1255d5cc7d..fbbd60b446 100644
> --- a/elf/rtld.c
> +++ b/elf/rtld.c
> @@ -1125,9 +1125,11 @@ dl_main (const ElfW(Phdr) *phdr,
>  
>    __tls_pre_init_tp ();
>  
> +#if !PTHREAD_IN_LIBC
>    /* The explicit initialization here is cheaper than processing the reloc
>       in the _rtld_local definition's initializer.  */
>    GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable;
> +#endif
>  
>    /* Process the environment variable which control the behaviour.  */
>    process_envvars (&state);
> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index 46089163f4..12cd1058d4 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -291,31 +291,6 @@ queue_stack (struct pthread *stack)
>      free_stacks (stack_cache_maxsize);
>  }
>  
> -
> -static int
> -change_stack_perm (struct pthread *pd)
> -{
> -#ifdef NEED_SEPARATE_REGISTER_STACK
> -  size_t pagemask = __getpagesize () - 1;
> -  void *stack = (pd->stackblock
> -		 + (((((pd->stackblock_size - pd->guardsize) / 2)
> -		      & pagemask) + pd->guardsize) & pagemask));
> -  size_t len = pd->stackblock + pd->stackblock_size - stack;
> -#elif _STACK_GROWS_DOWN
> -  void *stack = pd->stackblock + pd->guardsize;
> -  size_t len = pd->stackblock_size - pd->guardsize;
> -#elif _STACK_GROWS_UP
> -  void *stack = pd->stackblock;
> -  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
> -#else
> -# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
> -#endif
> -  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
> -    return errno;
> -
> -  return 0;
> -}
> -
>  /* Return the guard page position on allocated stack.  */
>  static inline char *
>  __attribute ((always_inline))
> @@ -625,7 +600,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  	  if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
>  				&& (prot & PROT_EXEC) == 0, 0))
>  	    {
> -	      int err = change_stack_perm (pd);
> +	      int err = __nptl_change_stack_perm (pd);
>  	      if (err != 0)
>  		{
>  		  /* Free the stack memory we just allocated.  */
> @@ -780,42 +755,6 @@ __deallocate_stack (struct pthread *pd)
>    lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
>  }
>  
> -
> -int
> -__make_stacks_executable (void **stack_endp)
> -{
> -  /* First the main thread's stack.  */
> -  int err = _dl_make_stack_executable (stack_endp);
> -  if (err != 0)
> -    return err;
> -
> -  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
> -
> -  list_t *runp;
> -  list_for_each (runp, &GL (dl_stack_used))
> -    {
> -      err = change_stack_perm (list_entry (runp, struct pthread, list));
> -      if (err != 0)
> -	break;
> -    }
> -
> -  /* Also change the permission for the currently unused stacks.  This
> -     might be wasted time but better spend it here than adding a check
> -     in the fast path.  */
> -  if (err == 0)
> -    list_for_each (runp, &GL (dl_stack_cache))
> -      {
> -	err = change_stack_perm (list_entry (runp, struct pthread, list));
> -	if (err != 0)
> -	  break;
> -      }
> -
> -  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
> -
> -  return err;
> -}
> -
> -
>  /* In case of a fork() call the memory allocation in the child will be
>     the same but only one thread is running.  All stacks except that of
>     the one running thread are not used anymore.  We have to recycle
> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
> index 2fb1117f3e..4c89e7a792 100644
> --- a/nptl/nptl-init.c
> +++ b/nptl/nptl-init.c
> @@ -173,10 +173,6 @@ __pthread_initialize_minimal_internal (void)
>    __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
>    lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
>  
> -#ifdef SHARED
> -  GL(dl_make_stack_executable_hook) = &__make_stacks_executable;
> -#endif
> -
>    /* Register the fork generation counter with the libc.  */
>    __libc_pthread_init (__reclaim_stacks);
>  }
> diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
> index 8ab247f977..3a6b436400 100644
> --- a/nptl/pthreadP.h
> +++ b/nptl/pthreadP.h
> @@ -335,8 +335,11 @@ extern void __deallocate_stack (struct pthread *pd) attribute_hidden;
>     function also re-initializes the lock for the stack cache.  */
>  extern void __reclaim_stacks (void) attribute_hidden;
>  
> -/* Make all threads's stacks executable.  */
> -extern int __make_stacks_executable (void **stack_endp) attribute_hidden;
> +/* Change the permissions of a thread stack.  Called from
> +   _dl_make_stacks_executable and pthread_create.  */
> +int
> +__nptl_change_stack_perm (struct pthread *pd);
> +rtld_hidden_proto (__nptl_change_stack_perm)
>  
>  /* longjmp handling.  */
>  extern void __pthread_cleanup_upto (__jmp_buf target, char *targetframe);
> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 81cce2e4d5..8426b5cbd8 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -416,10 +416,12 @@ struct rtld_global
>  #endif
>  #include <dl-procruntime.c>
>  
> +#if !PTHREAD_IN_LIBC
>    /* If loading a shared object requires that we make the stack executable
>       when it was not, we do it by calling this function.
>       It returns an errno code or zero on success.  */
>    EXTERN int (*_dl_make_stack_executable_hook) (void **);
> +#endif
>  
>    /* Prevailing state of the stack, PF_X indicating it's executable.  */
>    EXTERN ElfW(Word) _dl_stack_flags;
> @@ -717,10 +719,17 @@ extern const ElfW(Phdr) *_dl_phdr;
>  extern size_t _dl_phnum;
>  #endif
>  
> +#if PTHREAD_IN_LIBC
> +/* This function changes the permissions of all stacks (not just those
> +   of the main stack).  */
> +int _dl_make_stacks_executable (void **stack_endp) attribute_hidden;
> +#else
>  /* This is the initial value of GL(dl_make_stack_executable_hook).
> -   A threads library can change it.  */
> +   A threads library can change it.  The ld.so implementation changes
> +   the permissions of the main stack only.  */
>  extern int _dl_make_stack_executable (void **stack_endp);
>  rtld_hidden_proto (_dl_make_stack_executable)
> +#endif
>  
>  /* Variable pointing to the end of the stack (or close to it).  This value
>     must be constant over the runtime of the application.  Some programs
> diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
> index c35f783e2a..220bb2dffe 100644
> --- a/sysdeps/unix/sysv/linux/Versions
> +++ b/sysdeps/unix/sysv/linux/Versions
> @@ -181,3 +181,9 @@ libc {
>      __netlink_assert_response;
>    }
>  }
> +
> +ld {
> +  GLIBC_PRIVATE {
> +    __nptl_change_stack_perm;
> +  }
> +}
> diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
> index 3339138c42..e2449d1890 100644
> --- a/sysdeps/unix/sysv/linux/dl-execstack.c
> +++ b/sysdeps/unix/sysv/linux/dl-execstack.c
> @@ -16,20 +16,21 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> -#include <ldsodefs.h>
> -#include <sys/mman.h>
>  #include <errno.h>
> +#include <ldsodefs.h>
>  #include <libintl.h>
> -#include <stdbool.h>
> +#include <list.h>
> +#include <nptl/pthreadP.h>
>  #include <stackinfo.h>
> +#include <stdbool.h>
> +#include <sys/mman.h>
>  #include <sysdep.h>
> -
> +#include <unistd.h>
>  
>  extern int __stack_prot attribute_relro attribute_hidden;
>  
> -
> -int
> -_dl_make_stack_executable (void **stack_endp)
> +static int
> +make_main_stack_executable (void **stack_endp)
>  {
>    /* This gives us the highest/lowest page that needs to be changed.  */
>    uintptr_t page = ((uintptr_t) *stack_endp
> @@ -56,4 +57,63 @@ _dl_make_stack_executable (void **stack_endp)
>  
>    return result;
>  }
> -rtld_hidden_def (_dl_make_stack_executable)
> +
> +int
> +_dl_make_stacks_executable (void **stack_endp)
> +{
> +  /* First the main thread's stack.  */
> +  int err = make_main_stack_executable (stack_endp);
> +  if (err != 0)
> +    return err;
> +
> +  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
> +
> +  list_t *runp;
> +  list_for_each (runp, &GL (dl_stack_used))
> +    {
> +      err = __nptl_change_stack_perm (list_entry (runp, struct pthread, list));
> +      if (err != 0)
> +	break;
> +    }
> +
> +  /* Also change the permission for the currently unused stacks.  This
> +     might be wasted time but better spend it here than adding a check
> +     in the fast path.  */
> +  if (err == 0)
> +    list_for_each (runp, &GL (dl_stack_cache))
> +      {
> +	err = __nptl_change_stack_perm (list_entry (runp, struct pthread,
> +						    list));
> +	if (err != 0)
> +	  break;
> +      }
> +
> +  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
> +
> +  return err;
> +}
> +
> +int
> +__nptl_change_stack_perm (struct pthread *pd)
> +{
> +#ifdef NEED_SEPARATE_REGISTER_STACK
> +  size_t pagemask = __getpagesize () - 1;
> +  void *stack = (pd->stackblock
> +		 + (((((pd->stackblock_size - pd->guardsize) / 2)
> +		      & pagemask) + pd->guardsize) & pagemask));
> +  size_t len = pd->stackblock + pd->stackblock_size - stack;
> +#elif _STACK_GROWS_DOWN
> +  void *stack = pd->stackblock + pd->guardsize;
> +  size_t len = pd->stackblock_size - pd->guardsize;
> +#elif _STACK_GROWS_UP
> +  void *stack = pd->stackblock;
> +  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
> +#else
> +# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
> +#endif
> +  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
> +    return errno;
> +
> +  return 0;
> +}
> +rtld_hidden_def (__nptl_change_stack_perm)
>
  

Patch

diff --git a/elf/dl-load.c b/elf/dl-load.c
index 2832ab3540..918ec7546c 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1368,7 +1368,11 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
       check_consistency ();
 #endif
 
+#if PTHREAD_IN_LIBC
+      errval = _dl_make_stacks_executable (stack_endp);
+#else
       errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
+#endif
       if (errval)
 	{
 	  errstring = N_("\
diff --git a/elf/dl-support.c b/elf/dl-support.c
index 580b0202ad..dfc9ab760e 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -183,12 +183,6 @@  uint64_t _dl_hwcap_mask __attribute__ ((nocommon));
  * executable but this isn't true for all platforms.  */
 ElfW(Word) _dl_stack_flags = DEFAULT_STACK_PERMS;
 
-/* If loading a shared object requires that we make the stack executable
-   when it was not, we do it by calling this function.
-   It returns an errno code or zero on success.  */
-int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
-
-
 #if THREAD_GSCOPE_IN_TCB
 list_t _dl_stack_used;
 list_t _dl_stack_user;
@@ -197,6 +191,10 @@  size_t _dl_stack_cache_actsize;
 uintptr_t _dl_in_flight_stack;
 int _dl_stack_cache_lock;
 #else
+/* If loading a shared object requires that we make the stack executable
+   when it was not, we do it by calling this function.
+   It returns an errno code or zero on success.  */
+int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
 int _dl_thread_gscope_count;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 #endif
diff --git a/elf/rtld.c b/elf/rtld.c
index 1255d5cc7d..fbbd60b446 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1125,9 +1125,11 @@  dl_main (const ElfW(Phdr) *phdr,
 
   __tls_pre_init_tp ();
 
+#if !PTHREAD_IN_LIBC
   /* The explicit initialization here is cheaper than processing the reloc
      in the _rtld_local definition's initializer.  */
   GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable;
+#endif
 
   /* Process the environment variable which control the behaviour.  */
   process_envvars (&state);
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 46089163f4..12cd1058d4 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -291,31 +291,6 @@  queue_stack (struct pthread *stack)
     free_stacks (stack_cache_maxsize);
 }
 
-
-static int
-change_stack_perm (struct pthread *pd)
-{
-#ifdef NEED_SEPARATE_REGISTER_STACK
-  size_t pagemask = __getpagesize () - 1;
-  void *stack = (pd->stackblock
-		 + (((((pd->stackblock_size - pd->guardsize) / 2)
-		      & pagemask) + pd->guardsize) & pagemask));
-  size_t len = pd->stackblock + pd->stackblock_size - stack;
-#elif _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
-  size_t len = pd->stackblock_size - pd->guardsize;
-#elif _STACK_GROWS_UP
-  void *stack = pd->stackblock;
-  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
-#else
-# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
-#endif
-  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
-    return errno;
-
-  return 0;
-}
-
 /* Return the guard page position on allocated stack.  */
 static inline char *
 __attribute ((always_inline))
@@ -625,7 +600,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 	  if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
 				&& (prot & PROT_EXEC) == 0, 0))
 	    {
-	      int err = change_stack_perm (pd);
+	      int err = __nptl_change_stack_perm (pd);
 	      if (err != 0)
 		{
 		  /* Free the stack memory we just allocated.  */
@@ -780,42 +755,6 @@  __deallocate_stack (struct pthread *pd)
   lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 }
 
-
-int
-__make_stacks_executable (void **stack_endp)
-{
-  /* First the main thread's stack.  */
-  int err = _dl_make_stack_executable (stack_endp);
-  if (err != 0)
-    return err;
-
-  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      err = change_stack_perm (list_entry (runp, struct pthread, list));
-      if (err != 0)
-	break;
-    }
-
-  /* Also change the permission for the currently unused stacks.  This
-     might be wasted time but better spend it here than adding a check
-     in the fast path.  */
-  if (err == 0)
-    list_for_each (runp, &GL (dl_stack_cache))
-      {
-	err = change_stack_perm (list_entry (runp, struct pthread, list));
-	if (err != 0)
-	  break;
-      }
-
-  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  return err;
-}
-
-
 /* In case of a fork() call the memory allocation in the child will be
    the same but only one thread is running.  All stacks except that of
    the one running thread are not used anymore.  We have to recycle
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index 2fb1117f3e..4c89e7a792 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -173,10 +173,6 @@  __pthread_initialize_minimal_internal (void)
   __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
   lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 
-#ifdef SHARED
-  GL(dl_make_stack_executable_hook) = &__make_stacks_executable;
-#endif
-
   /* Register the fork generation counter with the libc.  */
   __libc_pthread_init (__reclaim_stacks);
 }
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index 8ab247f977..3a6b436400 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -335,8 +335,11 @@  extern void __deallocate_stack (struct pthread *pd) attribute_hidden;
    function also re-initializes the lock for the stack cache.  */
 extern void __reclaim_stacks (void) attribute_hidden;
 
-/* Make all threads's stacks executable.  */
-extern int __make_stacks_executable (void **stack_endp) attribute_hidden;
+/* Change the permissions of a thread stack.  Called from
+   _dl_make_stacks_executable and pthread_create.  */
+int
+__nptl_change_stack_perm (struct pthread *pd);
+rtld_hidden_proto (__nptl_change_stack_perm)
 
 /* longjmp handling.  */
 extern void __pthread_cleanup_upto (__jmp_buf target, char *targetframe);
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 81cce2e4d5..8426b5cbd8 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -416,10 +416,12 @@  struct rtld_global
 #endif
 #include <dl-procruntime.c>
 
+#if !PTHREAD_IN_LIBC
   /* If loading a shared object requires that we make the stack executable
      when it was not, we do it by calling this function.
      It returns an errno code or zero on success.  */
   EXTERN int (*_dl_make_stack_executable_hook) (void **);
+#endif
 
   /* Prevailing state of the stack, PF_X indicating it's executable.  */
   EXTERN ElfW(Word) _dl_stack_flags;
@@ -717,10 +719,17 @@  extern const ElfW(Phdr) *_dl_phdr;
 extern size_t _dl_phnum;
 #endif
 
+#if PTHREAD_IN_LIBC
+/* This function changes the permissions of all stacks (not just those
+   of the main stack).  */
+int _dl_make_stacks_executable (void **stack_endp) attribute_hidden;
+#else
 /* This is the initial value of GL(dl_make_stack_executable_hook).
-   A threads library can change it.  */
+   A threads library can change it.  The ld.so implementation changes
+   the permissions of the main stack only.  */
 extern int _dl_make_stack_executable (void **stack_endp);
 rtld_hidden_proto (_dl_make_stack_executable)
+#endif
 
 /* Variable pointing to the end of the stack (or close to it).  This value
    must be constant over the runtime of the application.  Some programs
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index c35f783e2a..220bb2dffe 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -181,3 +181,9 @@  libc {
     __netlink_assert_response;
   }
 }
+
+ld {
+  GLIBC_PRIVATE {
+    __nptl_change_stack_perm;
+  }
+}
diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
index 3339138c42..e2449d1890 100644
--- a/sysdeps/unix/sysv/linux/dl-execstack.c
+++ b/sysdeps/unix/sysv/linux/dl-execstack.c
@@ -16,20 +16,21 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <ldsodefs.h>
-#include <sys/mman.h>
 #include <errno.h>
+#include <ldsodefs.h>
 #include <libintl.h>
-#include <stdbool.h>
+#include <list.h>
+#include <nptl/pthreadP.h>
 #include <stackinfo.h>
+#include <stdbool.h>
+#include <sys/mman.h>
 #include <sysdep.h>
-
+#include <unistd.h>
 
 extern int __stack_prot attribute_relro attribute_hidden;
 
-
-int
-_dl_make_stack_executable (void **stack_endp)
+static int
+make_main_stack_executable (void **stack_endp)
 {
   /* This gives us the highest/lowest page that needs to be changed.  */
   uintptr_t page = ((uintptr_t) *stack_endp
@@ -56,4 +57,63 @@  _dl_make_stack_executable (void **stack_endp)
 
   return result;
 }
-rtld_hidden_def (_dl_make_stack_executable)
+
+int
+_dl_make_stacks_executable (void **stack_endp)
+{
+  /* First the main thread's stack.  */
+  int err = make_main_stack_executable (stack_endp);
+  if (err != 0)
+    return err;
+
+  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+
+  list_t *runp;
+  list_for_each (runp, &GL (dl_stack_used))
+    {
+      err = __nptl_change_stack_perm (list_entry (runp, struct pthread, list));
+      if (err != 0)
+	break;
+    }
+
+  /* Also change the permission for the currently unused stacks.  This
+     might be wasted time but better spend it here than adding a check
+     in the fast path.  */
+  if (err == 0)
+    list_for_each (runp, &GL (dl_stack_cache))
+      {
+	err = __nptl_change_stack_perm (list_entry (runp, struct pthread,
+						    list));
+	if (err != 0)
+	  break;
+      }
+
+  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
+
+  return err;
+}
+
+int
+__nptl_change_stack_perm (struct pthread *pd)
+{
+#ifdef NEED_SEPARATE_REGISTER_STACK
+  size_t pagemask = __getpagesize () - 1;
+  void *stack = (pd->stackblock
+		 + (((((pd->stackblock_size - pd->guardsize) / 2)
+		      & pagemask) + pd->guardsize) & pagemask));
+  size_t len = pd->stackblock + pd->stackblock_size - stack;
+#elif _STACK_GROWS_DOWN
+  void *stack = pd->stackblock + pd->guardsize;
+  size_t len = pd->stackblock_size - pd->guardsize;
+#elif _STACK_GROWS_UP
+  void *stack = pd->stackblock;
+  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
+#else
+# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
+#endif
+  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
+    return errno;
+
+  return 0;
+}
+rtld_hidden_def (__nptl_change_stack_perm)