[13/23] nptl: Eliminate the __static_tls_size, __static_tls_align_m1 variables

Message ID 3a4fd2cf500c263176935ce1d6f78aea829207c0.1620838411.git.fweimer@redhat.com
State Superseded
Headers
Series nptl: Move almost all remaining functions into libc |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 12, 2021, 4:58 p.m. UTC
  Use the __nptl_tls_static_size_for_stack inline function instead,
and the GLRO (dl_tls_static_align) value directly.

The computation of GLRO (dl_tls_static_align) in
_dl_determine_tlsoffset ensures that the alignment is at least
TLS_TCB_ALIGN, which is at least STACK_ALIGN (see allocate_stack).
Therefore, the additional rounding-up step is removed.

Also move the initialization of the default stack size from
__pthread_initialize_minimal_internal to __pthread_early_init.
This introduces an extra system call during single-threaded startup,
but this simplifies the initialization sequence.  No locking is
needed around the writes to __default_pthread_attr because the
process is single-threaded at this point.
---
 elf/dl-tls.c                      |  5 ++--
 nptl/allocatestack.c              | 25 +++++++++--------
 nptl/nptl-init.c                  | 46 ++-----------------------------
 nptl/nptl-stack.h                 | 11 +++++++-
 nptl/pthreadP.h                   |  4 ---
 sysdeps/nptl/pthread_early_init.h | 28 +++++++++++++++++++
 6 files changed, 58 insertions(+), 61 deletions(-)
  

Comments

Adhemerval Zanella Netto May 14, 2021, 12:40 p.m. UTC | #1
On 12/05/2021 13:58, Florian Weimer via Libc-alpha wrote:
> Use the  __nptl_tls_static_size_for_stack inline function instead,
> and the GLRO (dl_tls_static_align) value directly.
> 
> The computation of GLRO (dl_tls_static_align)  in
> _dl_determine_tlsoffset ensures that the alignment is at least
> TLS_TCB_ALIGN, which at least STACK_ALIGN (see allocate_stack).
> Therefore, the additional rounding-up step is removed.
> 
> ALso move the initialization of the default stack size from
> __pthread_initialize_minimal_internal to __pthread_early_init.
> This introduces an extra system call during single-threaded startup,
> but this simplifies the initialization sequence.  No locking is
> needed around the writes to __default_pthread_attr because the
> process is single-threaded at this point.

LGTM, with some comments below.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>

> ---
>  elf/dl-tls.c                      |  5 ++--
>  nptl/allocatestack.c              | 25 +++++++++--------
>  nptl/nptl-init.c                  | 46 ++-----------------------------
>  nptl/nptl-stack.h                 | 11 +++++++-
>  nptl/pthreadP.h                   |  4 ---
>  sysdeps/nptl/pthread_early_init.h | 28 +++++++++++++++++++
>  6 files changed, 58 insertions(+), 61 deletions(-)
> 
> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index 91031c2b72..e531ec5913 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -386,8 +386,9 @@ allocate_dtv (void *result)
>    return result;
>  }
>  
> -
> -/* Get size and alignment requirements of the static TLS block.  */
> +/* Get size and alignment requirements of the static TLS block.  This
> +   function is no longer used by glibc itself, but the GCC sanitizers
> +   use it despite the GLIBC_PRIVATE status.  */
>  void
>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
>  {

Ok. I am not very fond of adding this as de facto ABI; maybe we should
properly export it outside GLIBC_PRIVATE, since binaries now do rely
on it (the sanitizer API project seems to be stalled).

> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index c0a5c4d96d..dc81a2ca73 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -254,6 +254,8 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>    struct pthread *pd;
>    size_t size;
>    size_t pagesize_m1 = __getpagesize () - 1;
> +  size_t tls_static_size_for_stack = __nptl_tls_static_size_for_stack ();
> +  size_t tls_static_align_m1 = GLRO (dl_tls_static_align) - 1;
>  
>    assert (powerof2 (pagesize_m1 + 1));
>    assert (TCB_ALIGNMENT >= STACK_ALIGN);
> @@ -284,17 +286,18 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>        /* If the user also specified the size of the stack make sure it
>  	 is large enough.  */
>        if (attr->stacksize != 0
> -	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
> +	  && attr->stacksize < (tls_static_size_for_stack
> +				+ MINIMAL_REST_STACK))
>  	return EINVAL;
>  
>        /* Adjust stack size for alignment of the TLS block.  */

Ok.

>  #if TLS_TCB_AT_TP
>        adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
> -	    & __static_tls_align_m1;
> +	    & tls_static_align_m1;
>        assert (size > adj + TLS_TCB_SIZE);
>  #elif TLS_DTV_AT_TP
> -      adj = ((uintptr_t) stackaddr - __static_tls_size)
> -	    & __static_tls_align_m1;
> +      adj = ((uintptr_t) stackaddr - tls_static_size_for_stack)
> +	    & tls_static_align_m1;
>        assert (size > adj);
>  #endif
>  

Ok.

> @@ -307,7 +310,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  			       - TLS_TCB_SIZE - adj);
>  #elif TLS_DTV_AT_TP
>        pd = (struct pthread *) (((uintptr_t) stackaddr
> -				- __static_tls_size - adj)
> +				- tls_static_size_for_stack - adj)
>  			       - TLS_PRE_TCB_SIZE);
>  #endif
>  
> @@ -366,7 +369,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  			| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
>  
>        /* Adjust the stack size for alignment.  */
> -      size &= ~__static_tls_align_m1;
> +      size &= ~tls_static_align_m1;
>        assert (size != 0);
>  
>        /* Make sure the size of the stack is enough for the guard and

Ok.

> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  	/* Arithmetic overflow.  */
>  	return EINVAL;
>        size += guardsize;
> -      if (__builtin_expect (size < ((guardsize + __static_tls_size
> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
>  				     + MINIMAL_REST_STACK + pagesize_m1)
>  				    & ~pagesize_m1),
>  			    0))

Use __glibc_unlikely here.

> @@ -414,11 +417,11 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  #if TLS_TCB_AT_TP
>  	  pd = (struct pthread *) ((((uintptr_t) mem + size)
>  				    - TLS_TCB_SIZE)
> -				   & ~__static_tls_align_m1);
> +				   & ~tls_static_align_m1);
>  #elif TLS_DTV_AT_TP
>  	  pd = (struct pthread *) ((((uintptr_t) mem + size
> -				    - __static_tls_size)
> -				    & ~__static_tls_align_m1)
> +				    - tls_static_size_for_stack)
> +				    & ~tls_static_align_m1)
>  				   - TLS_PRE_TCB_SIZE);
>  #endif
>  

Ok.

> @@ -602,7 +605,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>  
>  # if TLS_TCB_AT_TP
>    /* The stack begins before the TCB and the static TLS block.  */
> -  stacktop = ((char *) (pd + 1) - __static_tls_size);
> +  stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);
>  # elif TLS_DTV_AT_TP
>    stacktop = (char *) (pd - 1);
>  # endif

Ok.

> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
> index bc4831ac89..732e580355 100644
> --- a/nptl/nptl-init.c
> +++ b/nptl/nptl-init.c
> @@ -36,10 +36,7 @@
>  #include <kernel-features.h>
>  #include <libc-pointer-arith.h>
>  #include <pthread_mutex_conf.h>
> -
> -/* Size and alignment of static TLS block.  */
> -size_t __static_tls_size;
> -size_t __static_tls_align_m1;
> +#include <nptl-stack.h>
>  
>  /* Version of the library, used in libthread_db to detect mismatches.  */
>  static const char nptl_version[] __attribute_used__ = VERSION;
> @@ -47,44 +44,6 @@ static const char nptl_version[] __attribute_used__ = VERSION;
>  void
>  __pthread_initialize_minimal_internal (void)
>  {
> -  /* Get the size of the static and alignment requirements for the TLS
> -     block.  */
> -  size_t static_tls_align;
> -  _dl_get_tls_static_info (&__static_tls_size, &static_tls_align);
> -
> -  /* Make sure the size takes all the alignments into account.  */
> -  if (STACK_ALIGN > static_tls_align)
> -    static_tls_align = STACK_ALIGN;
> -  __static_tls_align_m1 = static_tls_align - 1;
> -
> -  __static_tls_size = roundup (__static_tls_size, static_tls_align);
> -
> -  /* Determine the default allowed stack size.  This is the size used
> -     in case the user does not specify one.  */
> -  struct rlimit limit;
> -  if (__getrlimit (RLIMIT_STACK, &limit) != 0
> -      || limit.rlim_cur == RLIM_INFINITY)
> -    /* The system limit is not usable.  Use an architecture-specific
> -       default.  */
> -    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
> -  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
> -    /* The system limit is unusably small.
> -       Use the minimal size acceptable.  */
> -    limit.rlim_cur = PTHREAD_STACK_MIN;
> -
> -  /* Make sure it meets the minimum size that allocate_stack
> -     (allocatestack.c) will demand, which depends on the page size.  */
> -  const uintptr_t pagesz = GLRO(dl_pagesize);
> -  const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
> -  if (limit.rlim_cur < minstack)
> -    limit.rlim_cur = minstack;
> -
> -  /* Round the resource limit up to page size.  */
> -  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
> -  lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
> -  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
> -  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
> -  lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
>  }
>  strong_alias (__pthread_initialize_minimal_internal,
>  	      __pthread_initialize_minimal)

Do we still need the empty __pthread_initialize_minimal_internal function?

> @@ -101,5 +60,6 @@ strong_alias (__pthread_initialize_minimal_internal,
>  size_t
>  __pthread_get_minstack (const pthread_attr_t *attr)
>  {
> -  return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;
> +  return (GLRO(dl_pagesize) + __nptl_tls_static_size_for_stack ()
> +	  + PTHREAD_STACK_MIN);
>  }

Ok.

> diff --git a/nptl/nptl-stack.h b/nptl/nptl-stack.h
> index 8631b61816..a6bd8df77f 100644
> --- a/nptl/nptl-stack.h
> +++ b/nptl/nptl-stack.h
> @@ -20,7 +20,8 @@
>  #ifndef _NPTL_STACK_H
>  #define _NPTL_STACK_H
>  
> -#include <descr.h>
> +#include <nptl/descr.h>
> +#include <ldsodefs.h>
>  #include <list.h>
>  #include <stdbool.h>
>  
> @@ -47,4 +48,12 @@ libc_hidden_proto (__nptl_deallocate_stack)
>  /* Free stacks until cache size is lower than LIMIT.  */
>  void __nptl_free_stacks (size_t limit) attribute_hidden;
>  
> +/* Compute the size of the static TLS area based on data from the
> +   dynamic loader.  */
> +static inline size_t
> +__nptl_tls_static_size_for_stack (void)
> +{
> +  return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));
> +}
> +
>  #endif /* _NPTL_STACK_H */

Ok.

> diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
> index e33b071a4d..9ee61af8b3 100644
> --- a/nptl/pthreadP.h
> +++ b/nptl/pthreadP.h
> @@ -205,10 +205,6 @@ libc_hidden_proto (__default_pthread_attr_lock)
>  /* Called from __libc_freeres to deallocate the default attribute.  */
>  extern void __default_pthread_attr_freeres (void) attribute_hidden;
>  
> -/* Size and alignment of static TLS block.  */
> -extern size_t __static_tls_size attribute_hidden;
> -extern size_t __static_tls_align_m1 attribute_hidden;
> -
>  /* Attribute handling.  */
>  extern struct pthread_attr *__attr_list attribute_hidden;
>  extern int __attr_list_lock attribute_hidden;

Ok.

> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h
> index 2d15303dd9..5b49ce39c2 100644
> --- a/sysdeps/nptl/pthread_early_init.h
> +++ b/sysdeps/nptl/pthread_early_init.h
> @@ -19,12 +19,40 @@
>  #ifndef _PTHREAD_EARLY_INIT_H
>  #define _PTHREAD_EARLY_INIT_H 1
>  
> +#include <nptl/nptl-stack.h>
>  #include <nptl/pthreadP.h>
>  #include <pthread_mutex_conf.h>
> +#include <sys/resource.h>
>  
>  static inline void
>  __pthread_early_init (void)
>  {
> +  /* Determine the default allowed stack size.  This is the size used
> +     in case the user does not specify one.  */
> +  struct rlimit limit;
> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0
> +      || limit.rlim_cur == RLIM_INFINITY)
> +    /* The system limit is not usable.  Use an architecture-specific
> +       default.  */
> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
> +    /* The system limit is unusably small.
> +       Use the minimal size acceptable.  */
> +    limit.rlim_cur = PTHREAD_STACK_MIN;

Maybe we should move away from non-LFS calls inside glibc and use
__getrlimit64 here instead.

> +
> +  /* Make sure it meets the minimum size that allocate_stack
> +     (allocatestack.c) will demand, which depends on the page size.  */
> +  const uintptr_t pagesz = GLRO(dl_pagesize);
> +  const size_t minstack = (pagesz + __nptl_tls_static_size_for_stack ()
> +                           + MINIMAL_REST_STACK);
> +  if (limit.rlim_cur < minstack)
> +    limit.rlim_cur = minstack;
> +
> +  /* Round the resource limit up to page size.  */
> +  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
> +  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
> +  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
> +
>  #if HAVE_TUNABLES
>    __pthread_tunables_init ();
>  #endif
> 

Ok.
  
Florian Weimer May 17, 2021, 8:14 a.m. UTC | #2
* Adhemerval Zanella via Libc-alpha:

>> -/* Get size and alignment requirements of the static TLS block.  */
>> +/* Get size and alignment requirements of the static TLS block.  This
>> +   function is no longer used by glibc itself, but the GCC sanitizers
>> +   use it despite the GLIBC_PRIVATE status.  */
>>  void
>>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
>>  {
>
> Ok. I am not very found of adding this as de facto ABI, maybe we
> proper export it outside GLIBC_PRIVATE since now binaries do rely
> on them (since the sanitizer API project seems to be stalled).

The API is not future-proof because I want to implement resizable static
TLS one day.  I think it's useful to document that GCC uses it.  (A
first version of this patch removed this function, but then I checked
GCC.)

>> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>>  	/* Arithmetic overflow.  */
>>  	return EINVAL;
>>        size += guardsize;
>> -      if (__builtin_expect (size < ((guardsize + __static_tls_size
>> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
>>  				     + MINIMAL_REST_STACK + pagesize_m1)
>>  				    & ~pagesize_m1),
>>  			    0))
>
> Use __glibc_likely here.

I think we should eliminate all these hints from the thread creation
code.  It seems unlikely that there is a performance impact, and the
likely/unlikely hints are wrong for applications that consistently use
specific pthread_create features anyway.  But I'd prefer this to be a
separate patch.

> Do we still need the empty __pthread_initialize_minimal_internal
> function?

It is still called from the preinit code.

>> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h
>> index 2d15303dd9..5b49ce39c2 100644
>> --- a/sysdeps/nptl/pthread_early_init.h
>> +++ b/sysdeps/nptl/pthread_early_init.h
>> @@ -19,12 +19,40 @@
>>  #ifndef _PTHREAD_EARLY_INIT_H
>>  #define _PTHREAD_EARLY_INIT_H 1
>>  
>> +#include <nptl/nptl-stack.h>
>>  #include <nptl/pthreadP.h>
>>  #include <pthread_mutex_conf.h>
>> +#include <sys/resource.h>
>>  
>>  static inline void
>>  __pthread_early_init (void)
>>  {
>> +  /* Determine the default allowed stack size.  This is the size used
>> +     in case the user does not specify one.  */
>> +  struct rlimit limit;
>> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0
>> +      || limit.rlim_cur == RLIM_INFINITY)
>> +    /* The system limit is not usable.  Use an architecture-specific
>> +       default.  */
>> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
>> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
>> +    /* The system limit is unusably small.
>> +       Use the minimal size acceptable.  */
>> +    limit.rlim_cur = PTHREAD_STACK_MIN;
>
> Maybe we should move away from non-LFS inside glibc call and use 
> __getlimit64 instead here.

Yes, there's another call in nptl/pthread_getattr_np.c.

Thanks,
Florian
  
Adhemerval Zanella Netto May 18, 2021, 5:33 p.m. UTC | #3
On 17/05/2021 05:14, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>>> -/* Get size and alignment requirements of the static TLS block.  */
>>> +/* Get size and alignment requirements of the static TLS block.  This
>>> +   function is no longer used by glibc itself, but the GCC sanitizers
>>> +   use it despite the GLIBC_PRIVATE status.  */
>>>  void
>>>  _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
>>>  {
>>
>> Ok. I am not very found of adding this as de facto ABI, maybe we
>> proper export it outside GLIBC_PRIVATE since now binaries do rely
>> on them (since the sanitizer API project seems to be stalled).
> 
> The API is not future-proof because I want to implement resizable static
> TLS one day.  I think it's useful to document that GCC uses it.  (A
> first version of this patch removed this function, but then I checked
> GCC.)

Maybe we can fix it in gcc/sanitizer so that we are not bound to add
backward-compatibility symbols for GLIBC_PRIVATE.

> 
>>> @@ -385,7 +388,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
>>>  	/* Arithmetic overflow.  */
>>>  	return EINVAL;
>>>        size += guardsize;
>>> -      if (__builtin_expect (size < ((guardsize + __static_tls_size
>>> +      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
>>>  				     + MINIMAL_REST_STACK + pagesize_m1)
>>>  				    & ~pagesize_m1),
>>>  			    0))
>>
>> Use __glibc_likely here.
> 
> I think we should eliminate all these hints from the thread creation
> code.  It seems unlikely that there is a performance impact, and the
> likely/unlikely hints are wrong for applications that consistently use
> specific pthread_create features anyway.  But I'd prefer this to be a
> separate patch.

Fair enough.  In fact, I think glibc overuses branch hints in a
lot of places, and I have the feeling most of them do not improve much.


> 
>> Do we still need the empty __pthread_initialize_minimal_internal
>> function?
> 
> It is still called from the preinit code.

Ok.

> 
>>> diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h
>>> index 2d15303dd9..5b49ce39c2 100644
>>> --- a/sysdeps/nptl/pthread_early_init.h
>>> +++ b/sysdeps/nptl/pthread_early_init.h
>>> @@ -19,12 +19,40 @@
>>>  #ifndef _PTHREAD_EARLY_INIT_H
>>>  #define _PTHREAD_EARLY_INIT_H 1
>>>  
>>> +#include <nptl/nptl-stack.h>
>>>  #include <nptl/pthreadP.h>
>>>  #include <pthread_mutex_conf.h>
>>> +#include <sys/resource.h>
>>>  
>>>  static inline void
>>>  __pthread_early_init (void)
>>>  {
>>> +  /* Determine the default allowed stack size.  This is the size used
>>> +     in case the user does not specify one.  */
>>> +  struct rlimit limit;
>>> +  if (__getrlimit (RLIMIT_STACK, &limit) != 0
>>> +      || limit.rlim_cur == RLIM_INFINITY)
>>> +    /* The system limit is not usable.  Use an architecture-specific
>>> +       default.  */
>>> +    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
>>> +  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
>>> +    /* The system limit is unusably small.
>>> +       Use the minimal size acceptable.  */
>>> +    limit.rlim_cur = PTHREAD_STACK_MIN;
>>
>> Maybe we should move away from non-LFS inside glibc call and use 
>> __getlimit64 instead here.
> 
> Yes, there's another call in nptl/pthread_getattr_np.c.
> 
> Thanks,
> Florian
>
  

Patch

diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 91031c2b72..e531ec5913 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -386,8 +386,9 @@  allocate_dtv (void *result)
   return result;
 }
 
-
-/* Get size and alignment requirements of the static TLS block.  */
+/* Get size and alignment requirements of the static TLS block.  This
+   function is no longer used by glibc itself, but the GCC sanitizers
+   use it despite the GLIBC_PRIVATE status.  */
 void
 _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
 {
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index c0a5c4d96d..dc81a2ca73 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -254,6 +254,8 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
   struct pthread *pd;
   size_t size;
   size_t pagesize_m1 = __getpagesize () - 1;
+  size_t tls_static_size_for_stack = __nptl_tls_static_size_for_stack ();
+  size_t tls_static_align_m1 = GLRO (dl_tls_static_align) - 1;
 
   assert (powerof2 (pagesize_m1 + 1));
   assert (TCB_ALIGNMENT >= STACK_ALIGN);
@@ -284,17 +286,18 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
       /* If the user also specified the size of the stack make sure it
 	 is large enough.  */
       if (attr->stacksize != 0
-	  && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
+	  && attr->stacksize < (tls_static_size_for_stack
+				+ MINIMAL_REST_STACK))
 	return EINVAL;
 
       /* Adjust stack size for alignment of the TLS block.  */
 #if TLS_TCB_AT_TP
       adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
-	    & __static_tls_align_m1;
+	    & tls_static_align_m1;
       assert (size > adj + TLS_TCB_SIZE);
 #elif TLS_DTV_AT_TP
-      adj = ((uintptr_t) stackaddr - __static_tls_size)
-	    & __static_tls_align_m1;
+      adj = ((uintptr_t) stackaddr - tls_static_size_for_stack)
+	    & tls_static_align_m1;
       assert (size > adj);
 #endif
 
@@ -307,7 +310,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 			       - TLS_TCB_SIZE - adj);
 #elif TLS_DTV_AT_TP
       pd = (struct pthread *) (((uintptr_t) stackaddr
-				- __static_tls_size - adj)
+				- tls_static_size_for_stack - adj)
 			       - TLS_PRE_TCB_SIZE);
 #endif
 
@@ -366,7 +369,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 			| ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
 
       /* Adjust the stack size for alignment.  */
-      size &= ~__static_tls_align_m1;
+      size &= ~tls_static_align_m1;
       assert (size != 0);
 
       /* Make sure the size of the stack is enough for the guard and
@@ -385,7 +388,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 	/* Arithmetic overflow.  */
 	return EINVAL;
       size += guardsize;
-      if (__builtin_expect (size < ((guardsize + __static_tls_size
+      if (__builtin_expect (size < ((guardsize + tls_static_size_for_stack
 				     + MINIMAL_REST_STACK + pagesize_m1)
 				    & ~pagesize_m1),
 			    0))
@@ -414,11 +417,11 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 #if TLS_TCB_AT_TP
 	  pd = (struct pthread *) ((((uintptr_t) mem + size)
 				    - TLS_TCB_SIZE)
-				   & ~__static_tls_align_m1);
+				   & ~tls_static_align_m1);
 #elif TLS_DTV_AT_TP
 	  pd = (struct pthread *) ((((uintptr_t) mem + size
-				    - __static_tls_size)
-				    & ~__static_tls_align_m1)
+				    - tls_static_size_for_stack)
+				    & ~tls_static_align_m1)
 				   - TLS_PRE_TCB_SIZE);
 #endif
 
@@ -602,7 +605,7 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 # if TLS_TCB_AT_TP
   /* The stack begins before the TCB and the static TLS block.  */
-  stacktop = ((char *) (pd + 1) - __static_tls_size);
+  stacktop = ((char *) (pd + 1) - tls_static_size_for_stack);
 # elif TLS_DTV_AT_TP
   stacktop = (char *) (pd - 1);
 # endif
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index bc4831ac89..732e580355 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -36,10 +36,7 @@ 
 #include <kernel-features.h>
 #include <libc-pointer-arith.h>
 #include <pthread_mutex_conf.h>
-
-/* Size and alignment of static TLS block.  */
-size_t __static_tls_size;
-size_t __static_tls_align_m1;
+#include <nptl-stack.h>
 
 /* Version of the library, used in libthread_db to detect mismatches.  */
 static const char nptl_version[] __attribute_used__ = VERSION;
@@ -47,44 +44,6 @@  static const char nptl_version[] __attribute_used__ = VERSION;
 void
 __pthread_initialize_minimal_internal (void)
 {
-  /* Get the size of the static and alignment requirements for the TLS
-     block.  */
-  size_t static_tls_align;
-  _dl_get_tls_static_info (&__static_tls_size, &static_tls_align);
-
-  /* Make sure the size takes all the alignments into account.  */
-  if (STACK_ALIGN > static_tls_align)
-    static_tls_align = STACK_ALIGN;
-  __static_tls_align_m1 = static_tls_align - 1;
-
-  __static_tls_size = roundup (__static_tls_size, static_tls_align);
-
-  /* Determine the default allowed stack size.  This is the size used
-     in case the user does not specify one.  */
-  struct rlimit limit;
-  if (__getrlimit (RLIMIT_STACK, &limit) != 0
-      || limit.rlim_cur == RLIM_INFINITY)
-    /* The system limit is not usable.  Use an architecture-specific
-       default.  */
-    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
-  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
-    /* The system limit is unusably small.
-       Use the minimal size acceptable.  */
-    limit.rlim_cur = PTHREAD_STACK_MIN;
-
-  /* Make sure it meets the minimum size that allocate_stack
-     (allocatestack.c) will demand, which depends on the page size.  */
-  const uintptr_t pagesz = GLRO(dl_pagesize);
-  const size_t minstack = pagesz + __static_tls_size + MINIMAL_REST_STACK;
-  if (limit.rlim_cur < minstack)
-    limit.rlim_cur = minstack;
-
-  /* Round the resource limit up to page size.  */
-  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
-  lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
-  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
-  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
-  lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
 }
 strong_alias (__pthread_initialize_minimal_internal,
 	      __pthread_initialize_minimal)
@@ -101,5 +60,6 @@  strong_alias (__pthread_initialize_minimal_internal,
 size_t
 __pthread_get_minstack (const pthread_attr_t *attr)
 {
-  return GLRO(dl_pagesize) + __static_tls_size + PTHREAD_STACK_MIN;
+  return (GLRO(dl_pagesize) + __nptl_tls_static_size_for_stack ()
+	  + PTHREAD_STACK_MIN);
 }
diff --git a/nptl/nptl-stack.h b/nptl/nptl-stack.h
index 8631b61816..a6bd8df77f 100644
--- a/nptl/nptl-stack.h
+++ b/nptl/nptl-stack.h
@@ -20,7 +20,8 @@ 
 #ifndef _NPTL_STACK_H
 #define _NPTL_STACK_H
 
-#include <descr.h>
+#include <nptl/descr.h>
+#include <ldsodefs.h>
 #include <list.h>
 #include <stdbool.h>
 
@@ -47,4 +48,12 @@  libc_hidden_proto (__nptl_deallocate_stack)
 /* Free stacks until cache size is lower than LIMIT.  */
 void __nptl_free_stacks (size_t limit) attribute_hidden;
 
+/* Compute the size of the static TLS area based on data from the
+   dynamic loader.  */
+static inline size_t
+__nptl_tls_static_size_for_stack (void)
+{
+  return roundup (GLRO (dl_tls_static_size), GLRO (dl_tls_static_align));
+}
+
 #endif /* _NPTL_STACK_H */
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index e33b071a4d..9ee61af8b3 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -205,10 +205,6 @@  libc_hidden_proto (__default_pthread_attr_lock)
 /* Called from __libc_freeres to deallocate the default attribute.  */
 extern void __default_pthread_attr_freeres (void) attribute_hidden;
 
-/* Size and alignment of static TLS block.  */
-extern size_t __static_tls_size attribute_hidden;
-extern size_t __static_tls_align_m1 attribute_hidden;
-
 /* Attribute handling.  */
 extern struct pthread_attr *__attr_list attribute_hidden;
 extern int __attr_list_lock attribute_hidden;
diff --git a/sysdeps/nptl/pthread_early_init.h b/sysdeps/nptl/pthread_early_init.h
index 2d15303dd9..5b49ce39c2 100644
--- a/sysdeps/nptl/pthread_early_init.h
+++ b/sysdeps/nptl/pthread_early_init.h
@@ -19,12 +19,40 @@ 
 #ifndef _PTHREAD_EARLY_INIT_H
 #define _PTHREAD_EARLY_INIT_H 1
 
+#include <nptl/nptl-stack.h>
 #include <nptl/pthreadP.h>
 #include <pthread_mutex_conf.h>
+#include <sys/resource.h>
 
 static inline void
 __pthread_early_init (void)
 {
+  /* Determine the default allowed stack size.  This is the size used
+     in case the user does not specify one.  */
+  struct rlimit limit;
+  if (__getrlimit (RLIMIT_STACK, &limit) != 0
+      || limit.rlim_cur == RLIM_INFINITY)
+    /* The system limit is not usable.  Use an architecture-specific
+       default.  */
+    limit.rlim_cur = ARCH_STACK_DEFAULT_SIZE;
+  else if (limit.rlim_cur < PTHREAD_STACK_MIN)
+    /* The system limit is unusably small.
+       Use the minimal size acceptable.  */
+    limit.rlim_cur = PTHREAD_STACK_MIN;
+
+  /* Make sure it meets the minimum size that allocate_stack
+     (allocatestack.c) will demand, which depends on the page size.  */
+  const uintptr_t pagesz = GLRO(dl_pagesize);
+  const size_t minstack = (pagesz + __nptl_tls_static_size_for_stack ()
+                           + MINIMAL_REST_STACK);
+  if (limit.rlim_cur < minstack)
+    limit.rlim_cur = minstack;
+
+  /* Round the resource limit up to page size.  */
+  limit.rlim_cur = ALIGN_UP (limit.rlim_cur, pagesz);
+  __default_pthread_attr.internal.stacksize = limit.rlim_cur;
+  __default_pthread_attr.internal.guardsize = GLRO (dl_pagesize);
+
 #if HAVE_TUNABLES
   __pthread_tunables_init ();
 #endif