elf: Avoid using memalign for TLS allocations [BZ #17730]

Message ID 20160621113340.940B540124EEB@oldenburg.str.redhat.com
State Committed
Delegated to: Carlos O'Donell
Headers

Commit Message

Florian Weimer June 21, 2016, 11:33 a.m. UTC
  Instead of a flag which indicates the pointer can be freed, dtv_t
now includes the pointer which should be freed.  Due to padding,
the size of dtv_t does not increase.

To avoid using memalign, the new allocate_dtv_entry function
allocates a sufficiently large buffer so that a sub-buffer
can be found in it which starts with an aligned pointer.  Both
the aligned and original pointers are kept, the latter for calling
free later.

2016-06-20  Florian Weimer  <fweimer@redhat.com>

	[BZ #17730]
	Avoid using memalign for TLS allocations.
	* sysdeps/generic/dl-dtv.h (struct dtv_pointer): New.  Replaces
	is_static member with to_free member.
	(union dtv): Use struct dtv_pointer.
	* csu/libc-tls.c (__libc_setup_tls): Set to_free member of struct
	dtv_pointer instead of is_static.
	* elf/dl-tls.c (_dl_allocate_tls_init): Likewise.
	(_dl_deallocate_tls): Free to_free member of struct dtv_pointer
	instead of val.
	(allocate_dtv_entry): New function.
	(allocate_and_init): Return struct dtv_pointer.  Call
	allocate_dtv_entry instead of __libc_memalign.
	(_dl_update_slotinfo): Free to_free member of struct dtv_pointer
	instead of val.
	(tls_get_addr_tail): Set to_free member of struct dtv_pointer
	instead of is_static.  Adjust call to allocate_and_init.
	* nptl/allocatestack.c (get_cached_stack): Free to_free member of
	struct dtv_pointer instead of val.
  

Comments

Carlos O'Donell July 11, 2016, 2:54 a.m. UTC | #1
On 06/21/2016 07:33 AM, Florian Weimer wrote:
> Instead of a flag which indicates the pointer can be freed, dtv_t
> now includes the pointer which should be freed.  Due to padding,
> the size of dtv_t does not increase.
> 
> To avoid using memalign, the new allocate_dtv_entry function
> allocates a sufficiently large buffer so that a sub-buffer
> can be found in it which starts with an aligned pointer.  Both
> the aligned and original pointers are kept, the latter for calling
> free later.

This looks good to me.

I like the logical simplication when going from is_static and the
flag value to just the value and the pointer to free.

OK to checkin.

> 2016-06-20  Florian Weimer  <fweimer@redhat.com>
> 
> 	[BZ #17730]
> 	Avoid using memalign for TLS allocations.
> 	* sysdeps/generic/dl-dtv.h (struct dtv_pointer): New.  Replaces
> 	is_static member with to_free member.
> 	(union dtv): Use struct dtv_pointer.
> 	* csu/libc-tls.c (__libc_setup_tls): Set to_free member of struct
> 	dtv_pointer instead of is_static.
> 	* elf/dl-tls.c (_dl_allocate_tls_init): Likewise.
> 	(_dl_deallocate_tls): Free to_free member of struct dtv_pointer
> 	instead of val.
> 	(allocate_dtv_entry): New function.
> 	(allocate_and_init): Return struct dtv_pointer.  Call
> 	allocate_dtv_entry instead of __libc_memalign.
> 	(_dl_update_slotinfo): Free to_free member of struct dtv_pointer
> 	instead of val.
> 	(tls_get_addr_tail): Set to_free member of struct dtv_pointer
> 	instead of is_static.  Adjust call to allocate_and_init.
> 	* nptl/allocatestack.c (get_cached_stack): Free to_free member of
> 	struct dtv_pointer instead of val.
> 
> diff --git a/csu/libc-tls.c b/csu/libc-tls.c
> index d6425e0..235ac79 100644
> --- a/csu/libc-tls.c
> +++ b/csu/libc-tls.c
> @@ -175,7 +175,7 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
>  #else
>  # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
>  #endif
> -  _dl_static_dtv[2].pointer.is_static = true;
> +  _dl_static_dtv[2].pointer.to_free = NULL;

OK.

>    /* sbrk gives us zero'd memory, so we don't need to clear the remainder.  */
>    memcpy (_dl_static_dtv[2].pointer.val, initimage, filesz);
>  
> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index ed13fd9..be6a3c7 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -494,7 +494,7 @@ _dl_allocate_tls_init (void *result)
>  	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
>  
>  	  dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
> -	  dtv[map->l_tls_modid].pointer.is_static = false;
> +	  dtv[map->l_tls_modid].pointer.to_free = NULL;

OK.

>  
>  	  if (map->l_tls_offset == NO_TLS_OFFSET
>  	      || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
> @@ -551,9 +551,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
>  
>    /* We need to free the memory allocated for non-static TLS.  */
>    for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
> -    if (! dtv[1 + cnt].pointer.is_static
> -	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
> -      free (dtv[1 + cnt].pointer.val);
> +    free (dtv[1 + cnt].pointer.to_free);

OK.

>  
>    /* The array starts with dtv[-1].  */
>    if (dtv != GL(dl_initial_dtv))
> @@ -594,21 +592,49 @@ rtld_hidden_def (_dl_deallocate_tls)
>  #  define GET_ADDR_OFFSET ti->ti_offset
>  # endif
>  
> +/* Allocate one DTV entry.  */
> +static struct dtv_pointer
> +allocate_dtv_entry (size_t alignment, size_t size)
> +{
> +  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
> +    {
> +      /* The alignment is supported by malloc.  */
> +      void *ptr = malloc (size);
> +      return (struct dtv_pointer) { ptr, ptr };
> +    }
>  
> -static void *
> +  /* Emulate memalign to by manually aligning a pointer returned by
> +     malloc.  First compute the size with an overflow check.  */
> +  size_t alloc_size = size + alignment;
> +  if (alloc_size < size)
> +    return (struct dtv_pointer) {};
> +
> +  /* Perform the allocation.  This is the pointer we need to free
> +     later.  */
> +  void *start = malloc (alloc_size);
> +  if (start == NULL)
> +    return (struct dtv_pointer) {};
> +
> +  /* Find the aligned position within the larger allocation.  */
> +  void *aligned = (void *) roundup ((uintptr_t) start, alignment);
> +
> +  return (struct dtv_pointer) { .val = aligned, .to_free = start };
> +}

OK.

> +
> +static struct dtv_pointer
>  allocate_and_init (struct link_map *map)
>  {
> -  void *newp;
> -
> -  newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
> -  if (newp == NULL)
> +  struct dtv_pointer result = allocate_dtv_entry
> +    (map->l_tls_align, map->l_tls_blocksize);
> +  if (result.val == NULL)
>      oom ();
>  
>    /* Initialize the memory.  */
> -  memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
> +  memset (__mempcpy (result.val, map->l_tls_initimage,
> +		     map->l_tls_initimage_size),
>  	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
>  
> -  return newp;
> +  return result;
>  }
>  
>  
> @@ -678,12 +704,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
>  		    {
>  		      /* If this modid was used at some point the memory
>  			 might still be allocated.  */
> -		      if (! dtv[total + cnt].pointer.is_static
> -			  && (dtv[total + cnt].pointer.val
> -			      != TLS_DTV_UNALLOCATED))
> -			free (dtv[total + cnt].pointer.val);
> +		      free (dtv[total + cnt].pointer.to_free);
>  		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
> -		      dtv[total + cnt].pointer.is_static = false;
> +		      dtv[total + cnt].pointer.to_free = NULL;
>  		    }
>  
>  		  continue;
> @@ -708,16 +731,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
>  		 dtv entry free it.  */
>  	      /* XXX Ideally we will at some point create a memory
>  		 pool.  */
> -	      if (! dtv[modid].pointer.is_static
> -		  && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
> -		/* Note that free is called for NULL is well.  We
> -		   deallocate even if it is this dtv entry we are
> -		   supposed to load.  The reason is that we call
> -		   memalign and not malloc.  */
> -		free (dtv[modid].pointer.val);
> -
> +	      free (dtv[modid].pointer.to_free);
>  	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
> -	      dtv[modid].pointer.is_static = false;
> +	      dtv[modid].pointer.to_free = NULL;

OK.

>  
>  	      if (modid == req_modid)
>  		the_map = map;
> @@ -780,7 +796,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
>  #endif
>  	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
>  
> -	  dtv[GET_ADDR_MODULE].pointer.is_static = true;
> +	  dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
>  	  dtv[GET_ADDR_MODULE].pointer.val = p;
>  
>  	  return (char *) p + GET_ADDR_OFFSET;
> @@ -788,10 +804,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
>        else
>  	__rtld_lock_unlock_recursive (GL(dl_load_lock));
>      }
> -  void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
> -  assert (!dtv[GET_ADDR_MODULE].pointer.is_static);
> +  struct dtv_pointer result = allocate_and_init (the_map);
> +  dtv[GET_ADDR_MODULE].pointer = result;
> +  assert (result.to_free != NULL);
>  
> -  return (char *) p + GET_ADDR_OFFSET;
> +  return (char *) result.val + GET_ADDR_OFFSET;
>  }
>  
>  
> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index 6b42b11..60b34dc 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -245,9 +245,7 @@ get_cached_stack (size_t *sizep, void **memp)
>    /* Clear the DTV.  */
>    dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
>    for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
> -    if (! dtv[1 + cnt].pointer.is_static
> -	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
> -      free (dtv[1 + cnt].pointer.val);
> +    free (dtv[1 + cnt].pointer.to_free);

OK.

>    memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
>  
>    /* Re-initialize the TLS.  */
> diff --git a/sysdeps/generic/dl-dtv.h b/sysdeps/generic/dl-dtv.h
> index 36c5c58..39d8fe2 100644
> --- a/sysdeps/generic/dl-dtv.h
> +++ b/sysdeps/generic/dl-dtv.h
> @@ -19,15 +19,17 @@
>  #ifndef _DL_DTV_H
>  #define _DL_DTV_H
>  
> +struct dtv_pointer
> +{
> +  void *val;                    /* Pointer to data, or TLS_DTV_UNALLOCATED.  */
> +  void *to_free;                /* Unaligned pointer, for deallocation.  */
> +};
> +
>  /* Type for the dtv.  */
>  typedef union dtv
>  {
>    size_t counter;
> -  struct
> -  {
> -    void *val;
> -    bool is_static;
> -  } pointer;
> +  struct dtv_pointer pointer;

OK.

>  } dtv_t;
>  
>  /* Value used for dtv entries for which the allocation is delayed.  */
> 

Cheers,
Carlos.
  
Florian Weimer Aug. 3, 2016, 2:52 p.m. UTC | #2
On 07/11/2016 04:54 AM, Carlos O'Donell wrote:
> On 06/21/2016 07:33 AM, Florian Weimer wrote:
>> > Instead of a flag which indicates the pointer can be freed, dtv_t
>> > now includes the pointer which should be freed.  Due to padding,
>> > the size of dtv_t does not increase.
>> >
>> > To avoid using memalign, the new allocate_dtv_entry function
>> > allocates a sufficiently large buffer so that a sub-buffer
>> > can be found in it which starts with an aligned pointer.  Both
>> > the aligned and original pointers are kept, the latter for calling
>> > free later.
> This looks good to me.

Committed as well.

Thanks,
Florian
  

Patch

diff --git a/csu/libc-tls.c b/csu/libc-tls.c
index d6425e0..235ac79 100644
--- a/csu/libc-tls.c
+++ b/csu/libc-tls.c
@@ -175,7 +175,7 @@  __libc_setup_tls (size_t tcbsize, size_t tcbalign)
 #else
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 #endif
-  _dl_static_dtv[2].pointer.is_static = true;
+  _dl_static_dtv[2].pointer.to_free = NULL;
   /* sbrk gives us zero'd memory, so we don't need to clear the remainder.  */
   memcpy (_dl_static_dtv[2].pointer.val, initimage, filesz);
 
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index ed13fd9..be6a3c7 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -494,7 +494,7 @@  _dl_allocate_tls_init (void *result)
 	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
 
 	  dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
-	  dtv[map->l_tls_modid].pointer.is_static = false;
+	  dtv[map->l_tls_modid].pointer.to_free = NULL;
 
 	  if (map->l_tls_offset == NO_TLS_OFFSET
 	      || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
@@ -551,9 +551,7 @@  _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
 
   /* We need to free the memory allocated for non-static TLS.  */
   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
-    if (! dtv[1 + cnt].pointer.is_static
-	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-      free (dtv[1 + cnt].pointer.val);
+    free (dtv[1 + cnt].pointer.to_free);
 
   /* The array starts with dtv[-1].  */
   if (dtv != GL(dl_initial_dtv))
@@ -594,21 +592,49 @@  rtld_hidden_def (_dl_deallocate_tls)
 #  define GET_ADDR_OFFSET ti->ti_offset
 # endif
 
+/* Allocate one DTV entry.  */
+static struct dtv_pointer
+allocate_dtv_entry (size_t alignment, size_t size)
+{
+  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
+    {
+      /* The alignment is supported by malloc.  */
+      void *ptr = malloc (size);
+      return (struct dtv_pointer) { ptr, ptr };
+    }
 
-static void *
+  /* Emulate memalign to by manually aligning a pointer returned by
+     malloc.  First compute the size with an overflow check.  */
+  size_t alloc_size = size + alignment;
+  if (alloc_size < size)
+    return (struct dtv_pointer) {};
+
+  /* Perform the allocation.  This is the pointer we need to free
+     later.  */
+  void *start = malloc (alloc_size);
+  if (start == NULL)
+    return (struct dtv_pointer) {};
+
+  /* Find the aligned position within the larger allocation.  */
+  void *aligned = (void *) roundup ((uintptr_t) start, alignment);
+
+  return (struct dtv_pointer) { .val = aligned, .to_free = start };
+}
+
+static struct dtv_pointer
 allocate_and_init (struct link_map *map)
 {
-  void *newp;
-
-  newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
-  if (newp == NULL)
+  struct dtv_pointer result = allocate_dtv_entry
+    (map->l_tls_align, map->l_tls_blocksize);
+  if (result.val == NULL)
     oom ();
 
   /* Initialize the memory.  */
-  memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
+  memset (__mempcpy (result.val, map->l_tls_initimage,
+		     map->l_tls_initimage_size),
 	  '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
 
-  return newp;
+  return result;
 }
 
 
@@ -678,12 +704,9 @@  _dl_update_slotinfo (unsigned long int req_modid)
 		    {
 		      /* If this modid was used at some point the memory
 			 might still be allocated.  */
-		      if (! dtv[total + cnt].pointer.is_static
-			  && (dtv[total + cnt].pointer.val
-			      != TLS_DTV_UNALLOCATED))
-			free (dtv[total + cnt].pointer.val);
+		      free (dtv[total + cnt].pointer.to_free);
 		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
-		      dtv[total + cnt].pointer.is_static = false;
+		      dtv[total + cnt].pointer.to_free = NULL;
 		    }
 
 		  continue;
@@ -708,16 +731,9 @@  _dl_update_slotinfo (unsigned long int req_modid)
 		 dtv entry free it.  */
 	      /* XXX Ideally we will at some point create a memory
 		 pool.  */
-	      if (! dtv[modid].pointer.is_static
-		  && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
-		/* Note that free is called for NULL is well.  We
-		   deallocate even if it is this dtv entry we are
-		   supposed to load.  The reason is that we call
-		   memalign and not malloc.  */
-		free (dtv[modid].pointer.val);
-
+	      free (dtv[modid].pointer.to_free);
 	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
-	      dtv[modid].pointer.is_static = false;
+	      dtv[modid].pointer.to_free = NULL;
 
 	      if (modid == req_modid)
 		the_map = map;
@@ -780,7 +796,7 @@  tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
 #endif
 	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
 
-	  dtv[GET_ADDR_MODULE].pointer.is_static = true;
+	  dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
 	  dtv[GET_ADDR_MODULE].pointer.val = p;
 
 	  return (char *) p + GET_ADDR_OFFSET;
@@ -788,10 +804,11 @@  tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
       else
 	__rtld_lock_unlock_recursive (GL(dl_load_lock));
     }
-  void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
-  assert (!dtv[GET_ADDR_MODULE].pointer.is_static);
+  struct dtv_pointer result = allocate_and_init (the_map);
+  dtv[GET_ADDR_MODULE].pointer = result;
+  assert (result.to_free != NULL);
 
-  return (char *) p + GET_ADDR_OFFSET;
+  return (char *) result.val + GET_ADDR_OFFSET;
 }
 
 
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 6b42b11..60b34dc 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -245,9 +245,7 @@  get_cached_stack (size_t *sizep, void **memp)
   /* Clear the DTV.  */
   dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
   for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
-    if (! dtv[1 + cnt].pointer.is_static
-	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-      free (dtv[1 + cnt].pointer.val);
+    free (dtv[1 + cnt].pointer.to_free);
   memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
 
   /* Re-initialize the TLS.  */
diff --git a/sysdeps/generic/dl-dtv.h b/sysdeps/generic/dl-dtv.h
index 36c5c58..39d8fe2 100644
--- a/sysdeps/generic/dl-dtv.h
+++ b/sysdeps/generic/dl-dtv.h
@@ -19,15 +19,17 @@ 
 #ifndef _DL_DTV_H
 #define _DL_DTV_H
 
+struct dtv_pointer
+{
+  void *val;                    /* Pointer to data, or TLS_DTV_UNALLOCATED.  */
+  void *to_free;                /* Unaligned pointer, for deallocation.  */
+};
+
 /* Type for the dtv.  */
 typedef union dtv
 {
   size_t counter;
-  struct
-  {
-    void *val;
-    bool is_static;
-  } pointer;
+  struct dtv_pointer pointer;
 } dtv_t;
 
 /* Value used for dtv entries for which the allocation is delayed.  */