elf: Avoid using memalign for TLS allocations [BZ #17730]
Commit Message
Instead of a flag which indicates the pointer can be freed, dtv_t
now includes the pointer which should be freed. Due to padding,
the size of dtv_t does not increase.
To avoid using memalign, the new allocate_dtv_entry function
allocates a sufficiently large buffer so that a sub-buffer
can be found in it which starts with an aligned pointer. Both
the aligned and original pointers are kept, the latter for calling
free later.
2016-06-20 Florian Weimer <fweimer@redhat.com>
[BZ #17730]
Avoid using memalign for TLS allocations.
* sysdeps/generic/dl-dtv.h (struct dtv_pointer): New. Replaces
is_static member with to_free member.
(union dtv): Use struct dtv_pointer.
* csu/libc-tls.c (__libc_setup_tls): Set to_free member of struct
dtv_pointer instead of is_static.
* elf/dl-tls.c (_dl_allocate_tls_init): Likewise.
(_dl_deallocate_tls): Free to_free member of struct dtv_pointer
instead of val.
(allocate_dtv_entry): New function.
(allocate_and_init): Return struct dtv_pointer. Call
allocate_dtv_entry instead of __libc_memalign.
(_dl_update_slotinfo): Free to_free member of struct dtv_pointer
instead of val.
(tls_get_addr_tail): Set to_free member of struct dtv_pointer
instead of is_static. Adjust call to allocate_and_init.
* nptl/allocatestack.c (get_cached_stack): Free to_free member of
struct dtv_pointer instead of val.
Comments
On 06/21/2016 07:33 AM, Florian Weimer wrote:
> Instead of a flag which indicates the pointer can be freed, dtv_t
> now includes the pointer which should be freed. Due to padding,
> the size of dtv_t does not increase.
>
> To avoid using memalign, the new allocate_dtv_entry function
> allocates a sufficiently large buffer so that a sub-buffer
> can be found in it which starts with an aligned pointer. Both
> the aligned and original pointers are kept, the latter for calling
> free later.
This looks good to me.
I like the logical simplication when going from is_static and the
flag value to just the value and the pointer to free.
OK to checkin.
> 2016-06-20 Florian Weimer <fweimer@redhat.com>
>
> [BZ #17730]
> Avoid using memalign for TLS allocations.
> * sysdeps/generic/dl-dtv.h (struct dtv_pointer): New. Replaces
> is_static member with to_free member.
> (union dtv): Use struct dtv_pointer.
> * csu/libc-tls.c (__libc_setup_tls): Set to_free member of struct
> dtv_pointer instead of is_static.
> * elf/dl-tls.c (_dl_allocate_tls_init): Likewise.
> (_dl_deallocate_tls): Free to_free member of struct dtv_pointer
> instead of val.
> (allocate_dtv_entry): New function.
> (allocate_and_init): Return struct dtv_pointer. Call
> allocate_dtv_entry instead of __libc_memalign.
> (_dl_update_slotinfo): Free to_free member of struct dtv_pointer
> instead of val.
> (tls_get_addr_tail): Set to_free member of struct dtv_pointer
> instead of is_static. Adjust call to allocate_and_init.
> * nptl/allocatestack.c (get_cached_stack): Free to_free member of
> struct dtv_pointer instead of val.
>
> diff --git a/csu/libc-tls.c b/csu/libc-tls.c
> index d6425e0..235ac79 100644
> --- a/csu/libc-tls.c
> +++ b/csu/libc-tls.c
> @@ -175,7 +175,7 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
> #else
> # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
> #endif
> - _dl_static_dtv[2].pointer.is_static = true;
> + _dl_static_dtv[2].pointer.to_free = NULL;
OK.
> /* sbrk gives us zero'd memory, so we don't need to clear the remainder. */
> memcpy (_dl_static_dtv[2].pointer.val, initimage, filesz);
>
> diff --git a/elf/dl-tls.c b/elf/dl-tls.c
> index ed13fd9..be6a3c7 100644
> --- a/elf/dl-tls.c
> +++ b/elf/dl-tls.c
> @@ -494,7 +494,7 @@ _dl_allocate_tls_init (void *result)
> maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
>
> dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
> - dtv[map->l_tls_modid].pointer.is_static = false;
> + dtv[map->l_tls_modid].pointer.to_free = NULL;
OK.
>
> if (map->l_tls_offset == NO_TLS_OFFSET
> || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
> @@ -551,9 +551,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
>
> /* We need to free the memory allocated for non-static TLS. */
> for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
> - if (! dtv[1 + cnt].pointer.is_static
> - && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
> - free (dtv[1 + cnt].pointer.val);
> + free (dtv[1 + cnt].pointer.to_free);
OK.
>
> /* The array starts with dtv[-1]. */
> if (dtv != GL(dl_initial_dtv))
> @@ -594,21 +592,49 @@ rtld_hidden_def (_dl_deallocate_tls)
> # define GET_ADDR_OFFSET ti->ti_offset
> # endif
>
> +/* Allocate one DTV entry. */
> +static struct dtv_pointer
> +allocate_dtv_entry (size_t alignment, size_t size)
> +{
> + if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
> + {
> + /* The alignment is supported by malloc. */
> + void *ptr = malloc (size);
> + return (struct dtv_pointer) { ptr, ptr };
> + }
>
> -static void *
> + /* Emulate memalign to by manually aligning a pointer returned by
> + malloc. First compute the size with an overflow check. */
> + size_t alloc_size = size + alignment;
> + if (alloc_size < size)
> + return (struct dtv_pointer) {};
> +
> + /* Perform the allocation. This is the pointer we need to free
> + later. */
> + void *start = malloc (alloc_size);
> + if (start == NULL)
> + return (struct dtv_pointer) {};
> +
> + /* Find the aligned position within the larger allocation. */
> + void *aligned = (void *) roundup ((uintptr_t) start, alignment);
> +
> + return (struct dtv_pointer) { .val = aligned, .to_free = start };
> +}
OK.
> +
> +static struct dtv_pointer
> allocate_and_init (struct link_map *map)
> {
> - void *newp;
> -
> - newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
> - if (newp == NULL)
> + struct dtv_pointer result = allocate_dtv_entry
> + (map->l_tls_align, map->l_tls_blocksize);
> + if (result.val == NULL)
> oom ();
>
> /* Initialize the memory. */
> - memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
> + memset (__mempcpy (result.val, map->l_tls_initimage,
> + map->l_tls_initimage_size),
> '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
>
> - return newp;
> + return result;
> }
>
>
> @@ -678,12 +704,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
> {
> /* If this modid was used at some point the memory
> might still be allocated. */
> - if (! dtv[total + cnt].pointer.is_static
> - && (dtv[total + cnt].pointer.val
> - != TLS_DTV_UNALLOCATED))
> - free (dtv[total + cnt].pointer.val);
> + free (dtv[total + cnt].pointer.to_free);
> dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
> - dtv[total + cnt].pointer.is_static = false;
> + dtv[total + cnt].pointer.to_free = NULL;
> }
>
> continue;
> @@ -708,16 +731,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
> dtv entry free it. */
> /* XXX Ideally we will at some point create a memory
> pool. */
> - if (! dtv[modid].pointer.is_static
> - && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
> - /* Note that free is called for NULL is well. We
> - deallocate even if it is this dtv entry we are
> - supposed to load. The reason is that we call
> - memalign and not malloc. */
> - free (dtv[modid].pointer.val);
> -
> + free (dtv[modid].pointer.to_free);
> dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
> - dtv[modid].pointer.is_static = false;
> + dtv[modid].pointer.to_free = NULL;
OK.
>
> if (modid == req_modid)
> the_map = map;
> @@ -780,7 +796,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
> #endif
> __rtld_lock_unlock_recursive (GL(dl_load_lock));
>
> - dtv[GET_ADDR_MODULE].pointer.is_static = true;
> + dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
> dtv[GET_ADDR_MODULE].pointer.val = p;
>
> return (char *) p + GET_ADDR_OFFSET;
> @@ -788,10 +804,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
> else
> __rtld_lock_unlock_recursive (GL(dl_load_lock));
> }
> - void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
> - assert (!dtv[GET_ADDR_MODULE].pointer.is_static);
> + struct dtv_pointer result = allocate_and_init (the_map);
> + dtv[GET_ADDR_MODULE].pointer = result;
> + assert (result.to_free != NULL);
>
> - return (char *) p + GET_ADDR_OFFSET;
> + return (char *) result.val + GET_ADDR_OFFSET;
> }
>
>
> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index 6b42b11..60b34dc 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -245,9 +245,7 @@ get_cached_stack (size_t *sizep, void **memp)
> /* Clear the DTV. */
> dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
> for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
> - if (! dtv[1 + cnt].pointer.is_static
> - && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
> - free (dtv[1 + cnt].pointer.val);
> + free (dtv[1 + cnt].pointer.to_free);
OK.
> memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
>
> /* Re-initialize the TLS. */
> diff --git a/sysdeps/generic/dl-dtv.h b/sysdeps/generic/dl-dtv.h
> index 36c5c58..39d8fe2 100644
> --- a/sysdeps/generic/dl-dtv.h
> +++ b/sysdeps/generic/dl-dtv.h
> @@ -19,15 +19,17 @@
> #ifndef _DL_DTV_H
> #define _DL_DTV_H
>
> +struct dtv_pointer
> +{
> + void *val; /* Pointer to data, or TLS_DTV_UNALLOCATED. */
> + void *to_free; /* Unaligned pointer, for deallocation. */
> +};
> +
> /* Type for the dtv. */
> typedef union dtv
> {
> size_t counter;
> - struct
> - {
> - void *val;
> - bool is_static;
> - } pointer;
> + struct dtv_pointer pointer;
OK.
> } dtv_t;
>
> /* Value used for dtv entries for which the allocation is delayed. */
>
Cheers,
Carlos.
On 07/11/2016 04:54 AM, Carlos O'Donell wrote:
> On 06/21/2016 07:33 AM, Florian Weimer wrote:
>> > Instead of a flag which indicates the pointer can be freed, dtv_t
>> > now includes the pointer which should be freed. Due to padding,
>> > the size of dtv_t does not increase.
>> >
>> > To avoid using memalign, the new allocate_dtv_entry function
>> > allocates a sufficiently large buffer so that a sub-buffer
>> > can be found in it which starts with an aligned pointer. Both
>> > the aligned and original pointers are kept, the latter for calling
>> > free later.
> This looks good to me.
Committed as well.
Thanks,
Florian
@@ -175,7 +175,7 @@ __libc_setup_tls (size_t tcbsize, size_t tcbalign)
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
- _dl_static_dtv[2].pointer.is_static = true;
+ _dl_static_dtv[2].pointer.to_free = NULL;
/* sbrk gives us zero'd memory, so we don't need to clear the remainder. */
memcpy (_dl_static_dtv[2].pointer.val, initimage, filesz);
@@ -494,7 +494,7 @@ _dl_allocate_tls_init (void *result)
maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
- dtv[map->l_tls_modid].pointer.is_static = false;
+ dtv[map->l_tls_modid].pointer.to_free = NULL;
if (map->l_tls_offset == NO_TLS_OFFSET
|| map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
@@ -551,9 +551,7 @@ _dl_deallocate_tls (void *tcb, bool dealloc_tcb)
/* We need to free the memory allocated for non-static TLS. */
for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
- if (! dtv[1 + cnt].pointer.is_static
- && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
- free (dtv[1 + cnt].pointer.val);
+ free (dtv[1 + cnt].pointer.to_free);
/* The array starts with dtv[-1]. */
if (dtv != GL(dl_initial_dtv))
@@ -594,21 +592,49 @@ rtld_hidden_def (_dl_deallocate_tls)
# define GET_ADDR_OFFSET ti->ti_offset
# endif
+/* Allocate one DTV entry. */
+static struct dtv_pointer
+allocate_dtv_entry (size_t alignment, size_t size)
+{
+ if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
+ {
+ /* The alignment is supported by malloc. */
+ void *ptr = malloc (size);
+ return (struct dtv_pointer) { ptr, ptr };
+ }
-static void *
+ /* Emulate memalign to by manually aligning a pointer returned by
+ malloc. First compute the size with an overflow check. */
+ size_t alloc_size = size + alignment;
+ if (alloc_size < size)
+ return (struct dtv_pointer) {};
+
+ /* Perform the allocation. This is the pointer we need to free
+ later. */
+ void *start = malloc (alloc_size);
+ if (start == NULL)
+ return (struct dtv_pointer) {};
+
+ /* Find the aligned position within the larger allocation. */
+ void *aligned = (void *) roundup ((uintptr_t) start, alignment);
+
+ return (struct dtv_pointer) { .val = aligned, .to_free = start };
+}
+
+static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
- void *newp;
-
- newp = __libc_memalign (map->l_tls_align, map->l_tls_blocksize);
- if (newp == NULL)
+ struct dtv_pointer result = allocate_dtv_entry
+ (map->l_tls_align, map->l_tls_blocksize);
+ if (result.val == NULL)
oom ();
/* Initialize the memory. */
- memset (__mempcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size),
+ memset (__mempcpy (result.val, map->l_tls_initimage,
+ map->l_tls_initimage_size),
'\0', map->l_tls_blocksize - map->l_tls_initimage_size);
- return newp;
+ return result;
}
@@ -678,12 +704,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
{
/* If this modid was used at some point the memory
might still be allocated. */
- if (! dtv[total + cnt].pointer.is_static
- && (dtv[total + cnt].pointer.val
- != TLS_DTV_UNALLOCATED))
- free (dtv[total + cnt].pointer.val);
+ free (dtv[total + cnt].pointer.to_free);
dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
- dtv[total + cnt].pointer.is_static = false;
+ dtv[total + cnt].pointer.to_free = NULL;
}
continue;
@@ -708,16 +731,9 @@ _dl_update_slotinfo (unsigned long int req_modid)
dtv entry free it. */
/* XXX Ideally we will at some point create a memory
pool. */
- if (! dtv[modid].pointer.is_static
- && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
- /* Note that free is called for NULL is well. We
- deallocate even if it is this dtv entry we are
- supposed to load. The reason is that we call
- memalign and not malloc. */
- free (dtv[modid].pointer.val);
-
+ free (dtv[modid].pointer.to_free);
dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
- dtv[modid].pointer.is_static = false;
+ dtv[modid].pointer.to_free = NULL;
if (modid == req_modid)
the_map = map;
@@ -780,7 +796,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
#endif
__rtld_lock_unlock_recursive (GL(dl_load_lock));
- dtv[GET_ADDR_MODULE].pointer.is_static = true;
+ dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
dtv[GET_ADDR_MODULE].pointer.val = p;
return (char *) p + GET_ADDR_OFFSET;
@@ -788,10 +804,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
else
__rtld_lock_unlock_recursive (GL(dl_load_lock));
}
- void *p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
- assert (!dtv[GET_ADDR_MODULE].pointer.is_static);
+ struct dtv_pointer result = allocate_and_init (the_map);
+ dtv[GET_ADDR_MODULE].pointer = result;
+ assert (result.to_free != NULL);
- return (char *) p + GET_ADDR_OFFSET;
+ return (char *) result.val + GET_ADDR_OFFSET;
}
@@ -245,9 +245,7 @@ get_cached_stack (size_t *sizep, void **memp)
/* Clear the DTV. */
dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
- if (! dtv[1 + cnt].pointer.is_static
- && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
- free (dtv[1 + cnt].pointer.val);
+ free (dtv[1 + cnt].pointer.to_free);
memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
/* Re-initialize the TLS. */
@@ -19,15 +19,17 @@
#ifndef _DL_DTV_H
#define _DL_DTV_H
+struct dtv_pointer
+{
+ void *val; /* Pointer to data, or TLS_DTV_UNALLOCATED. */
+ void *to_free; /* Unaligned pointer, for deallocation. */
+};
+
/* Type for the dtv. */
typedef union dtv
{
size_t counter;
- struct
- {
- void *val;
- bool is_static;
- } pointer;
+ struct dtv_pointer pointer;
} dtv_t;
/* Value used for dtv entries for which the allocation is delayed. */