@@ -292,13 +292,19 @@
#if USE_TCACHE
/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
# define TCACHE_MAX_BINS 64
+/* Number of bins beyond TCACHE_MAX_BINS that are reserved for ranges of
+   chunk sizes, so the tcache can also cache chunks larger than ~1k.  Each
+   of these bins covers roughly one power-of-two range of sizes; chunks too
+   large for the last range all share the final bin. */
+# define TCACHE_UNBOUND_SIZE_BINS 10
+# define TCACHE_ALL_BINS (TCACHE_MAX_BINS + TCACHE_UNBOUND_SIZE_BINS)
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)
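+/* Number of single-size bins currently in use, i.e. mp_.tcache_bins capped
+   at TCACHE_MAX_BINS. */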
+# define TCACHE_FIXED_SIZE_BINS \
+ (mp_.tcache_bins < TCACHE_MAX_BINS ? mp_.tcache_bins : TCACHE_MAX_BINS)
/* Only used to pre-fill the tunables. */
-# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)
+# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - MALLOC_ALIGNMENT + 1)
-/* When "x" is from chunksize(). */
-# define csize2tidx(x) (((x) - MINSIZE) / MALLOC_ALIGNMENT)
+/* When "x" is from chunksize(). Only usable for single sized tcache bins. */
+# define fast_csize2tidx(x) (((x) - MINSIZE) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size. */
# define usize2tidx(x) csize2tidx (request2size (x))
@@ -1926,7 +1932,7 @@ static struct malloc_par mp_ =
,
.tcache_count = TCACHE_FILL_COUNT,
.tcache_bins = TCACHE_MAX_BINS,
- .tcache_max_bytes = tidx2usize (TCACHE_MAX_BINS-1),
+ .tcache_max_bytes = MAX_TCACHE_SIZE,
.tcache_unsorted_limit = 0 /* No limit. */
#endif
};
@@ -3119,8 +3125,8 @@ typedef struct tcache_entry
time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
-  uint16_t counts[TCACHE_MAX_BINS];
-  tcache_entry *entries[TCACHE_MAX_BINS];
+  uint16_t counts[TCACHE_ALL_BINS];
+  tcache_entry *entries[TCACHE_ALL_BINS];
} tcache_perthread_struct;
static __thread bool tcache_shutting_down = false;
@@ -3153,6 +3159,45 @@ tcache_key_initialize (void)
}
}
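+/* Map a chunk size NB to a tcache bin index.  Sizes that fit the fixed-size
+   bins use the direct mapping; larger sizes fall into one of the
+   TCACHE_UNBOUND_SIZE_BINS range bins, one per power of two, with anything
+   past the last range sharing the final bin.  If the tcache_max_bytes
+   tunable has kept tcache_bins below TCACHE_MAX_BINS, large sizes map to
+   TCACHE_MAX_BINS, an index outside the enabled bins. */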
+static __always_inline size_t
+csize2tidx (size_t nb)
+{
+  if (__glibc_likely (nb < tidx2usize (TCACHE_MAX_BINS)))
+    return fast_csize2tidx (nb);
+  else if (__glibc_likely (mp_.tcache_bins < TCACHE_MAX_BINS))
+    return TCACHE_MAX_BINS;
+  else
+    {
+      size_t idx = TCACHE_MAX_BINS
+                   + __builtin_clzl (tidx2usize (TCACHE_MAX_BINS-1))
+                   - __builtin_clzl (nb);
+      return idx < TCACHE_ALL_BINS ? idx : TCACHE_ALL_BINS - 1;
+    }
+}
+
+/* Return the location within tcache bin TC_IDX at which a chunk of size NB
+   is found or should be inserted.  The range bins are kept sorted by
+   increasing chunk size, so this walks the list to the first entry whose
+   chunk is at least NB bytes.  Used by both tcache_get and tcache_put. */
+static __always_inline tcache_entry **
+tcache_location_for_size (size_t nb, size_t tc_idx)
+{
+  if (__glibc_likely (nb <= tidx2usize (TCACHE_MAX_BINS-1)))
+    return &(tcache->entries[tc_idx]);
+  else
+    {
+      tcache_entry **tep = &(tcache->entries[tc_idx]);
+      tcache_entry *te = REVEAL_PTR (*tep);
+      while (te != NULL
+             && __glibc_unlikely (chunksize (mem2chunk (te)) < nb))
+        {
+          tep = & (te->next);
+          te = REVEAL_PTR (te->next);
+        }
+
+      return tep;
+    }
+}
+
/* Caller must ensure that we know tc_idx is valid and there's room
for more chunks. */
static __always_inline void
@@ -3164,8 +3209,9 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
detect a double free. */
e->key = tcache_key;
-  e->next = PROTECT_PTR (&e->next, REVEAL_PTR (tcache->entries[tc_idx]));
-  tcache->entries[tc_idx] = PROTECT_PTR (&(tcache->entries[tc_idx]), e);
+  tcache_entry **entry = tcache_location_for_size (chunksize (chunk), tc_idx);
+  e->next = PROTECT_PTR (&e->next, REVEAL_PTR (*entry));
+  *entry = PROTECT_PTR (entry, e);
  ++(tcache->counts[tc_idx]);
}
@@ -3173,7 +3219,7 @@ tcache_put (mchunkptr chunk, size_t tc_idx)
available chunks to remove. Removes chunk from the middle of the
list. */
static __always_inline void *
-tcache_get_n (size_t tc_idx, tcache_entry **ep)
+tcache_remove_entry (size_t tc_idx, tcache_entry **ep)
{
tcache_entry *e = REVEAL_PTR (*ep);
@@ -3187,18 +3233,18 @@ tcache_get_n (size_t tc_idx, tcache_entry **ep)
return (void *) e;
}
-/* Like the above, but removes from the head of the list. */
+/* Like the above, but first walks the bin's list to find an entry of the
+   requested size.  For the range bins an exact size match is required;
+   NULL is returned if no such entry is cached. */
static __always_inline void *
-tcache_get (size_t tc_idx)
+tcache_get (size_t nb, size_t tc_idx)
{
-  return tcache_get_n (tc_idx, & tcache->entries[tc_idx]);
-}
+  tcache_entry **entry = tcache_location_for_size (nb, tc_idx);
+  tcache_entry *e = REVEAL_PTR (*entry);
+  if (__glibc_unlikely (tc_idx >= TCACHE_MAX_BINS)
+      && (e == NULL || chunksize (mem2chunk (e)) != nb))
+    return NULL;
-/* Iterates through the tcache linked list. */
-static __always_inline tcache_entry *
-tcache_next (tcache_entry *e)
-{
-  return (tcache_entry *) REVEAL_PTR (e->next);
+  return tcache_remove_entry (tc_idx, entry);
}
/* Check if tcache is available for alloc by corresponding tc_idx. */
@@ -3281,7 +3327,7 @@ tcache_thread_shutdown (void)
/* Free all of the entries and the tcache itself back to the arena
heap for coalescing. */
-  for (i = 0; i < TCACHE_MAX_BINS; ++i)
+  for (i = 0; i < TCACHE_ALL_BINS; ++i)
{
while (REVEAL_PTR (tcache_tmp->entries[i]))
{
@@ -3330,7 +3376,7 @@ tcache_init(void)
memset (tcache, 0, sizeof (tcache_perthread_struct));
}
-  for (int i = 0; i < mp_.tcache_bins; i++)
+  for (int i = 0; i < TCACHE_ALL_BINS; i++)
tcache->entries[i] = PROTECT_PTR (&(tcache->entries[i]), NULL);
}
@@ -3357,7 +3403,7 @@ tcache_try_malloc (size_t bytes, void **memptr)
MAYBE_INIT_TCACHE ();
if (tcache_available (tc_idx))
- *memptr = tcache_get (tc_idx);
+ *memptr = tcache_get (tbytes, tc_idx);
else
*memptr = NULL;
@@ -3692,20 +3738,28 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
    if (tcache_available (tc_idx))
      {
-       /* The tcache itself isn't encoded, but the chain is. */
-       tcache_entry **tep = & tcache->entries[tc_idx];
+       tcache_entry **tep = tcache_location_for_size (tbytes, tc_idx);
        tcache_entry *te = REVEAL_PTR (*tep);
+
+       /* Make sure the bin can supply a chunk of the expected size; the
+          range bins require an exact match on the first candidate. */
+       if (te == NULL
+           || (tc_idx >= TCACHE_MAX_BINS
+               && chunksize (mem2chunk (te)) != tbytes))
+         goto tcache_memalign_abort;
+
+       /* The tcache itself isn't encoded, but the chain is. */
        while (te != NULL && !PTR_IS_ALIGNED (te, alignment))
          {
            tep = & (te->next);
-           te = tcache_next (te);
+           te = REVEAL_PTR (te->next);
          }
        if (te != NULL)
          {
-           void *victim = tcache_get_n (tc_idx, tep);
+           void *victim = tcache_remove_entry (tc_idx, tep);
            return tag_new_usable (victim);
          }
      }
+  tcache_memalign_abort:;
  }
#endif
@@ -3992,8 +4046,8 @@ _int_malloc (mstate av, size_t bytes)
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
- size_t tc_idx = csize2tidx (nb);
- if (tcache != NULL && tc_idx < mp_.tcache_bins)
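+ /* fast_csize2tidx only yields a valid index for the single-size bins; the
+    check against TCACHE_FIXED_SIZE_BINS discards anything larger. */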
+ size_t tc_idx = fast_csize2tidx (nb);
+ if (tcache != NULL && tc_idx < TCACHE_FIXED_SIZE_BINS)
{
mchunkptr tc_victim;
@@ -4053,8 +4107,8 @@ _int_malloc (mstate av, size_t bytes)
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
- size_t tc_idx = csize2tidx (nb);
- if (tcache != NULL && tc_idx < mp_.tcache_bins)
+ size_t tc_idx = fast_csize2tidx (nb);
+ if (__glibc_likely (tcache != NULL) && tc_idx < TCACHE_FIXED_SIZE_BINS)
{
mchunkptr tc_victim;
@@ -4115,8 +4169,8 @@ _int_malloc (mstate av, size_t bytes)
#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
- size_t tc_idx = csize2tidx (nb);
- if (tcache != NULL && tc_idx < mp_.tcache_bins)
+ size_t tc_idx = fast_csize2tidx (nb);
+ if (tcache != NULL && tc_idx < TCACHE_FIXED_SIZE_BINS)
tcache_nb = nb;
int return_cached = 0;
@@ -4294,7 +4348,7 @@ _int_malloc (mstate av, size_t bytes)
&& mp_.tcache_unsorted_limit > 0
&& tcache_unsorted_count > mp_.tcache_unsorted_limit)
{
- return tcache_get (tc_idx);
+ return tcache_get (nb, tc_idx);
}
#endif
@@ -4307,7 +4361,7 @@ _int_malloc (mstate av, size_t bytes)
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached)
{
- return tcache_get (tc_idx);
+ return tcache_get (nb, tc_idx);
}
#endif
@@ -5553,14 +5607,12 @@ do_set_arena_max (size_t value)
static __always_inline int
do_set_tcache_max (size_t value)
{
-  if (value <= MAX_TCACHE_SIZE)
-    {
-      LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
-      mp_.tcache_max_bytes = value;
-      mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
-      return 1;
-    }
-  return 0;
+  LIBC_PROBE (memory_tunable_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+  mp_.tcache_max_bytes = value;
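+  /* Enable all bins before calling csize2tidx so the mapping of large sizes
+     is not clamped by the previous tcache_bins value. */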
+  mp_.tcache_bins = TCACHE_ALL_BINS;
+  mp_.tcache_bins = csize2tidx (request2size(value)) + 1;
+
+  return 1;
}
static __always_inline int