@@ -77,6 +77,8 @@ multi-arch = @multi_arch@
mach-interface-list = @mach_interface_list@
+experimental-malloc = @experimental_malloc@
+
nss-crypt = @libc_cv_nss_crypt@
static-nss-crypt = @libc_cv_static_nss_crypt@
@@ -313,6 +313,13 @@ AC_ARG_ENABLE([multi-arch],
[multi_arch=$enableval],
[multi_arch=default])
+AC_ARG_ENABLE([experimental-malloc],
+ AC_HELP_STRING([--disable-experimental-malloc],
+ [disable experimental malloc features]),
+ [experimental_malloc=$enableval],
+ [experimental_malloc=yes])
+AC_SUBST(experimental_malloc)
+
AC_ARG_ENABLE([nss-crypt],
AC_HELP_STRING([--enable-nss-crypt],
[enable libcrypt to use nss]),
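
With the configure.ac and config.make.in hunks above, a plain
./configure enables the feature (experimental_malloc=yes), while
./configure --disable-experimental-malloc turns it off: config.make
then carries experimental-malloc = no, the Makefile conditional below
skips -DUSE_TCACHE, and malloc.c falls back to the USE_TCACHE=0
default defined further down in this patch.
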
@@ -64,5 +64,17 @@ glibc {
env_alias: MALLOC_ARENA_TEST
minval: 1
}
+ tcache_max {
+ type: SIZE_T
+ env_alias: MALLOC_TCACHE_MAX
+ }
+ tcache_count {
+ type: SIZE_T
+ env_alias: MALLOC_TCACHE_COUNT
+ }
+ tcache_unsorted_limit {
+ type: SIZE_T
+ env_alias: MALLOC_TCACHE_UNSORTED_LIMIT
+ }
}
}
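
Assuming these entries sit in the existing glibc.malloc namespace (as
the arena_test entry above does), the new knobs should be settable at
startup with the usual colon-separated GLIBC_TUNABLES syntax, e.g.:

  GLIBC_TUNABLES=glibc.malloc.tcache_max=256:glibc.malloc.tcache_count=4 ./app

The MALLOC_TCACHE_* env_alias forms are wired up separately in the
arena.c hunks below for non-tunables builds.
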
@@ -168,6 +168,9 @@ tst-malloc-usable-static-ENV = $(tst-malloc-usable-ENV)
tst-malloc-usable-tunables-ENV = GLIBC_TUNABLES=glibc.malloc.check=3
tst-malloc-usable-static-tunables-ENV = $(tst-malloc-usable-tunables-ENV)
+ifeq ($(experimental-malloc),yes)
+CPPFLAGS-malloc.c += -DUSE_TCACHE
+endif
# Uncomment this for test releases. For public releases it is too expensive.
#CPPFLAGS-malloc.o += -DMALLOC_DEBUG=1
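
Note that arena.c is #included by malloc.c, so this single
CPPFLAGS-malloc.c hook is enough for the USE_TCACHE tests in both
files to agree within one build.
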
@@ -236,6 +236,11 @@ DL_TUNABLE_CALLBACK_FNDECL (set_perturb_byte, int32_t)
DL_TUNABLE_CALLBACK_FNDECL (set_trim_threshold, size_t)
DL_TUNABLE_CALLBACK_FNDECL (set_arena_max, size_t)
DL_TUNABLE_CALLBACK_FNDECL (set_arena_test, size_t)
+#if USE_TCACHE
+DL_TUNABLE_CALLBACK_FNDECL (set_tcache_max, size_t)
+DL_TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
+DL_TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
+#endif
#else
/* Initialization routine. */
#include <string.h>
@@ -322,6 +325,11 @@ ptmalloc_init (void)
TUNABLE_SET_VAL_WITH_CALLBACK (mmap_max, NULL, set_mmaps_max);
TUNABLE_SET_VAL_WITH_CALLBACK (arena_max, NULL, set_arena_max);
TUNABLE_SET_VAL_WITH_CALLBACK (arena_test, NULL, set_arena_test);
+#if USE_TCACHE
+ TUNABLE_SET_VAL_WITH_CALLBACK (tcache_max, NULL, set_tcache_max);
+ TUNABLE_SET_VAL_WITH_CALLBACK (tcache_count, NULL, set_tcache_count);
+ TUNABLE_SET_VAL_WITH_CALLBACK (tcache_unsorted_limit, NULL, set_tcache_unsorted_limit);
+#endif
__libc_lock_unlock (main_arena.mutex);
#else
const char *s = NULL;
@@ -371,7 +379,23 @@ ptmalloc_init (void)
if (memcmp (envline, "ARENA_TEST", 10) == 0)
__libc_mallopt (M_ARENA_TEST, atoi (&envline[11]));
}
+#if USE_TCACHE
+ if (!__builtin_expect (__libc_enable_secure, 0))
+ {
+ if (memcmp (envline, "TCACHE_MAX", 10) == 0)
+ __libc_mallopt (M_TCACHE_MAX, atoi (&envline[11]));
+ }
+#endif
break;
+#if USE_TCACHE
+ case 12:
+ if (!__builtin_expect (__libc_enable_secure, 0))
+ {
+ if (memcmp (envline, "TCACHE_COUNT", 12) == 0)
+ __libc_mallopt (M_TCACHE_COUNT, atoi (&envline[13]));
+ }
+ break;
+#endif
case 15:
if (!__builtin_expect (__libc_enable_secure, 0))
{
@@ -381,6 +405,15 @@ ptmalloc_init (void)
__libc_mallopt (M_MMAP_THRESHOLD, atoi (&envline[16]));
}
break;
+#if USE_TCACHE
+ case 21:
+ if (!__builtin_expect (__libc_enable_secure, 0))
+ {
+ if (memcmp (envline, "TCACHE_UNSORTED_LIMIT", 21) == 0)
+ __libc_mallopt (M_TCACHE_UNSORTED_LIMIT, atoi (&envline[22]));
+ }
+ break;
+#endif
default:
break;
}
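
So in a build without tunables, the same three knobs are read from the
environment at startup: e.g. MALLOC_TCACHE_COUNT=4 takes the case-12
branch above and lands in __libc_mallopt (M_TCACHE_COUNT, 4).  All
three are ignored for secure (AT_SECURE) binaries, matching the
existing MALLOC_* variables.
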
@@ -297,6 +297,33 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line,
}
#endif
+#ifndef USE_TCACHE
+# define USE_TCACHE 0
+#endif
+#if USE_TCACHE
+/* We want 64 size classes; an arbitrary limit, which tunables can reduce. */
+# define MAX_TCACHE_SIZE (MALLOC_ALIGNMENT * 63)
+# define TCACHE_IDX ((MAX_TCACHE_SIZE / MALLOC_ALIGNMENT) + 1)
+# define size2tidx_(bytes) (((bytes) + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
+
+# define tidx2csize(idx) ((idx) * MALLOC_ALIGNMENT + SIZE_SZ)
+# define tidx2usize(idx) ((idx) * MALLOC_ALIGNMENT)
+
+/* When "x" is a user-provided size.  */
+# define usize2tidx(x) size2tidx_ (x)
+/* When "x" is from chunksize().  */
+# define csize2tidx(x) size2tidx_ ((x) - SIZE_SZ)
+
+/* size2tidx_ rounds up, so (with MALLOC_ALIGNMENT == 8)
+   idx 0   bytes 0
+   idx 1   bytes 1..8
+   idx 2   bytes 9..16
+   etc.  */
+
+/* This is another arbitrary limit, which tunables can change. */
+# define TCACHE_FILL_COUNT 7
+#endif
+
/*
REALLOC_ZERO_BYTES_FREES should be set if a call to
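
As a cross-check of the index math above, here is a small standalone
sketch (not part of the patch) that hard-codes MALLOC_ALIGNMENT = 16
and SIZE_SZ = 8, the typical 64-bit values:

#include <assert.h>
#include <stdio.h>

#define ALIGNMENT 16   /* stands in for MALLOC_ALIGNMENT */
#define SZ 8           /* stands in for SIZE_SZ */
#define size2tidx_(bytes) (((bytes) + ALIGNMENT - 1) / ALIGNMENT)
#define usize2tidx(x) size2tidx_ (x)
#define csize2tidx(x) size2tidx_ ((x) - SZ)
#define tidx2csize(idx) ((idx) * ALIGNMENT + SZ)

int
main (void)
{
  /* Rounding up: 0 maps to idx 0, 1..16 to idx 1, 17..32 to idx 2.  */
  assert (usize2tidx (0) == 0);
  assert (usize2tidx (1) == 1);
  assert (usize2tidx (16) == 1);
  assert (usize2tidx (17) == 2);
  /* csize2tidx strips the SIZE_SZ chunk header first, so it is the
     inverse of tidx2csize.  */
  assert (csize2tidx (tidx2csize (5)) == 5);
  puts ("index math OK");
  return 0;
}
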
@@ -1711,6 +1738,17 @@ struct malloc_par
/* First address handed out by MORECORE/sbrk. */
char *sbrk_base;
+
+#if USE_TCACHE
+  /* Maximum number of buckets to use, and the same limit in bytes.  */
+  size_t tcache_max;
+  size_t tcache_max_bytes;
+ /* Maximum number of chunks in each bucket. */
+ size_t tcache_count;
+  /* Maximum number of chunks to process from the unsorted list while
+     filling the cache before a cached chunk is returned; 0 = no limit.  */
+ size_t tcache_unsorted_limit;
+#endif
};
/* There are several instances of this struct ("arenas") in this
@@ -1749,8 +1787,22 @@ static struct malloc_par mp_ =
.trim_threshold = DEFAULT_TRIM_THRESHOLD,
#define NARENAS_FROM_NCORES(n) ((n) * (sizeof (long) == 4 ? 2 : 8))
.arena_test = NARENAS_FROM_NCORES (1)
+#if USE_TCACHE
+ ,
+ .tcache_count = TCACHE_FILL_COUNT,
+ .tcache_max = TCACHE_IDX,
+ .tcache_max_bytes = tidx2usize (TCACHE_IDX),
+ .tcache_unsorted_limit = 0 /* No limit */
+#endif
};
+/* Non-public mallopt parameters.  */
+#if USE_TCACHE
+# define M_TCACHE_COUNT -9
+# define M_TCACHE_MAX -10
+# define M_TCACHE_UNSORTED_LIMIT -11
+#endif
+
/* Maximum size of memory handled in fastbins. */
static INTERNAL_SIZE_T global_max_fast;
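
For scale, with the 64-bit defaults (MALLOC_ALIGNMENT = 16, so 63
usable bins of 16..1008 bytes at up to 7 chunks each), the absolute
worst case a thread can pin in its cache is about
7 * (16 + 32 + ... + 1008) = 7 * 32256 bytes, roughly 220 KiB, plus
chunk headers; typical usage sits far below that.
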
@@ -2874,6 +2926,38 @@ mremap_chunk (mchunkptr p, size_t new_size)
/*------------------------ Public wrappers. --------------------------------*/
+#if USE_TCACHE
+
+typedef struct TCacheEntry {
+ struct TCacheEntry *next;
+} TCacheEntry;
+
+/* There is one of these for each thread, which contains the
+   per-thread cache (hence "TCache").  Keeping overall size low is
+   mildly important.  INITTED makes sure we don't use the cache while
+   the thread is shutting down.  COUNTS and ENTRIES are redundant
+   (counts could be recomputed from the lists); both are kept for speed. */
+typedef struct TCache {
+  /* 0 = uninitialized, 1 = normal, anything else = shutting down.  */
+ char initted;
+ char counts[TCACHE_IDX];
+ TCacheEntry *entries[TCACHE_IDX];
+} TCache;
+
+static __thread TCache tcache = { 0, {0}, {0} };
+
+static void __attribute__ ((section ("__libc_thread_freeres_fn")))
+tcache_thread_freeres (void)
+{
+  /* Ideally we would also flush the cached chunks back to their
+     arenas, but most of the time the whole process is exiting anyway. */
+ if (tcache.initted == 1)
+ tcache.initted = 2;
+}
+text_set_element (__libc_thread_subfreeres, tcache_thread_freeres);
+
+#endif
+
void *
__libc_malloc (size_t bytes)
{
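
Each bin is a bounded LIFO free list threaded through the user area of
the cached chunks themselves.  A minimal standalone model of that
discipline (illustrative only; BINS and LIMIT stand in for TCACHE_IDX
and mp_.tcache_count):

#include <stddef.h>

typedef struct Entry { struct Entry *next; } Entry;

enum { BINS = 64, LIMIT = 7 };

static __thread Entry *entries[BINS];
static __thread char counts[BINS];

/* Cache MEM in bin IDX; fails if the bin is already full.  */
static int
cache_put (size_t idx, void *mem)
{
  if (counts[idx] >= LIMIT)
    return 0;
  Entry *e = mem;
  e->next = entries[idx];
  entries[idx] = e;
  ++counts[idx];
  return 1;
}

/* Pop the most recently cached block from bin IDX, or NULL.  */
static void *
cache_get (size_t idx)
{
  Entry *e = entries[idx];
  if (e == NULL)
    return NULL;
  entries[idx] = e->next;
  --counts[idx];
  return e;
}

int
main (void)
{
  char block[2][32];
  cache_put (2, block[0]);
  cache_put (2, block[1]);
  /* LIFO: the most recently freed block is reused first.  */
  return cache_get (2) == (void *) block[1] ? 0 : 1;
}
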
@@ -2884,6 +2968,22 @@ __libc_malloc (size_t bytes)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));
+#if USE_TCACHE
+  /* int_free also calls request2size; be careful not to pad twice. */
+ size_t tbytes = request2size (bytes);
+ size_t tc_idx = csize2tidx (tbytes);
+
+ if (tc_idx < mp_.tcache_max
+ && tc_idx < TCACHE_IDX /* to appease gcc */
+ && tcache.entries[tc_idx] != NULL
+ && tcache.initted == 1)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ --tcache.counts[tc_idx];
+ return (void *) e;
+ }
+#endif
arena_get (ar_ptr, bytes);
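
The intended effect of this fast path, as a sketch (assuming the cache
has been initialized and the defaults above are in force): after the
first free of a given size on a thread, same-size allocations are
served from the cache with no arena lock and no atomics:

#include <stdlib.h>

int
main (void)
{
  void *p = malloc (100);
  free (p);                /* lands in this thread's tcache bin */
  void *q = malloc (100);  /* served from the tcache; typically q == p */
  free (q);
  return 0;
}
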
@@ -3335,6 +3435,10 @@ _int_malloc (mstate av, size_t bytes)
mchunkptr fwd; /* misc temp for linking */
mchunkptr bck; /* misc temp for linking */
+#if USE_TCACHE
+ size_t tcache_unsorted_count; /* count of unsorted chunks processed */
+#endif
+
const char *errstr = NULL;
/*
@@ -3387,6 +3491,38 @@ _int_malloc (mstate av, size_t bytes)
return NULL;
}
check_remalloced_chunk (av, victim, nb);
+#if USE_TCACHE
+ /* While we're here, if we see other chunks of the same size,
+ stash them in the tcache. */
+ size_t tc_idx = csize2tidx (nb);
+ if (tc_idx < mp_.tcache_max)
+ {
+ mchunkptr tc_victim;
+ int found = 0;
+
+ /* While bin not empty and tcache not full, copy chunks over. */
+ while (tcache.counts[tc_idx] < mp_.tcache_count
+ && (pp = *fb) != NULL)
+ {
+ do
+ {
+ tc_victim = pp;
+ if (tc_victim == NULL)
+ break;
+ }
+ while ((pp = catomic_compare_and_exchange_val_acq (fb, tc_victim->fd, tc_victim))
+ != tc_victim);
+ if (tc_victim != 0)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem (tc_victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ ++tcache.counts[tc_idx];
+ ++found;
+ }
+ }
+ }
+#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
@@ -3425,6 +3561,37 @@ _int_malloc (mstate av, size_t bytes)
if (av != &main_arena)
set_non_main_arena (victim);
check_malloced_chunk (av, victim, nb);
+#if USE_TCACHE
+ /* While we're here, if we see other chunks of the same size,
+ stash them in the tcache. */
+ size_t tc_idx = csize2tidx (nb);
+ if (tc_idx < mp_.tcache_max)
+ {
+ mchunkptr tc_victim;
+ int found = 0;
+
+ /* While bin not empty and tcache not full, copy chunks over. */
+ while (tcache.counts[tc_idx] < mp_.tcache_count
+ && (tc_victim = last (bin)) != bin)
+ {
+ if (tc_victim != 0)
+ {
+ bck = tc_victim->bk;
+ set_inuse_bit_at_offset (tc_victim, nb);
+ if (av != &main_arena)
+ set_non_main_arena (tc_victim);
+ bin->bk = bck;
+ bck->fd = bin;
+
+ TCacheEntry *e = (TCacheEntry *) chunk2mem (tc_victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ ++tcache.counts[tc_idx];
+ ++found;
+ }
+ }
+ }
+#endif
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
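
Both prefill loops (fastbins above, smallbins here) piggy-back on a
miss that already reached the arena, moving up to tcache_count
same-sized chunks into the thread's bin in one go; presumably the
point is that subsequent hits then stay entirely thread-local.
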
@@ -3463,6 +3630,16 @@ _int_malloc (mstate av, size_t bytes)
otherwise need to expand memory to service a "small" request.
*/
+#if USE_TCACHE
+  INTERNAL_SIZE_T tcache_nb = 0;
+  size_t tc_idx = csize2tidx (nb);
+  if (tc_idx < mp_.tcache_max)
+    tcache_nb = nb;
+  int return_cached = 0;
+
+  tcache_unsorted_count = 0;
+#endif
+
for (;; )
{
int iters = 0;
@@ -3523,10 +3700,29 @@ _int_malloc (mstate av, size_t bytes)
set_inuse_bit_at_offset (victim, size);
if (av != &main_arena)
set_non_main_arena (victim);
+#if USE_TCACHE
+          /* Fill the cache first; the user is served only once the cache
+             fills, possibly with one of these very chunks.  */
+ if (tcache_nb
+ && tcache.counts[tc_idx] < mp_.tcache_count)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem (victim);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ ++tcache.counts[tc_idx];
+ return_cached = 1;
+ continue;
+ }
+ else
+ {
+#endif
check_malloced_chunk (av, victim, nb);
void *p = chunk2mem (victim);
alloc_perturb (p, bytes);
return p;
+#if USE_TCACHE
+ }
+#endif
}
/* place chunk in bin */
@@ -3593,11 +3789,37 @@ _int_malloc (mstate av, size_t bytes)
fwd->bk = victim;
bck->fd = victim;
+#if USE_TCACHE
+ /* If we've processed as many chunks as we're allowed while
+ filling the cache, return one of the cached ones. */
+ ++tcache_unsorted_count;
+ if (return_cached
+ && mp_.tcache_unsorted_limit > 0
+ && tcache_unsorted_count > mp_.tcache_unsorted_limit)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ --tcache.counts[tc_idx];
+ return (void *) e;
+ }
+#endif
+
#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)
break;
}
+#if USE_TCACHE
+ /* If all the small chunks we found ended up cached, return one now. */
+ if (return_cached)
+ {
+ TCacheEntry *e = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e->next;
+ --tcache.counts[tc_idx];
+ return (void *) e;
+ }
+#endif
+
/*
If a large request, scan through the chunks of current bin in
sorted order to find smallest that fits. Use the skip list for this.
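
Net effect of the two return_cached exits: with the default
tcache_unsorted_limit of 0, the unsorted scan keeps sorting chunks
(and prefilling the cache) until the list is exhausted or MAX_ITERS
trips, and only then hands back a cached chunk; a nonzero limit cuts
the scan short once a candidate is already cached and more than
tcache_unsorted_limit chunks have been processed.
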
@@ -3883,6 +4105,23 @@ _int_free (mstate av, mchunkptr p, int have_lock)
check_inuse_chunk(av, p);
+#if USE_TCACHE
+ {
+ size_t tc_idx = csize2tidx (size);
+
+ if (tc_idx < mp_.tcache_max
+ && tcache.counts[tc_idx] < mp_.tcache_count
+ && tcache.initted == 1)
+ {
+ TCacheEntry *e = (TCacheEntry *) chunk2mem (p);
+ e->next = tcache.entries[tc_idx];
+ tcache.entries[tc_idx] = e;
+ ++tcache.counts[tc_idx];
+ return;
+ }
+ }
+#endif
+
/*
If eligible, place chunk on a fastbin so it can be found
and used quickly in malloc.
@@ -4844,6 +5083,38 @@ do_set_arena_max (size_t value)
return 1;
}
+#if USE_TCACHE
+static inline int
+__always_inline
+do_set_tcache_max (size_t value)
+{
+ LIBC_PROBE (memory_mallopt_tcache_max_bytes, 2, value, mp_.tcache_max_bytes);
+  if (value <= MAX_TCACHE_SIZE) /* value is unsigned, so no lower bound */
+ {
+ mp_.tcache_max_bytes = value;
+ mp_.tcache_max = usize2tidx (value) + 1;
+ }
+ return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_count (size_t value)
+{
+ LIBC_PROBE (memory_mallopt_tcache_count, 2, value, mp_.tcache_count);
+ mp_.tcache_count = value;
+ return 1;
+}
+
+static inline int
+__always_inline
+do_set_tcache_unsorted_limit (size_t value)
+{
+ LIBC_PROBE (memory_mallopt_tcache_unsorted_limit, 2, value, mp_.tcache_unsorted_limit);
+ mp_.tcache_unsorted_limit = value;
+ return 1;
+}
+#endif
int
__libc_mallopt (int param_number, int value)
@@ -4904,6 +5175,20 @@ __libc_mallopt (int param_number, int value)
if (value > 0)
do_set_arena_test (value);
break;
+#if USE_TCACHE
+ case M_TCACHE_COUNT:
+ if (value >= 0)
+ do_set_tcache_count (value);
+ break;
+ case M_TCACHE_MAX:
+ if (value >= 0)
+ do_set_tcache_max (value);
+ break;
+ case M_TCACHE_UNSORTED_LIMIT:
+ if (value >= 0)
+ do_set_tcache_unsorted_limit (value);
+ break;
+#endif
}
__libc_lock_unlock (av->mutex);
return res;
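
Since the M_TCACHE_* constants are deliberately non-public, the only
way to poke them programmatically is to mirror their values; a
throwaway test might look like the sketch below (tunables or the
MALLOC_TCACHE_* environment variables are the intended interface):

#include <malloc.h>
#include <stdio.h>

/* Mirrors of the non-public constants from malloc.c above.  */
#define M_TCACHE_COUNT          -9
#define M_TCACHE_MAX            -10
#define M_TCACHE_UNSORTED_LIMIT -11

int
main (void)
{
  mallopt (M_TCACHE_MAX, 256);           /* cache requests up to 256 bytes */
  mallopt (M_TCACHE_COUNT, 4);           /* at most 4 chunks per bin */
  mallopt (M_TCACHE_UNSORTED_LIMIT, 16); /* bound the unsorted scan */

  void *p = malloc (200);
  free (p);                 /* should land in the tcache */
  void *q = malloc (200);   /* and come straight back out */
  printf ("recycled: %s\n", p == q ? "yes" : "no");
  free (q);
  return 0;
}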