>@@ -3472,7 +3472,7 @@ __libc_realloc (void *oldmem, size_t bytes)
> size_t sz = memsize (oldp);
> memcpy (newp, oldmem, sz);
> (void) tag_region (chunk2mem (oldp), sz);
>- _int_free_chunk (ar_ptr, oldp, chunksize (oldp), 0);
>+ _int_free_chunk (ar_ptr, oldp, chunksize (oldp));
> }
> }
>
>@@ -4549,7 +4545,7 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
> (void) tag_region (oldmem, sz);
> newmem = tag_new_usable (newmem);
> memcpy (newmem, oldmem, sz);
>- _int_free_chunk (av, oldp, chunksize (oldp), 1);
>+ _int_free_merge_chunk (av, oldp, chunksize (oldp));
> check_inuse_chunk (av, newp);
> return newmem;
> }
Could you add a workaround to realloc to put the user-requested chunk
into the tcache? For remainders it's OK, as the user never requested
them. But in some expanding scenarios the old chunk was requested by
the user, so putting it into the tcache may improve performance
compared to putting it in the bins.
Hi,
>@@ -3472,7 +3472,7 @@ __libc_realloc (void *oldmem, size_t bytes)
> size_t sz = memsize (oldp);
> memcpy (newp, oldmem, sz);
> (void) tag_region (chunk2mem (oldp), sz);
>- _int_free_chunk (ar_ptr, oldp, chunksize (oldp), 0);
>+ _int_free_chunk (ar_ptr, oldp, chunksize (oldp));
> }
> Could you add a workaround to realloc to put the user-requested chunk
> into the tcache? For remainders it's OK, as the user never requested
> them. But in some expanding scenarios the old chunk was requested by
> the user, so putting it into the tcache may improve performance
> compared to putting it in the bins.
It's possible - but note realloc() doesn't use tcache on allocation (and hasn't
historically). In general it seems like a bad idea to keep the arena lock while
doing memcpy (or tagging) of large blocks since that can block other threads.
If we moved the locking into _int_realloc(), we could unlock if we can't reuse
the existing/next block, and then use __libc_malloc/__libc_free so that it can
use tcache in both cases. We could even avoid locking by doing a speculative
check.
Do you see any performance issues with realloc()? There are lots of things
that could be improved — for example, it should not split off tiny blocks
when shrinking; similarly, when growing it should force a minimum
percentage of growth.
Cheers,
Wilco
@@ -233,10 +233,10 @@ free_check (void *mem)
}
else
{
+ __libc_lock_unlock (main_arena.mutex);
/* Mark the chunk as belonging to the library again. */
(void)tag_region (chunk2mem (p), memsize (p));
- _int_free_chunk (&main_arena, p, chunksize (p), 1);
- __libc_lock_unlock (main_arena.mutex);
+ _int_free_chunk (&main_arena, p, chunksize (p));
}
__set_errno (err);
}
@@ -1024,7 +1024,7 @@ typedef struct malloc_chunk* mchunkptr;
/* Internal routines. */
static void* _int_malloc(mstate, size_t);
-static void _int_free_chunk (mstate, mchunkptr, INTERNAL_SIZE_T, int);
+static void _int_free_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
static INTERNAL_SIZE_T _int_free_create_chunk (mstate,
mchunkptr, INTERNAL_SIZE_T,
@@ -2403,7 +2403,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
CHUNK_HDR_SZ | PREV_INUSE);
set_foot (chunk_at_offset (old_top, old_size), CHUNK_HDR_SZ);
set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
- _int_free_chunk (av, old_top, chunksize (old_top), 1);
+ _int_free_merge_chunk (av, old_top, chunksize (old_top));
}
else
{
@@ -2674,7 +2674,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
/* If possible, release the rest. */
if (old_size >= MINSIZE)
{
- _int_free_chunk (av, old_top, chunksize (old_top), 1);
+ _int_free_merge_chunk (av, old_top, chunksize (old_top));
}
}
}
@@ -3171,12 +3171,12 @@ tcache_thread_shutdown (void)
tcache_tmp->entries[i] = REVEAL_PTR (e->next);
e->key = 0;
p = mem2chunk (e);
- _int_free_chunk (arena_for_chunk (p), p, chunksize (p), 0);
+ _int_free_chunk (arena_for_chunk (p), p, chunksize (p));
}
}
p = mem2chunk (tcache_tmp);
- _int_free_chunk (arena_for_chunk (p), p, chunksize (p), 0);
+ _int_free_chunk (arena_for_chunk (p), p, chunksize (p));
}
/* Initialize tcache. In the rare case there isn't any memory available,
@@ -3350,7 +3350,7 @@ __libc_free (void *mem)
size - MINSIZE)))
return malloc_printerr_tail ("free(): invalid size");
- _int_free_chunk (arena_for_chunk (p), p, size, 0);
+ _int_free_chunk (arena_for_chunk (p), p, size);
}
libc_hidden_def (__libc_free)
@@ -3472,7 +3472,7 @@ __libc_realloc (void *oldmem, size_t bytes)
size_t sz = memsize (oldp);
memcpy (newp, oldmem, sz);
(void) tag_region (chunk2mem (oldp), sz);
- _int_free_chunk (ar_ptr, oldp, chunksize (oldp), 0);
+ _int_free_chunk (ar_ptr, oldp, chunksize (oldp));
}
}
@@ -4253,11 +4253,10 @@ _int_malloc (mstate av, size_t bytes)
------------------------------ free ------------------------------
*/
-/* Free chunk P of SIZE bytes to the arena. HAVE_LOCK indicates where
- the arena for P has already been locked. Caller must ensure chunk
- and size are valid. */
+/* Free chunk P of SIZE bytes to the arena AV (which is not locked).
+ Caller must ensure chunk and size are valid. */
static void
-_int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
+_int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
{
/*
Consolidate other non-mmapped chunks as they arrive.
@@ -4265,22 +4264,14 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
if (!chunk_is_mmapped(p)) {
- /* Preserve errno in case block merging results in munmap. */
- int err = errno;
-
- /* If we're single-threaded, don't lock the arena. */
if (SINGLE_THREAD_P)
- have_lock = true;
-
- if (!have_lock)
- __libc_lock_lock (av->mutex);
-
- _int_free_merge_chunk (av, p, size);
-
- if (!have_lock)
- __libc_lock_unlock (av->mutex);
-
- __set_errno (err);
+ _int_free_merge_chunk (av, p, size);
+ else
+ {
+ __libc_lock_lock (av->mutex);
+ _int_free_merge_chunk (av, p, size);
+ __libc_lock_unlock (av->mutex);
+ }
}
/*
If the chunk was allocated via mmap, release via munmap().
@@ -4309,8 +4300,8 @@ _int_free_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, int have_lock)
}
}
-/* Try to merge chunk P of SIZE bytes with its neighbors. Put the
- resulting chunk on the appropriate bin list. P must not be on a
+/* Try to merge chunk P of SIZE bytes from locked arena AV with its neighbors.
+ Put the resulting chunk on the appropriate bin list. P must not be on a
bin list yet, and it can be in use. */
static void
_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
@@ -4440,6 +4431,9 @@ _int_free_maybe_trim (mstate av, INTERNAL_SIZE_T size)
if ATTEMPT_TRIMMING_THRESHOLD is reached. */
if (size >= ATTEMPT_TRIMMING_THRESHOLD)
{
+ /* Preserve errno. */
+ int err = errno;
+
if (av == &main_arena)
{
#ifndef MORECORE_CANNOT_TRIM
@@ -4456,6 +4450,8 @@ _int_free_maybe_trim (mstate av, INTERNAL_SIZE_T size)
assert (heap->ar_ptr == av);
heap_trim (heap, mp_.top_pad);
}
+
+ __set_errno (err);
}
}
@@ -4549,7 +4545,7 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
(void) tag_region (oldmem, sz);
newmem = tag_new_usable (newmem);
memcpy (newmem, oldmem, sz);
- _int_free_chunk (av, oldp, chunksize (oldp), 1);
+ _int_free_merge_chunk (av, oldp, chunksize (oldp));
check_inuse_chunk (av, newp);
return newmem;
}
@@ -4577,7 +4573,7 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
(av != &main_arena ? NON_MAIN_ARENA : 0));
/* Mark remainder as inuse so free() won't complain */
set_inuse_bit_at_offset (remainder, remainder_size);
- _int_free_chunk (av, remainder, chunksize (remainder), 1);
+ _int_free_merge_chunk (av, remainder, chunksize (remainder));
}
check_inuse_chunk (av, newp);