[v2] malloc: Cleanup tcache_init()

Message ID PAWPR08MB898240CDD45D674732284AFC83A92@PAWPR08MB8982.eurprd08.prod.outlook.com (mailing list archive)
State New
Headers
Series [v2] malloc: Cleanup tcache_init()

Checks

Context Check Description
redhat-pt-bot/TryBot-apply_patch success Patch applied to master at the time it was sent
linaro-tcwg-bot/tcwg_glibc_build--master-arm success Build passed
linaro-tcwg-bot/tcwg_glibc_build--master-aarch64 success Build passed
linaro-tcwg-bot/tcwg_glibc_check--master-arm success Test passed
redhat-pt-bot/TryBot-32bit success Build for i686
linaro-tcwg-bot/tcwg_glibc_check--master-aarch64 success Test passed

Commit Message

Wilco Dijkstra April 4, 2025, 11:27 a.m. UTC
  Hi Florian,

>> +  if (tcache)
>> +    memset (tcache, 0, sizeof (tcache_perthread_struct));
>
> I think we prefer: tcache != NULL

Good point - I also improved the comment and used "bytes" in
the memset in v2.

Cheers,
Wilco

v2: Add explicit NULL comparison, improve comment and memset

Cleanup tcache_init() by using the new __libc_malloc2 interface.
Move the tcache initialization check in __libc_malloc by using a
small tailcalled veneer.  Performance on bench-malloc-simple improves
by 0.6%, bench-malloc-thread by ~0.2% for 1 thread and ~0% for 32.

---
  

Patch

diff --git a/malloc/malloc.c b/malloc/malloc.c
index a0bc733482532ce34684d0357cb9076b03ac8a52..d3c89b5812813e9b0522862d083345d67979a588 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -588,9 +588,11 @@  tag_at (void *ptr)
   differs across systems, but is in all cases less than the maximum
   representable value of a size_t.
 */
-void*  __libc_malloc(size_t);
+void *__libc_malloc (size_t);
 libc_hidden_proto (__libc_malloc)
 
+static void *__libc_malloc2 (size_t);
+
 /*
   free(void* p)
   Releases the chunk of memory pointed to by p, that had been previously
@@ -3306,39 +3308,26 @@  tcache_thread_shutdown (void)
   __libc_free (tcache_tmp);
 }
 
+/* Initialize tcache.  In the rare case there isn't any memory available,
+   later calls will retry initialization.  */
 static void
 tcache_init(void)
 {
-  mstate ar_ptr;
-  void *victim = NULL;
-  const size_t bytes = sizeof (tcache_perthread_struct);
-
   if (tcache_shutting_down)
     return;
 
-  arena_get (ar_ptr, bytes);
-  victim = _int_malloc (ar_ptr, bytes);
-  if (!victim && ar_ptr != NULL)
-    {
-      ar_ptr = arena_get_retry (ar_ptr, bytes);
-      victim = _int_malloc (ar_ptr, bytes);
-    }
+  size_t bytes = sizeof (tcache_perthread_struct);
+  tcache = (tcache_perthread_struct *) __libc_malloc2 (bytes);
 
+  if (tcache != NULL)
+    memset (tcache, 0, bytes);
+}
 
-  if (ar_ptr != NULL)
-    __libc_lock_unlock (ar_ptr->mutex);
-
-  /* In a low memory situation, we may not be able to allocate memory
-     - in which case, we just keep trying later.  However, we
-     typically do this very early, so either there is sufficient
-     memory, or there isn't enough memory to do non-trivial
-     allocations anyway.  */
-  if (victim)
-    {
-      tcache = (tcache_perthread_struct *) victim;
-      memset (tcache, 0, sizeof (tcache_perthread_struct));
-    }
-
+static void * __attribute_noinline__
+tcache_malloc_init (size_t bytes)
+{
+  tcache_init ();
+  return __libc_malloc2 (bytes);
 }
 
 # define MAYBE_INIT_TCACHE() \
@@ -3393,8 +3382,6 @@  __libc_malloc2 (size_t bytes)
   if (!__malloc_initialized)
     ptmalloc_init ();
 
-  MAYBE_INIT_TCACHE ();
-
   if (SINGLE_THREAD_P)
     {
       victim = tag_new_usable (_int_malloc (&main_arena, bytes));
@@ -3430,9 +3417,14 @@  __libc_malloc (size_t bytes)
 {
 #if USE_TCACHE
   size_t tc_idx = csize2tidx (checked_request2size (bytes));
+  if (__glibc_likely (tc_idx < mp_.tcache_bins))
+    {
+      if (__glibc_unlikely (tcache == NULL))
+	return tcache_malloc_init (bytes);
 
-  if (tcache_available (tc_idx))
-    return tag_new_usable (tcache_get (tc_idx));
+      if (__glibc_likely (tcache->counts[tc_idx] > 0))
+	return tag_new_usable (tcache_get (tc_idx));
+    }
 #endif
 
   return __libc_malloc2 (bytes);