[11/16] malloc: Use branches instead of mtag_granule_mask

Message ID 93b2191bf171072a519f19e9f8e763af9156213f.1614874816.git.szabolcs.nagy@arm.com
State Committed
Commit 63a20eb03c0c363cf5271eb3a2fa0bb7552c01be
Headers
Series memory tagging improvements |

Commit Message

Szabolcs Nagy March 4, 2021, 4:33 p.m. UTC
  The branches may be better optimized since mtag_enabled is widely used.

A granule size larger than a chunk header is not supported, since then we
cannot have both the chunk header and the user area granule aligned.  To
fix that for targets with a large granule, the chunk layout has to change.

So the code that attempted to handle the granule mask in a general way was
changed.  This simplified CHUNK_AVAILABLE_SIZE and the logic in
malloc_usable_size.
---
 malloc/arena.c  |  1 -
 malloc/malloc.c | 34 ++++++++++++++--------------------
 2 files changed, 14 insertions(+), 21 deletions(-)
  

Patch

diff --git a/malloc/arena.c b/malloc/arena.c
index 1e83bb66bd..9fbbb38a15 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -306,7 +306,6 @@  ptmalloc_init (void)
 
       mtag_enabled = true;
       mtag_mmap_flags = __MTAG_MMAP_FLAGS;
-      mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
     }
 #endif
 
diff --git a/malloc/malloc.c b/malloc/malloc.c
index b1ee0f450b..8854afec88 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -442,7 +442,6 @@  void *(*__morecore)(ptrdiff_t) = __default_morecore;
 #ifdef USE_MTAG
 static bool mtag_enabled = false;
 static int mtag_mmap_flags = 0;
-static size_t mtag_granule_mask = ~(size_t)0;
 #else
 # define mtag_enabled false
 # define mtag_mmap_flags 0
@@ -1333,15 +1332,16 @@  nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
 
 /* Available size of chunk.  This is the size of the real usable data
-   in the chunk, plus the chunk header.  */
-#ifdef USE_MTAG
-#define CHUNK_AVAILABLE_SIZE(p) \
-  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
-   & mtag_granule_mask)
-#else
-#define CHUNK_AVAILABLE_SIZE(p) \
-  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
-#endif
+   in the chunk, plus the chunk header.  Note: If memory tagging is
+   enabled the layout changes to accommodate the granule size, this is
+   wasteful for small allocations so not done by default.  The logic
+   does not work if chunk headers are not granule aligned.  */
+_Static_assert (__MTAG_GRANULE_SIZE <= CHUNK_HDR_SZ,
+		"memory tagging is not supported with large granule.");
+#define CHUNK_AVAILABLE_SIZE(p)                                       \
+  (__MTAG_GRANULE_SIZE > SIZE_SZ && __glibc_unlikely (mtag_enabled) ? \
+    chunksize (p) :                                                   \
+    chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
 
 /* Check if REQ overflows when padded and aligned and if the resulting value
    is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
@@ -1353,7 +1353,6 @@  checked_request2size (size_t req, size_t *sz) __nonnull (1)
   if (__glibc_unlikely (req > PTRDIFF_MAX))
     return false;
 
-#ifdef USE_MTAG
   /* When using tagged memory, we cannot share the end of the user
      block with the header for the next chunk, so ensure that we
      allocate blocks that are rounded up to the granule size.  Take
@@ -1361,8 +1360,9 @@  checked_request2size (size_t req, size_t *sz) __nonnull (1)
      number.  Ideally, this would be part of request2size(), but that
      must be a macro that produces a compile time constant if passed
      a constant literal.  */
-  req = (req + ~mtag_granule_mask) & mtag_granule_mask;
-#endif
+  if (__glibc_unlikely (mtag_enabled))
+    req = (req + (__MTAG_GRANULE_SIZE - 1)) &
+	  ~(size_t)(__MTAG_GRANULE_SIZE - 1);
 
   *sz = request2size (req);
   return true;
@@ -5112,14 +5112,8 @@  musable (void *mem)
 	    result = chunksize (p) - CHUNK_HDR_SZ;
 	}
       else if (inuse (p))
-	result = chunksize (p) - SIZE_SZ;
+	result = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
 
-#ifdef USE_MTAG
-      /* The usable space may be reduced if memory tagging is needed,
-	 since we cannot share the user-space data with malloc's internal
-	 data structure.  */
-      result &= mtag_granule_mask;
-#endif
       return result;
     }
   return 0;