[1/3] malloc: Add madvise support for Transparent Huge Pages

Message ID 20210813210429.1147112-2-adhemerval.zanella@linaro.org
State Superseded
Headers
Series malloc: improve THP effectiveness |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Netto Aug. 13, 2021, 9:04 p.m. UTC
  Linux Transparent Huge Pages (THP) currently supports three different
states: 'never', 'madvise', and 'always'.  The 'never' is
self-explanatory and 'always' will enable THP for all anonymous
memory.  However, 'madvise' is still the default for some systems and
in that case THP will only be used if the memory range is explicitly
advised by the program through the madvise(MADV_HUGEPAGE) call.

To enable it a new tunable is provided, 'glibc.malloc.thp_pagesize',
where the user can either enable THP through madvise using the
default huge page size by using a value of '1' or by specifying
a different large page size if the system supports it (Linux currently
only supports one page size for THP, even if the architecture supports
multiple sizes).

This patch issues the madvise(MADV_HUGEPAGE) call after a successful
mmap() or sbrk() call at sysmalloc(), and also in new_heap() and
mremap_chunk().  The default malloc_verify_thp_pagesize()
does not enable it even if the tunable is set.

Checked on x86_64-linux-gnu.
---
 NEWS                           |  5 +++-
 elf/dl-tunables.list           |  5 ++++
 elf/tst-rtld-list-tunables.exp |  1 +
 malloc/arena.c                 |  5 ++++
 malloc/malloc-internal.h       |  1 +
 malloc/malloc.c                | 45 ++++++++++++++++++++++++++++++++++
 manual/tunables.texi           | 11 +++++++++
 sysdeps/generic/malloc-thp.h   | 32 ++++++++++++++++++++++++
 8 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 sysdeps/generic/malloc-thp.h
  

Patch

diff --git a/NEWS b/NEWS
index 79c895e382..85b7933e4d 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,10 @@  Version 2.35
 
 Major new features:
 
-  [Add new features here]
+* On Linux, a new tunable, glibc.malloc.thp_pagesize, can be used to
+  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
+  It forces the use of Transparent Huge Pages when madvise global mode
+  is set and might improve performance depending on the workload.
 
 Deprecated and removed features, and other changes affecting compatibility:
 
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 8ddd4a2314..77d3662ffd 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -92,6 +92,11 @@  glibc {
       minval: 0
       security_level: SXID_IGNORE
     }
+    thp_pagesize {
+      type: SIZE_T
+      minval: 0
+      default: 0
+    }
   }
   cpu {
     hwcap_mask {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 9f66c52885..532af4eabc 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -8,6 +8,7 @@  glibc.malloc.perturb: 0 (min: 0, max: 255)
 glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.thp_pagesize: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
diff --git a/malloc/arena.c b/malloc/arena.c
index 667484630e..7ec316a906 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -231,6 +231,7 @@  TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
 TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
+TUNABLE_CALLBACK_FNDECL (set_thp_pagesize, size_t)
 #else
 /* Initialization routine. */
 #include <string.h>
@@ -331,6 +332,7 @@  ptmalloc_init (void)
 	       TUNABLE_CALLBACK (set_tcache_unsorted_limit));
 # endif
   TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
+  TUNABLE_GET (thp_pagesize, size_t, TUNABLE_CALLBACK (set_thp_pagesize));
 #else
   if (__glibc_likely (_environ != NULL))
     {
@@ -509,6 +511,9 @@  new_heap (size_t size, size_t top_pad)
       __munmap (p2, HEAP_MAX_SIZE);
       return 0;
     }
+
+  sysmadvise_thp (p2, size);
+
   h = (heap_info *) p2;
   h->size = size;
   h->mprotect_size = size;
diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
index 0c7b5a183c..2efef06f35 100644
--- a/malloc/malloc-internal.h
+++ b/malloc/malloc-internal.h
@@ -22,6 +22,7 @@ 
 #include <malloc-machine.h>
 #include <malloc-sysdep.h>
 #include <malloc-size.h>
+#include <malloc-thp.h>
 
 /* Called in the parent process before a fork.  */
 void __malloc_fork_lock_parent (void) attribute_hidden;
diff --git a/malloc/malloc.c b/malloc/malloc.c
index e065785af7..52ea84a63d 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1881,6 +1881,11 @@  struct malloc_par
   INTERNAL_SIZE_T arena_test;
   INTERNAL_SIZE_T arena_max;
 
+#if HAVE_TUNABLES
+  /* Transparent Large Page support.  */
+  INTERNAL_SIZE_T thp_pagesize;
+#endif
+
   /* Memory map support */
   int n_mmaps;
   int n_mmaps_max;
@@ -2009,6 +2014,20 @@  free_perturb (char *p, size_t n)
 
 #include <stap-probe.h>
 
+/* ----------- Routines dealing with transparent huge pages ----------- */
+
+static inline void
+sysmadvise_thp (void *p, INTERNAL_SIZE_T size)
+{
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
+  /* Skip when the tunable is unset (thp_pagesize == 0) or when the
+     area is smaller than one huge page; otherwise advise the range.  */
+  if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
+    return;
+  __madvise (p, size, MADV_HUGEPAGE);
+#endif
+}
+
 /* ------------------- Support for multiple arenas -------------------- */
 #include "arena.c"
 
@@ -2446,6 +2465,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
           if (mm != MAP_FAILED)
             {
+	      sysmadvise_thp (mm, size);
+
               /*
                  The offset to the start of the mmapped region is stored
                  in the prev_size field of the chunk. This allows us to adjust
@@ -2607,6 +2628,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
       if (size > 0)
         {
           brk = (char *) (MORECORE (size));
+	  if (brk != (char *) (MORECORE_FAILURE))
+	    sysmadvise_thp (brk, size);
           LIBC_PROBE (memory_sbrk_more, 2, brk, size);
         }
 
@@ -2638,6 +2661,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
               if (mbrk != MAP_FAILED)
                 {
+		  sysmadvise_thp (mbrk, size);
+
                   /* We do not need, and cannot use, another sbrk call to find end */
                   brk = mbrk;
                   snd_brk = brk + size;
@@ -2749,6 +2774,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
                       correction = 0;
                       snd_brk = (char *) (MORECORE (0));
                     }
+		  else
+		    sysmadvise_thp (snd_brk, correction);
                 }
 
               /* handle non-contiguous cases */
@@ -2989,6 +3016,8 @@  mremap_chunk (mchunkptr p, size_t new_size)
   if (cp == MAP_FAILED)
     return 0;
 
+  sysmadvise_thp (cp, new_size);
+
   p = (mchunkptr) (cp + offset);
 
   assert (aligned_OK (chunk2mem (p)));
@@ -5325,6 +5354,22 @@  do_set_mxfast (size_t value)
   return 0;
 }
 
+#if HAVE_TUNABLES
+static __always_inline int
+do_set_thp_pagesize (size_t value)
+{
+  /* VALUE == 0 leaves mp_.thp_pagesize untouched.  Otherwise store the
+     verified size, but only when it differs from GLRO(dl_pagesize).  */
+  if (value > 0)
+    {
+      size_t thps = malloc_verify_thp_pagesize (value);
+      if (thps != GLRO(dl_pagesize))
+	mp_.thp_pagesize = thps;
+    }
+  return 0;
+}
+#endif
+
 int
 __libc_mallopt (int param_number, int value)
 {
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 658547c613..3364e85ef5 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -270,6 +270,17 @@  pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
 passed to @code{malloc} for the largest bin size to enable.
 @end deftp
 
+@deftp Tunable glibc.malloc.thp_pagesize
+This tunable enables support for Transparent Huge Page through @code{madvise}
+with @code{MADV_HUGEPAGE} on the allocated memory range after @code{malloc}
+calls the system allocator.  Each architecture defines a set of possible
+values, and the input value is rounded to the supported one.
+
+The default value of this tunable is 0, which disables its usage.  The value
+of 1 means to use the default Huge Page size for the architecture, and
+a value larger than 1 is rounded to the supported size.
+@end deftp
+
 @node Dynamic Linking Tunables
 @section Dynamic Linking Tunables
 @cindex dynamic linking tunables
diff --git a/sysdeps/generic/malloc-thp.h b/sysdeps/generic/malloc-thp.h
new file mode 100644
index 0000000000..d70ceb8e1e
--- /dev/null
+++ b/sysdeps/generic/malloc-thp.h
@@ -0,0 +1,32 @@ 
+/* Transparent Huge Page support.  Generic implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#ifndef _MALLOC_THP_H
+#define _MALLOC_THP_H
+
+#include <ldsodefs.h>
+
+/* Return the preferred large page size for the requested PAGESIZE (1 means
+   the architecture default).  This generic version returns dl_pagesize.  */
+static inline size_t
+malloc_verify_thp_pagesize (size_t pagesize)
+{
+  return GLRO(dl_pagesize);
+}
+
+#endif /* _MALLOC_THP_H */