@@ -9,7 +9,10 @@ Version 2.35
Major new features:
- [Add new features here]
+* On Linux, a new tunable, glibc.malloc.thp_pagesize, can be used to
+ make malloc issue madvise with MADV_HUGEPAGE on mmap and sbrk calls.
+ It forces the use of Transparent Huge Pages when madvise global mode
+ is set and might improve performance depending on the workload.
Deprecated and removed features, and other changes affecting compatibility:
@@ -92,6 +92,11 @@ glibc {
minval: 0
security_level: SXID_IGNORE
}
+ thp_pagesize {
+ type: SIZE_T
+ minval: 0
+ default: 0
+ }
}
cpu {
hwcap_mask {
@@ -8,6 +8,7 @@ glibc.malloc.perturb: 0 (min: 0, max: 255)
glibc.malloc.tcache_count: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
+glibc.malloc.thp_pagesize: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
@@ -231,6 +231,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
#endif
TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
+TUNABLE_CALLBACK_FNDECL (set_thp_pagesize, size_t)
#else
/* Initialization routine. */
#include <string.h>
@@ -331,6 +332,7 @@ ptmalloc_init (void)
TUNABLE_CALLBACK (set_tcache_unsorted_limit));
# endif
TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
+ TUNABLE_GET (thp_pagesize, size_t, TUNABLE_CALLBACK (set_thp_pagesize));
#else
if (__glibc_likely (_environ != NULL))
{
@@ -509,6 +511,9 @@ new_heap (size_t size, size_t top_pad)
__munmap (p2, HEAP_MAX_SIZE);
return 0;
}
+
+ sysmadvise_thp (p2, size);
+
h = (heap_info *) p2;
h->size = size;
h->mprotect_size = size;
@@ -22,6 +22,7 @@
#include <malloc-machine.h>
#include <malloc-sysdep.h>
#include <malloc-size.h>
+#include <malloc-thp.h>
/* Called in the parent process before a fork. */
void __malloc_fork_lock_parent (void) attribute_hidden;
@@ -1881,6 +1881,11 @@ struct malloc_par
INTERNAL_SIZE_T arena_test;
INTERNAL_SIZE_T arena_max;
+#if HAVE_TUNABLES
+ /* Transparent Huge Page support. */
+ INTERNAL_SIZE_T thp_pagesize;
+#endif
+
/* Memory map support */
int n_mmaps;
int n_mmaps_max;
@@ -2009,6 +2014,20 @@ free_perturb (char *p, size_t n)
#include <stap-probe.h>
+/* ----------- Routines dealing with transparent huge pages ----------- */
+
+/* Advise the kernel to back the SIZE bytes starting at P with transparent
+   huge pages.  This does nothing unless mp_.thp_pagesize is non-zero and
+   the area is at least that large.  The result of madvise is ignored
+   because the advice is only a hint.  */
+static inline void
+sysmadvise_thp (void *p, INTERNAL_SIZE_T size)
+{
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
+  /* Do not consider areas smaller than a huge page or if the tunable is
+     not active.  */
+  if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
+    return;
+
+  /* madvise fails with EINVAL unless the address is page aligned.  mmap
+     results always are, but a break returned by sbrk need not be, so
+     extend the range down to the start of the page containing P.  */
+  size_t misalign = (uintptr_t) p & (GLRO (dl_pagesize) - 1);
+  if (__glibc_unlikely (misalign != 0))
+    {
+      p = (char *) p - misalign;
+      size += misalign;
+    }
+
+  __madvise (p, size, MADV_HUGEPAGE);
+#endif
+}
+
/* ------------------- Support for multiple arenas -------------------- */
#include "arena.c"
@@ -2446,6 +2465,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
if (mm != MAP_FAILED)
{
+ sysmadvise_thp (mm, size);
+
/*
The offset to the start of the mmapped region is stored
in the prev_size field of the chunk. This allows us to adjust
@@ -2607,6 +2628,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
if (size > 0)
{
brk = (char *) (MORECORE (size));
+ if (brk != (char *) (MORECORE_FAILURE))
+ sysmadvise_thp (brk, size);
LIBC_PROBE (memory_sbrk_more, 2, brk, size);
}
@@ -2638,6 +2661,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
if (mbrk != MAP_FAILED)
{
+ sysmadvise_thp (mbrk, size);
+
/* We do not need, and cannot use, another sbrk call to find end */
brk = mbrk;
snd_brk = brk + size;
@@ -2749,6 +2774,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
correction = 0;
snd_brk = (char *) (MORECORE (0));
}
+ else
+ sysmadvise_thp (snd_brk, correction);
}
/* handle non-contiguous cases */
@@ -2989,6 +3016,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
if (cp == MAP_FAILED)
return 0;
+ sysmadvise_thp (cp, new_size);
+
p = (mchunkptr) (cp + offset);
assert (aligned_OK (chunk2mem (p)));
@@ -5325,6 +5354,22 @@ do_set_mxfast (size_t value)
return 0;
}
+#if HAVE_TUNABLES
+/* Handler for the thp_pagesize tunable.  VALUE is the size requested by
+   the user.  mp_.thp_pagesize is updated only when VALUE is non-zero and
+   malloc_verify_thp_pagesize maps it to something other than the default
+   page size.  Always returns 0.  */
+static __always_inline int
+do_set_thp_pagesize (size_t value)
+{
+  /* A request of zero keeps the feature switched off.  */
+  if (value == 0)
+    return 0;
+
+  size_t hugepage = malloc_verify_thp_pagesize (value);
+
+  /* Getting the default page size back means there is nothing larger to
+     advise for, so leave mp_.thp_pagesize untouched.  */
+  if (hugepage == GLRO(dl_pagesize))
+    return 0;
+
+  mp_.thp_pagesize = hugepage;
+  return 0;
+}
+#endif
+
int
__libc_mallopt (int param_number, int value)
{
@@ -270,6 +270,17 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
passed to @code{malloc} for the largest bin size to enable.
@end deftp
+@deftp Tunable glibc.malloc.thp_pagesize
+This tunable enables support for Transparent Huge Pages through @code{madvise}
+with @code{MADV_HUGEPAGE} on the allocated memory range after @code{malloc}
+calls the system allocator. Each architecture defines a set of possible values,
+and the input value is rounded to a supported one.
+
+The default value of this tunable is 0, which disables its usage. A value
+of 1 means to use the default huge page size for the architecture, and
+a value larger than 2 is rounded to the supported size.
+@end deftp
+
@node Dynamic Linking Tunables
@section Dynamic Linking Tunables
@cindex dynamic linking tunables
new file mode 100644
@@ -0,0 +1,32 @@
+/* Transparent Huge Page support. Generic implementation.
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If
+ not, see <https://www.gnu.org/licenses/>. */
+
+#ifndef _MALLOC_THP_H
+#define _MALLOC_THP_H
+
+#include <ldsodefs.h>
+
+/* Return the preferred huge page size for the requested PAGESIZE.  A
+   request of 1 stands for the architecture's default size.  This generic
+   version ignores PAGESIZE and always answers with the default page
+   size.  */
+static inline size_t
+malloc_verify_thp_pagesize (size_t pagesize)
+{
+  const size_t base_pagesize = GLRO(dl_pagesize);
+  return base_pagesize;
+}
+
+#endif /* _MALLOC_THP_H */