diff mbox series

[v5,1/7] malloc: Add madvise support for Transparent Huge Pages

Message ID 20211214185806.4109231-2-adhemerval.zanella@linaro.org
State Committed
Headers show
Series malloc: Improve Huge Page support | expand

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Dec. 14, 2021, 6:58 p.m. UTC
Linux Transparent Huge Pages (THP) currently supports three different
states: 'never', 'madvise', and 'always'.  The 'never' state is
self-explanatory and 'always' will enable THP for all anonymous
pages.  However, 'madvise' is still the default on some systems, and
in that case THP will only be used if the memory range is explicitly
advised by the program through a madvise(MADV_HUGEPAGE) call.

To enable it, a new tunable is provided, 'glibc.malloc.hugetlb',
where setting it to a value different from 0 enables the madvise call.

This patch issues the madvise(MADV_HUGEPAGE) call after a successful
mmap() call at sysmalloc() with sizes larger than the default huge
page size.  The madvise() call is disabled if the system does not
support THP or if it has the mode set to "never", and on Linux only
one page size is supported for THP, even if the architecture supports
multiple sizes.

To test it, a new rule is added, tests-malloc-hugetlb1, which runs the
added tests with the required GLIBC_TUNABLES setting.

Checked on x86_64-linux-gnu.
---
 NEWS                                       |  5 ++
 Rules                                      | 19 ++++++
 elf/dl-tunables.list                       |  5 ++
 elf/tst-rtld-list-tunables.exp             |  1 +
 malloc/Makefile                            | 16 +++++
 malloc/arena.c                             |  5 ++
 malloc/malloc-internal.h                   |  1 +
 malloc/malloc.c                            | 47 ++++++++++++++
 manual/tunables.texi                       | 10 +++
 sysdeps/generic/Makefile                   |  8 +++
 sysdeps/generic/malloc-hugepages.c         | 31 +++++++++
 sysdeps/generic/malloc-hugepages.h         | 37 +++++++++++
 sysdeps/unix/sysv/linux/malloc-hugepages.c | 74 ++++++++++++++++++++++
 13 files changed, 259 insertions(+)
 create mode 100644 sysdeps/generic/malloc-hugepages.c
 create mode 100644 sysdeps/generic/malloc-hugepages.h
 create mode 100644 sysdeps/unix/sysv/linux/malloc-hugepages.c

Comments

DJ Delorie Dec. 15, 2021, 3:06 a.m. UTC | #1
Minor tweaks to a comment but otherwise LGTM
Reviewed-by: DJ Delorie <dj@redhat.com>

Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> +* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
> +  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
> +  It might improve performance with Transparent Huge Pages madvise mode
> +  depending of the workload.

Suggest replacing "It" with "Setting this" but it's just NEWS.  Ok.

> diff --git a/Rules b/Rules
> @@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
>         $(tests-container:%=$(objpfx)%.out) \
>         $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
>         $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
> +       $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \

Ok.

>  tests-expected = $(tests) $(tests-internal) $(tests-printers) \
>  	$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
> +	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
>  	$(tests-mcheck:%=%-mcheck)

Ok.

> +binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)

Ok.

> +binaries-malloc-hugetlb1-tests =

Ok.

> +ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" ""
> +$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \
> +  $(link-extra-libs-tests) \
> +  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
> +  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
> +	$(+link-tests)
> +endif

Adds build rules for new targets, ok.

> +# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=1
> +define malloc-hugetlb1-ENVS
> +$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
> +endef
> +$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))

Ok.


> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
> +    hugetlb {
> +      type: INT_32
> +      minval: 0
> +      maxval: 1
> +    }

Ok.

> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
> +glibc.malloc.hugetlb: 0 (min: 0, max: 1)

Ok.

> diff --git a/malloc/Makefile b/malloc/Makefile
>  
> +# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the
> +# Transparent Huge Pages support.  We need exclude some tests that define
> +# the ENV vars.
> +tests-exclude-hugetlb1 = \
> +	tst-compathooks-off \
> +	tst-compathooks-on \
> +	tst-interpose-nothread \
> +	tst-interpose-thread \
> +	tst-interpose-static-nothread \
> +	tst-interpose-static-thread \
> +	tst-malloc-usable \
> +	tst-malloc-usable-tunables \
> +	tst-mallocstate
> +tests-malloc-hugetlb1 = \
> +	$(filter-out $(tests-exclude-hugetlb1), $(tests))

Ok.

> diff --git a/malloc/arena.c b/malloc/arena.c
> +TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)

Ok.

> +  TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));

Ok.

> @@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad)
> +
> +  madvise_thp (p2, size);

Ok.

> diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
> +#include <malloc-hugepages.h>

Ok.

> diff --git a/malloc/malloc.c b/malloc/malloc.c
> +#if HAVE_TUNABLES
> +  /* Transparent Large Page support.  */
> +  INTERNAL_SIZE_T thp_pagesize;
> +#endif

Ok.

> @@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n)
> +/* ----------- Routines dealing with transparent huge pages ----------- */
> +
> +static inline void
> +madvise_thp (void *p, INTERNAL_SIZE_T size)
> +{
> +#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
> +  /* Do not consider areas smaller than a huge page or if the tunable is
> +     not active.  */
> +  if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
> +    return;
> +  __madvise (p, size, MADV_HUGEPAGE);
> +#endif
> +}

Ok.

> +	      madvise_thp (mm, size);

Ok.

>        if (size > 0)
>          {
>            brk = (char *) (MORECORE (size));
> +	  if (brk != (char *) (MORECORE_FAILURE))
> +	    madvise_thp (brk, size);
>            LIBC_PROBE (memory_sbrk_more, 2, brk, size);
>          }

Ok.

>                if (mbrk != MAP_FAILED)
>                  {
> +		  madvise_thp (mbrk, size);

Ok.

> +		  else
> +		    madvise_thp (snd_brk, correction);

Ok.

> @@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
>    if (cp == MAP_FAILED)
>      return 0;
>  
> +  madvise_thp (cp, new_size);

Ok.

> +#if HAVE_TUNABLES
> +static __always_inline int
> +do_set_hugetlb (int32_t value)
> +{
> +  if (value == 1)
> +    {
> +      enum malloc_thp_mode_t thp_mode = __malloc_thp_mode ();
> +      /*
> +	 Only enables THP usage is system does support it and has at least
> +	 always or madvise mode.  Otherwise the madvise() call is wasteful.
> +       */
> +      if (thp_mode == malloc_thp_mode_madvise)
> +	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
> +    }
> +  return 0;
> +}
> +#endif

s/is/if/

The "always or madvise mode" comment doesn't match the "== madvise"
logic.  I suspect the logic is ok, so... ok with fixed comment.

> diff --git a/manual/tunables.texi b/manual/tunables.texi
> +@deftp Tunable glibc.malloc.hugetlb
> +This tunable controls the usage of Huge Pages on @code{malloc} calls.  The
> +default value is @code{0}, which disables any additional support on
> +@code{malloc}.
> +
> +Setting its value to @code{1} enables the use of @code{madvise} with
> +@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
> +only if the system supports Transparent Huge Page (currently only on Linux).
> +@end deftp

Ok.

> diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
> +ifeq ($(subdir),malloc)
> +sysdep_malloc_debug_routines += malloc-hugepages
> +endif
> +
> +ifeq ($(subdir),misc)
> +sysdep_routines += malloc-hugepages
> +endif

Ok.

> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
> +/* Huge Page support.  Generic implementation.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public License as
> +   published by the Free Software Foundation; either version 2.1 of the
> +   License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; see the file COPYING.LIB.  If
> +   not, see <https://www.gnu.org/licenses/>.  */
> +
> +#include <malloc-hugepages.h>
> +
> +unsigned long int
> +__malloc_default_thp_pagesize (void)
> +{
> +  return 0;
> +}
> +
> +enum malloc_thp_mode_t
> +__malloc_thp_mode (void)
> +{
> +  return malloc_thp_mode_not_supported;
> +}

Ok.

> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
> +/* Malloc huge page support.  Generic implementation.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public License as
> +   published by the Free Software Foundation; either version 2.1 of the
> +   License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; see the file COPYING.LIB.  If
> +   not, see <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef _MALLOC_HUGEPAGES_H
> +#define _MALLOC_HUGEPAGES_H
> +
> +#include <stddef.h>
> +
> +/* Return the default transparent huge page size.  */
> +unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
> +
> +enum malloc_thp_mode_t
> +{
> +  malloc_thp_mode_always,
> +  malloc_thp_mode_madvise,
> +  malloc_thp_mode_never,
> +  malloc_thp_mode_not_supported
> +};
> +
> +enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
> +
> +#endif /* _MALLOC_HUGEPAGES_H */

Ok.

> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> +/* Huge Page support.  Linux implementation.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public License as
> +   published by the Free Software Foundation; either version 2.1 of the
> +   License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; see the file COPYING.LIB.  If
> +   not, see <https://www.gnu.org/licenses/>.  */
> +
> +#include <intprops.h>
> +#include <malloc-hugepages.h>
> +#include <not-cancel.h>
> +

Ok.

> +unsigned long int
> +__malloc_default_thp_pagesize (void)
> +{
> +  int fd = __open64_nocancel (
> +    "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
> +  if (fd == -1)
> +    return 0;
> +
> +  char str[INT_BUFSIZE_BOUND (unsigned long int)];
> +  ssize_t s = __read_nocancel (fd, str, sizeof (str));
> +  __close_nocancel (fd);
> +  if (s < 0)
> +    return 0;
> +
> +  unsigned long int r = 0;
> +  for (ssize_t i = 0; i < s; i++)
> +    {
> +      if (str[i] == '\n')
> +	break;
> +      r *= 10;
> +      r += str[i] - '0';
> +    }
> +  return r;
> +}

Ok.

> +enum malloc_thp_mode_t
> +__malloc_thp_mode (void)
> +{
> +  int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
> +			      O_RDONLY);
> +  if (fd == -1)
> +    return malloc_thp_mode_not_supported;
> +
> +  static const char mode_always[]  = "[always] madvise never\n";
> +  static const char mode_madvise[] = "always [madvise] never\n";
> +  static const char mode_never[]   = "always madvise [never]\n";
> +
> +  char str[sizeof(mode_always)];
> +  ssize_t s = __read_nocancel (fd, str, sizeof (str));
> +  __close_nocancel (fd);
> +
> +  if (s == sizeof (mode_always) - 1)
> +    {
> +      if (strcmp (str, mode_always) == 0)
> +	return malloc_thp_mode_always;
> +      else if (strcmp (str, mode_madvise) == 0)
> +	return malloc_thp_mode_madvise;
> +      else if (strcmp (str, mode_never) == 0)
> +	return malloc_thp_mode_never;
> +    }
> +  return malloc_thp_mode_not_supported;
> +}

Ok.
Adhemerval Zanella Dec. 15, 2021, 12:12 p.m. UTC | #2
On 15/12/2021 00:06, DJ Delorie wrote:
> 
> Minor tweaks to a comment but otherwise LGTM
> Reviewed-by: DJ Delorie <dj@redhat.com>

Thanks.
> 
> Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
>> +* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
>> +  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
>> +  It might improve performance with Transparent Huge Pages madvise mode
>> +  depending of the workload.
> 
> Suggest replacing "It" with "Setting this" but it's just NEWS.  Ok.

Ack.

> 
>> diff --git a/Rules b/Rules
>> @@ -157,6 +157,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
>>         $(tests-container:%=$(objpfx)%.out) \
>>         $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
>>         $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
>> +       $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
> 
> Ok.
> 
>>  tests-expected = $(tests) $(tests-internal) $(tests-printers) \
>>  	$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
>> +	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
>>  	$(tests-mcheck:%=%-mcheck)
> 
> Ok.
> 
>> +binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
> 
> Ok.
> 
>> +binaries-malloc-hugetlb1-tests =
> 
> Ok.
> 
>> +ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" ""
>> +$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \
>> +  $(link-extra-libs-tests) \
>> +  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
>> +  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
>> +	$(+link-tests)
>> +endif
> 
> Adds build rules for new targets, ok.
> 
>> +# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=1
>> +define malloc-hugetlb1-ENVS
>> +$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
>> +endef
>> +$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
> 
> Ok.
> 
> 
>> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
>> +    hugetlb {
>> +      type: INT_32
>> +      minval: 0
>> +      maxval: 1
>> +    }
> 
> Ok.
> 
>> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
>> +glibc.malloc.hugetlb: 0 (min: 0, max: 1)
> 
> Ok.
> 
>> diff --git a/malloc/Makefile b/malloc/Makefile
>>  
>> +# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the
>> +# Transparent Huge Pages support.  We need exclude some tests that define
>> +# the ENV vars.
>> +tests-exclude-hugetlb1 = \
>> +	tst-compathooks-off \
>> +	tst-compathooks-on \
>> +	tst-interpose-nothread \
>> +	tst-interpose-thread \
>> +	tst-interpose-static-nothread \
>> +	tst-interpose-static-thread \
>> +	tst-malloc-usable \
>> +	tst-malloc-usable-tunables \
>> +	tst-mallocstate
>> +tests-malloc-hugetlb1 = \
>> +	$(filter-out $(tests-exclude-hugetlb1), $(tests))
> 
> Ok.
> 
>> diff --git a/malloc/arena.c b/malloc/arena.c
>> +TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
> 
> Ok.
> 
>> +  TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
> 
> Ok.
> 
>> @@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad)
>> +
>> +  madvise_thp (p2, size);
> 
> Ok.
> 
>> diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
>> +#include <malloc-hugepages.h>
> 
> Ok.
> 
>> diff --git a/malloc/malloc.c b/malloc/malloc.c
>> +#if HAVE_TUNABLES
>> +  /* Transparent Large Page support.  */
>> +  INTERNAL_SIZE_T thp_pagesize;
>> +#endif
> 
> Ok.
> 
>> @@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n)
>> +/* ----------- Routines dealing with transparent huge pages ----------- */
>> +
>> +static inline void
>> +madvise_thp (void *p, INTERNAL_SIZE_T size)
>> +{
>> +#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
>> +  /* Do not consider areas smaller than a huge page or if the tunable is
>> +     not active.  */
>> +  if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
>> +    return;
>> +  __madvise (p, size, MADV_HUGEPAGE);
>> +#endif
>> +}
> 
> Ok.
> 
>> +	      madvise_thp (mm, size);
> 
> Ok.
> 
>>        if (size > 0)
>>          {
>>            brk = (char *) (MORECORE (size));
>> +	  if (brk != (char *) (MORECORE_FAILURE))
>> +	    madvise_thp (brk, size);
>>            LIBC_PROBE (memory_sbrk_more, 2, brk, size);
>>          }
> 
> Ok.
> 
>>                if (mbrk != MAP_FAILED)
>>                  {
>> +		  madvise_thp (mbrk, size);
> 
> Ok.
> 
>> +		  else
>> +		    madvise_thp (snd_brk, correction);
> 
> Ok.
> 
>> @@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
>>    if (cp == MAP_FAILED)
>>      return 0;
>>  
>> +  madvise_thp (cp, new_size);
> 
> Ok.
> 
>> +#if HAVE_TUNABLES
>> +static __always_inline int
>> +do_set_hugetlb (int32_t value)
>> +{
>> +  if (value == 1)
>> +    {
>> +      enum malloc_thp_mode_t thp_mode = __malloc_thp_mode ();
>> +      /*
>> +	 Only enables THP usage is system does support it and has at least
>> +	 always or madvise mode.  Otherwise the madvise() call is wasteful.
>> +       */
>> +      if (thp_mode == malloc_thp_mode_madvise)
>> +	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
>> +    }
>> +  return 0;
>> +}
>> +#endif
> 
> s/is/if/
> 
> The "always or madvise mode" comment doesn't match the "== madvise"
> logic.  I suspect the logic is ok, so... ok with fixed comment.

Indeed, enabling it with 'always' does not make sense: the khugepaged
kthread will always scan all anonymous memory, so issuing madvise would
just be a wasted syscall.  I changed it to:

  /*
     Only enable THP madvise usage if system does support it and
     has 'madvise' mode.  Otherwise the madvise() call is wasteful. 
   */

> 
>> diff --git a/manual/tunables.texi b/manual/tunables.texi
>> +@deftp Tunable glibc.malloc.hugetlb
>> +This tunable controls the usage of Huge Pages on @code{malloc} calls.  The
>> +default value is @code{0}, which disables any additional support on
>> +@code{malloc}.
>> +
>> +Setting its value to @code{1} enables the use of @code{madvise} with
>> +@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
>> +only if the system supports Transparent Huge Page (currently only on Linux).
>> +@end deftp
> 
> Ok.
> 
>> diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
>> +ifeq ($(subdir),malloc)
>> +sysdep_malloc_debug_routines += malloc-hugepages
>> +endif
>> +
>> +ifeq ($(subdir),misc)
>> +sysdep_routines += malloc-hugepages
>> +endif
> 
> Ok.
> 
>> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
>> +/* Huge Page support.  Generic implementation.
>> +   Copyright (C) 2021 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public License as
>> +   published by the Free Software Foundation; either version 2.1 of the
>> +   License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; see the file COPYING.LIB.  If
>> +   not, see <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <malloc-hugepages.h>
>> +
>> +unsigned long int
>> +__malloc_default_thp_pagesize (void)
>> +{
>> +  return 0;
>> +}
>> +
>> +enum malloc_thp_mode_t
>> +__malloc_thp_mode (void)
>> +{
>> +  return malloc_thp_mode_not_supported;
>> +}
> 
> Ok.
> 
>> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
>> +/* Malloc huge page support.  Generic implementation.
>> +   Copyright (C) 2021 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public License as
>> +   published by the Free Software Foundation; either version 2.1 of the
>> +   License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; see the file COPYING.LIB.  If
>> +   not, see <https://www.gnu.org/licenses/>.  */
>> +
>> +#ifndef _MALLOC_HUGEPAGES_H
>> +#define _MALLOC_HUGEPAGES_H
>> +
>> +#include <stddef.h>
>> +
>> +/* Return the default transparent huge page size.  */
>> +unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
>> +
>> +enum malloc_thp_mode_t
>> +{
>> +  malloc_thp_mode_always,
>> +  malloc_thp_mode_madvise,
>> +  malloc_thp_mode_never,
>> +  malloc_thp_mode_not_supported
>> +};
>> +
>> +enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
>> +
>> +#endif /* _MALLOC_HUGEPAGES_H */
> 
> Ok.
> 
>> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> +/* Huge Page support.  Linux implementation.
>> +   Copyright (C) 2021 Free Software Foundation, Inc.
>> +   This file is part of the GNU C Library.
>> +
>> +   The GNU C Library is free software; you can redistribute it and/or
>> +   modify it under the terms of the GNU Lesser General Public License as
>> +   published by the Free Software Foundation; either version 2.1 of the
>> +   License, or (at your option) any later version.
>> +
>> +   The GNU C Library is distributed in the hope that it will be useful,
>> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> +   Lesser General Public License for more details.
>> +
>> +   You should have received a copy of the GNU Lesser General Public
>> +   License along with the GNU C Library; see the file COPYING.LIB.  If
>> +   not, see <https://www.gnu.org/licenses/>.  */
>> +
>> +#include <intprops.h>
>> +#include <malloc-hugepages.h>
>> +#include <not-cancel.h>
>> +
> 
> Ok.
> 
>> +unsigned long int
>> +__malloc_default_thp_pagesize (void)
>> +{
>> +  int fd = __open64_nocancel (
>> +    "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
>> +  if (fd == -1)
>> +    return 0;
>> +
>> +  char str[INT_BUFSIZE_BOUND (unsigned long int)];
>> +  ssize_t s = __read_nocancel (fd, str, sizeof (str));
>> +  __close_nocancel (fd);
>> +  if (s < 0)
>> +    return 0;
>> +
>> +  unsigned long int r = 0;
>> +  for (ssize_t i = 0; i < s; i++)
>> +    {
>> +      if (str[i] == '\n')
>> +	break;
>> +      r *= 10;
>> +      r += str[i] - '0';
>> +    }
>> +  return r;
>> +}
> 
> Ok.
> 
>> +enum malloc_thp_mode_t
>> +__malloc_thp_mode (void)
>> +{
>> +  int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
>> +			      O_RDONLY);
>> +  if (fd == -1)
>> +    return malloc_thp_mode_not_supported;
>> +
>> +  static const char mode_always[]  = "[always] madvise never\n";
>> +  static const char mode_madvise[] = "always [madvise] never\n";
>> +  static const char mode_never[]   = "always madvise [never]\n";
>> +
>> +  char str[sizeof(mode_always)];
>> +  ssize_t s = __read_nocancel (fd, str, sizeof (str));
>> +  __close_nocancel (fd);
>> +
>> +  if (s == sizeof (mode_always) - 1)
>> +    {
>> +      if (strcmp (str, mode_always) == 0)
>> +	return malloc_thp_mode_always;
>> +      else if (strcmp (str, mode_madvise) == 0)
>> +	return malloc_thp_mode_madvise;
>> +      else if (strcmp (str, mode_never) == 0)
>> +	return malloc_thp_mode_never;
>> +    }
>> +  return malloc_thp_mode_not_supported;
>> +}
> 
> Ok.
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index b53f230cca..589dea4ac3 100644
--- a/NEWS
+++ b/NEWS
@@ -91,6 +91,11 @@  Major new features:
   --enable-static-pie, which no longer has any effect on the build
   configuration.
 
+* On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
+  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
+  It might improve performance with Transparent Huge Pages madvise mode
+  depending on the workload.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * The LD_PREFER_MAP_32BIT_EXEC environment variable support has been
diff --git a/Rules b/Rules
index b1137afe71..471458ad4a 100644
--- a/Rules
+++ b/Rules
@@ -157,6 +157,7 @@  tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
        $(tests-container:%=$(objpfx)%.out) \
        $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
        $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
+       $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
        $(tests-special) $(tests-printers-out)
 xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
 endif
@@ -168,6 +169,7 @@  tests-expected =
 else
 tests-expected = $(tests) $(tests-internal) $(tests-printers) \
 	$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
+	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
 	$(tests-mcheck:%=%-mcheck)
 endif
 tests:
@@ -196,6 +198,7 @@  binaries-pie-notests =
 endif
 binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
 binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
+binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
 else
 binaries-all-notests =
 binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
@@ -207,6 +210,7 @@  binaries-pie-tests =
 binaries-pie-notests =
 binaries-mcheck-tests =
 binaries-malloc-check-tests =
+binaries-malloc-hugetlb1-tests =
 endif
 
 binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
@@ -247,6 +251,14 @@  $(addprefix $(objpfx),$(binaries-malloc-check-tests)): %-malloc-check: %.o \
 	$(+link-tests)
 endif
 
+ifneq "$(strip $(binaries-malloc-hugetlb1-tests))" ""
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o \
+  $(link-extra-libs-tests) \
+  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+	$(+link-tests)
+endif
+
 ifneq "$(strip $(binaries-pie-tests))" ""
 $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
   $(link-extra-libs-tests) \
@@ -284,6 +296,13 @@  $(1)-malloc-check-ENV = MALLOC_CHECK_=3 \
 endef
 $(foreach t,$(tests-malloc-check),$(eval $(call malloc-check-ENVS,$(t))))
 
+# All malloc-hugetlb1 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=1
+define malloc-hugetlb1-ENVS
+$(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
+endef
+$(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
+
+
 # mcheck tests need the debug DSO to support -lmcheck.
 define mcheck-ENVS
 $(1)-mcheck-ENV = LD_PRELOAD=$(common-objpfx)/malloc/libc_malloc_debug.so
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 46ffb23784..5e830403b4 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -92,6 +92,11 @@  glibc {
       minval: 0
       security_level: SXID_IGNORE
     }
+    hugetlb {
+      type: INT_32
+      minval: 0
+      maxval: 1
+    }
   }
   cpu {
     hwcap_mask {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 9bf572715f..2acc296c15 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,6 +1,7 @@ 
 glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.check: 0 (min: 0, max: 3)
+glibc.malloc.hugetlb: 0 (min: 0, max: 1)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/Makefile b/malloc/Makefile
index 63cd7c0734..e47fd660f6 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -78,6 +78,22 @@  tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
 tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
 				  $(tests-static),$(tests))
 
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the
+# Transparent Huge Pages support.  We need to exclude some tests that define
+# the ENV vars.
+tests-exclude-hugetlb1 = \
+	tst-compathooks-off \
+	tst-compathooks-on \
+	tst-interpose-nothread \
+	tst-interpose-thread \
+	tst-interpose-static-nothread \
+	tst-interpose-static-thread \
+	tst-malloc-usable \
+	tst-malloc-usable-tunables \
+	tst-mallocstate
+tests-malloc-hugetlb1 = \
+	$(filter-out $(tests-exclude-hugetlb1), $(tests))
+
 # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
 ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
 # Tests that don't play well with mcheck.  They are either bugs in mcheck or
diff --git a/malloc/arena.c b/malloc/arena.c
index 78ef4cf18c..cd00c7bef4 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -230,6 +230,7 @@  TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
 TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
 #else
 /* Initialization routine. */
 #include <string.h>
@@ -330,6 +331,7 @@  ptmalloc_init (void)
 	       TUNABLE_CALLBACK (set_tcache_unsorted_limit));
 # endif
   TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
+  TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
 #else
   if (__glibc_likely (_environ != NULL))
     {
@@ -508,6 +510,9 @@  new_heap (size_t size, size_t top_pad)
       __munmap (p2, HEAP_MAX_SIZE);
       return 0;
     }
+
+  madvise_thp (p2, size);
+
   h = (heap_info *) p2;
   h->size = size;
   h->mprotect_size = size;
diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
index 0c7b5a183c..7493e34d86 100644
--- a/malloc/malloc-internal.h
+++ b/malloc/malloc-internal.h
@@ -22,6 +22,7 @@ 
 #include <malloc-machine.h>
 #include <malloc-sysdep.h>
 #include <malloc-size.h>
+#include <malloc-hugepages.h>
 
 /* Called in the parent process before a fork.  */
 void __malloc_fork_lock_parent (void) attribute_hidden;
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 095d97a3be..b8103aaf10 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1880,6 +1880,11 @@  struct malloc_par
   INTERNAL_SIZE_T arena_test;
   INTERNAL_SIZE_T arena_max;
 
+#if HAVE_TUNABLES
+  /* Transparent Huge Page support.  */
+  INTERNAL_SIZE_T thp_pagesize;
+#endif
+
   /* Memory map support */
   int n_mmaps;
   int n_mmaps_max;
@@ -2008,6 +2013,20 @@  free_perturb (char *p, size_t n)
 
 #include <stap-probe.h>
 
+/* ----------- Routines dealing with transparent huge pages ----------- */
+
+static inline void
+madvise_thp (void *p, INTERNAL_SIZE_T size)
+{
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
+  /* Best effort: advise only if a huge page size has been configured and
+     the area spans at least one huge page.  */
+  const INTERNAL_SIZE_T hp_size = mp_.thp_pagesize;
+  if (hp_size != 0 && size >= hp_size)
+    __madvise (p, size, MADV_HUGEPAGE);
+#endif
+}
+
 /* ------------------- Support for multiple arenas -------------------- */
 #include "arena.c"
 
@@ -2445,6 +2464,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
           if (mm != MAP_FAILED)
             {
+	      madvise_thp (mm, size);
+
               /*
                  The offset to the start of the mmapped region is stored
                  in the prev_size field of the chunk. This allows us to adjust
@@ -2606,6 +2627,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
       if (size > 0)
         {
           brk = (char *) (MORECORE (size));
+	  if (brk != (char *) (MORECORE_FAILURE))
+	    madvise_thp (brk, size);
           LIBC_PROBE (memory_sbrk_more, 2, brk, size);
         }
 
@@ -2637,6 +2660,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
               if (mbrk != MAP_FAILED)
                 {
+		  madvise_thp (mbrk, size);
+
                   /* We do not need, and cannot use, another sbrk call to find end */
                   brk = mbrk;
                   snd_brk = brk + size;
@@ -2748,6 +2773,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
                       correction = 0;
                       snd_brk = (char *) (MORECORE (0));
                     }
+		  else
+		    madvise_thp (snd_brk, correction);
                 }
 
               /* handle non-contiguous cases */
@@ -2988,6 +3015,8 @@  mremap_chunk (mchunkptr p, size_t new_size)
   if (cp == MAP_FAILED)
     return 0;
 
+  madvise_thp (cp, new_size);
+
   p = (mchunkptr) (cp + offset);
 
   assert (aligned_OK (chunk2mem (p)));
@@ -5316,6 +5345,24 @@  do_set_mxfast (size_t value)
   return 0;
 }
 
+#if HAVE_TUNABLES
+/* Apply a hugetlb setting: only VALUE 1 can set mp_.thp_pagesize, which
+   madvise_thp checks before advising.  Always returns 0.  */
+static __always_inline int
+do_set_hugetlb (int32_t value)
+{
+  if (value == 1)
+    {
+      enum malloc_thp_mode_t thp_mode = __malloc_thp_mode ();
+      /* Enable only when the system reports 'madvise' mode; in any other
+	 mode thp_pagesize is left untouched, as madvise() is wasteful.  */
+      if (thp_mode == malloc_thp_mode_madvise)
+	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
+    }
+  return 0;
+}
+#endif
+
 int
 __libc_mallopt (int param_number, int value)
 {
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 5d50b90f64..7f704e9b37 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -270,6 +270,16 @@  pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
 passed to @code{malloc} for the largest bin size to enable.
 @end deftp
 
+@deftp Tunable glibc.malloc.hugetlb
+This tunable controls the usage of Huge Pages on @code{malloc} calls.  The
+default value is @code{0}, which disables any additional support on
+@code{malloc}.
+
+Setting its value to @code{1} enables the use of @code{madvise} with
+@code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
+only if the system supports Transparent Huge Pages (currently only on Linux).
+@end deftp
+
 @node Dynamic Linking Tunables
 @section Dynamic Linking Tunables
 @cindex dynamic linking tunables
diff --git a/sysdeps/generic/Makefile b/sysdeps/generic/Makefile
index a209e85cc4..8eef83c94d 100644
--- a/sysdeps/generic/Makefile
+++ b/sysdeps/generic/Makefile
@@ -27,3 +27,11 @@  sysdep_routines += framestate unwind-pe
 shared-only-routines += framestate unwind-pe
 endif
 endif
+
+ifeq ($(subdir),malloc)
+sysdep_malloc_debug_routines += malloc-hugepages
+endif
+
+ifeq ($(subdir),misc)
+sysdep_routines += malloc-hugepages
+endif
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
new file mode 100644
index 0000000000..8fb459a263
--- /dev/null
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -0,0 +1,31 @@ 
+/* Huge Page support.  Generic implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#include <malloc-hugepages.h>
+
+unsigned long int
+__malloc_default_thp_pagesize (void)
+{
+  return 0;  /* Generic stub: no huge page size is reported.  */
+}
+
+enum malloc_thp_mode_t
+__malloc_thp_mode (void)
+{
+  return malloc_thp_mode_not_supported;  /* Generic stub: no THP mode.  */
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
new file mode 100644
index 0000000000..f5a442e328
--- /dev/null
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -0,0 +1,37 @@ 
+/* Malloc huge page support.  Generic implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#ifndef _MALLOC_HUGEPAGES_H
+#define _MALLOC_HUGEPAGES_H
+
+#include <stddef.h>
+
+/* Return the default transparent huge page size.  */
+unsigned long int __malloc_default_thp_pagesize (void) attribute_hidden;
+
+enum malloc_thp_mode_t
+{
+  malloc_thp_mode_always,         /* THP used for all anonymous pages.  */
+  malloc_thp_mode_madvise,        /* THP used only for madvise'd ranges.  */
+  malloc_thp_mode_never,          /* THP disabled.  */
+  malloc_thp_mode_not_supported   /* No THP mode could be determined.  */
+};
+
+enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
+
+#endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
new file mode 100644
index 0000000000..7497e07260
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -0,0 +1,74 @@ 
+/* Huge Page support.  Linux implementation.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, see <https://www.gnu.org/licenses/>.  */
+
+#include <intprops.h>
+#include <malloc-hugepages.h>
+#include <not-cancel.h>
+
+/* Return the default THP size parsed from the sysfs hpage_pmd_size file
+   (reported in bytes by Linux), or 0 if the file cannot be opened or
+   read.  */
+unsigned long int
+__malloc_default_thp_pagesize (void)
+{
+  int fd = __open64_nocancel (
+    "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  char str[INT_BUFSIZE_BOUND (unsigned long int)];
+  ssize_t s = __read_nocancel (fd, str, sizeof (str));
+  __close_nocancel (fd);
+  if (s < 0)
+    return 0;
+
+  /* Parse the leading decimal digits; stop at the newline or at any other
+     unexpected byte instead of folding it into the value.  */
+  unsigned long int r = 0;
+  for (ssize_t i = 0; i < s && str[i] >= '0' && str[i] <= '9'; i++)
+    r = r * 10 + (str[i] - '0');
+  return r;
+}
+
+/* Return the THP mode read from sysfs; not_supported if that fails.  */
+enum malloc_thp_mode_t
+__malloc_thp_mode (void)
+{
+  int fd = __open64_nocancel ("/sys/kernel/mm/transparent_hugepage/enabled",
+			      O_RDONLY);
+  if (fd == -1)
+    return malloc_thp_mode_not_supported;
+
+  static const char mode_always[]  = "[always] madvise never\n";
+  static const char mode_madvise[] = "always [madvise] never\n";
+  static const char mode_never[]   = "always madvise [never]\n";
+
+  char str[sizeof (mode_always)];
+  ssize_t s = __read_nocancel (fd, str, sizeof (str));
+  __close_nocancel (fd);
+  /* STR is not NUL-terminated, so compare exactly the S bytes read.  */
+  if (s != sizeof (mode_always) - 1)
+    return malloc_thp_mode_not_supported;
+  if (memcmp (str, mode_always, s) == 0)
+    return malloc_thp_mode_always;
+  if (memcmp (str, mode_madvise, s) == 0)
+    return malloc_thp_mode_madvise;
+  if (memcmp (str, mode_never, s) == 0)
+    return malloc_thp_mode_never;
+  return malloc_thp_mode_not_supported;
+}