[v5,4/7] malloc: Add Huge Page support for mmap()

Message ID 20211214185806.4109231-5-adhemerval.zanella@linaro.org
State Committed
Headers
Series malloc: Improve Huge Page support |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella Dec. 14, 2021, 6:58 p.m. UTC
  With the morecore hook removed, there is no easy way to provide huge
page support with the glibc allocator without resorting to transparent
huge pages.  And some users and programs do prefer to use the huge pages
directly instead of THP for multiple reasons: no splitting, re-merging
by the VM, no TLB shootdowns for running processes, fast allocation
from the reserve pool, no competition with the rest of the processes
unlike THP, no swapping at all, etc.

This patch extends the 'glibc.malloc.hugetlb' tunable: the value
'2' means to use huge pages directly with the system default size,
while a larger value specifies a page size that is matched
against the ones supported by the system.

Currently only memory allocated on sysmalloc() is handled, the arenas
still use the default system page size.

To test it, a new rule tests-malloc-hugetlb2 is added, which runs the
added tests with the required GLIBC_TUNABLES setting.  On systems without
a reserved huge pages pool, it just stresses the mmap(MAP_HUGETLB)
allocation failure.  To improve test coverage it is required to create
a pool with some allocated pages.

Checked on x86_64-linux-gnu.
---
 NEWS                                       |   8 +-
 Rules                                      |  17 +++
 elf/dl-tunables.list                       |   3 +-
 elf/tst-rtld-list-tunables.exp             |   2 +-
 malloc/Makefile                            |   8 +-
 malloc/malloc.c                            |  28 ++++-
 manual/tunables.texi                       |   7 ++
 sysdeps/generic/malloc-hugepages.c         |   8 ++
 sysdeps/generic/malloc-hugepages.h         |   7 ++
 sysdeps/unix/sysv/linux/malloc-hugepages.c | 127 +++++++++++++++++++++
 10 files changed, 203 insertions(+), 12 deletions(-)
  

Comments

DJ Delorie Dec. 15, 2021, 4:26 a.m. UTC | #1
A few comment tweaks.
One logic question.

Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> diff --git a/NEWS b/NEWS
> index 589dea4ac3..1b437a0f3a 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -92,9 +92,11 @@ Major new features:
>    configuration.
>  
>  * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
> -  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
> -  It might improve performance with Transparent Huge Pages madvise mode
> -  depending of the workload.
> +  either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
> +  or to use huge pages directly with mmap calls with the MAP_HUGETLB
> +  flags).  The former can improve performance when Transparent Huge Pages
> +  is set to 'madvise' mode while the latter uses the system reserved
> +  huge pages.

Ok.

> diff --git a/Rules b/Rules
>         $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
> +       $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \

Ok.

>  	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
> +	$(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \

Ok.

> @@ -199,6 +201,7 @@ endif
>  binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
> +binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)

Ok.

>  binaries-malloc-hugetlb1-tests =
> +binaries-malloc-hugetlb2-tests =

Ok.

> +ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
> +$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
> +  $(link-extra-libs-tests) \
> +  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
> +  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
> +	$(+link-tests)
> +endif

Ok.

> +# All malloc-hugetlb2 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=2
> +define malloc-hugetlb2-ENVS
> +$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
> +endef
> +$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))

Ok.

> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
>      hugetlb {
> -      type: INT_32
> +      type: SIZE_T
>        minval: 0
> -      maxval: 1
>      }

Ok.

> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
> -glibc.malloc.hugetlb: 0 (min: 0, max: 1)
> +glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)

Ok.

> diff --git a/malloc/Makefile b/malloc/Makefile

> -# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the
> -# Transparent Huge Pages support.  We need exclude some tests that define
> -# the ENV vars.
> +# Run all tests with GLIBC_TUNABLE=glibc.malloc.hugetlb={1,2} which check
> +# the Transparent Huge Pages support (1) or automatic huge page support (2).
> +# We need exclude some tests that define the ENV vars.

Ok.

>  	tst-mallocstate
>  tests-malloc-hugetlb1 = \
>  	$(filter-out $(tests-exclude-hugetlb1), $(tests))
> +tests-malloc-hugetlb2 = \
> +	$(filter-out $(tests-exclude-hugetlb1), $(tests))

Ok.

> diff --git a/malloc/malloc.c b/malloc/malloc.c
>  #if HAVE_TUNABLES
>    /* Transparent Large Page support.  */
>    INTERNAL_SIZE_T thp_pagesize;
> +  /* A value different than 0 means to align mmap allocation to hp_pagesize
> +     add hp_flags on flags.  */
> +  INTERNAL_SIZE_T hp_pagesize;
> +  int hp_flags;
>  #endif

Ok.

> -  madvise_thp (mm, size);
> +#ifdef MAP_HUGETLB
> +  if (!(extra_flags & MAP_HUGETLB))
> +    madvise_thp (mm, size);
> +#endif

Ok.

> @@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>        || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
>  	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
>      {
> -      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
> +      char *mm;
> +#if HAVE_TUNABLES
> +      if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
> +	{
> +	  /* There is no need to isse the THP madvise call if Huge Pages are
> +	     used directly.  */
> +	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
> +	  if (mm != MAP_FAILED)
> +	    return mm;
> +	}
> +#endif
> +      mm = sysmalloc_mmap (nb, pagesize, 0, av);

Ok.

> @@ -2609,7 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>          }
>        else if (!tried_mmap)
>  	{
> -	  /* We can at least try to use to mmap memory.  */
> +	  /* We can at least try to use to mmap memory.  If new_heap fails
> +	     it is unlikely that trying to allocage huge page will succeed.  */

s/allocage/allocate/

"huge page" should either be "a huge page" or "huge pages"

> @@ -5395,6 +5414,9 @@ do_set_hugetlb (int32_t value)
>        if (thp_mode == malloc_thp_mode_madvise)
>  	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
>      }
> +  else if (value >= 2)
> +    __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
> +			      &mp_.hp_flags);
>    return 0;
>  }

Ok.

> diff --git a/manual/tunables.texi b/manual/tunables.texi
>  Setting its value to @code{1} enables the use of @code{madvise} with
>  @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
>  only if the system supports Transparent Huge Page (currently only on Linux).
> +
> +Setting its value to @code{2} enables the use of Huge Page directly with
> +@code{mmap} with the use of @code{MAP_HUGETLB} flag.  The huge page size
> +to use will be the default one provided by the system.  A value larger than
> +@code{2} specifies huge page size, which will be matched against the system
> +supported ones.  If provided value is invalid, @code{MAP_HUGETLB} will not
> +be used.

Ok.


> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
> @@ -29,3 +29,11 @@ __malloc_thp_mode (void)
>  {
>    return malloc_thp_mode_not_supported;
>  }
> +
> +/* Return the default transparent huge page size.  */
> +void
> +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +{
> +  *pagesize = 0;
> +  *flags = 0;
> +}

Ok.

> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
> +/* Return the support huge page size from the REQUESTED sizes on PAGESIZE
> +   along with the required extra mmap flags on FLAGS,  Requesting the value
> +   of 0 returns the default huge page size, otherwise the value will be
> +   matched against the supported on by the system.  */
> +void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +     attribute_hidden;

s/support/supported/
s/supported on by/sizes supported by/

> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> index 7497e07260..120c78b42a 100644
> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> @@ -17,8 +17,10 @@
>     not, see <https://www.gnu.org/licenses/>.  */
>  
>  #include <intprops.h>
> +#include <dirent.h>
>  #include <malloc-hugepages.h>
>  #include <not-cancel.h>
> +#include <sys/mman.h>

Ok.

> @@ -72,3 +74,128 @@ __malloc_thp_mode (void)
> +static size_t
> +malloc_default_hugepage_size (void)
> +{
> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
> +  if (fd == -1)
> +    return 0;
> +
> +  size_t hpsize = 0;
> +
> +  char buf[512];
> +  off64_t off = 0;
> +  while (1)
> +    {
> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
> +      if (r < 0)
> +	break;
> +      buf[r - 1] = '\0';

This always overwrites the last byte of the file, shouldn't this be
buf[r] ?

> +      /* If the tag is not found, read the last line again.  */
> +      const char *s = strstr (buf, "Hugepagesize:");
> +      if (s == NULL)
> +	{
> +	  char *nl = strrchr (buf, '\n');
> +	  if (nl == NULL)
> +	    break;
> +	  off += (nl + 1) - buf;
> +	  continue;
> +	}
> +
> +      /* The default huge page size is in the form:
> +	 Hugepagesize:       NUMBER kB  */
> +      s += sizeof ("Hugepagesize: ") - 1;
> +      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
> +	{
> +	  if (s[i] == ' ')
> +	    continue;
> +	  hpsize *= 10;
> +	  hpsize += s[i] - '0';
> +	}
> +      hpsize *= 1024;
> +      break;
> +    }
> +
> +  __close_nocancel (fd);
> +
> +  return hpsize;
> +}

Ok.

> +static inline int
> +hugepage_flags (size_t pagesize)
> +{
> +  return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
> +}

Ok.

> +void
> +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +{
> +  *pagesize = 0;
> +  *flags = 0;
> +
> +  if (requested == 0)
> +    {
> +      *pagesize = malloc_default_hugepage_size ();
> +      if (pagesize != 0)
> +	*flags = hugepage_flags (*pagesize);
> +      return;
> +    }

Ok.

> +  /* Each entry represents a supported huge page in the form of:
> +     hugepages-<size>kB.  */
> +  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
> +				 O_RDONLY | O_DIRECTORY, 0);
> +  if (dirfd == -1)
> +    return;
> +
> +  char buffer[1024];
> +  while (true)
> +    {
> +#if !IS_IN(libc)
> +# define __getdents64 getdents64
> +#endif
> +      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
> +      if (ret == -1)
> +	break;
> +      else if (ret == 0)
> +        break;

Ok.

> +
> +      bool found = false;
> +      char *begin = buffer, *end = buffer + ret;
> +      while (begin != end)
> +        {
> +          unsigned short int d_reclen;
> +          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
> +                  sizeof (d_reclen));

Because alignment; ok.

> +          const char *dname = begin + offsetof (struct dirent64, d_name);
> +          begin += d_reclen;
> +
> +          if (dname[0] == '.'
> +	      || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
> +            continue;
> +

Ok.

> +	  size_t hpsize = 0;
> +	  const char *sizestr = dname + sizeof ("hugepages-") - 1;
> +	  for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
> +	    {
> +	      hpsize *= 10;
> +	      hpsize += sizestr[i] - '0';
> +	    }
> +	  hpsize *= 1024;

Ok.

> +	  if (hpsize == requested)
> +	    {
> +	      *pagesize = hpsize;
> +	      *flags = hugepage_flags (*pagesize);
> +	      found = true;
> +	      break;
> +	    }
> +        }
> +      if (found)
> +	break;
> +    }
> +
> +  __close_nocancel (dirfd);
> +}

Ok.
  
Adhemerval Zanella Dec. 15, 2021, 1:08 p.m. UTC | #2
On 15/12/2021 01:26, DJ Delorie wrote:
> 
> A few comment tweaks.
> One logic question.
> 
> Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> 
>> @@ -2609,7 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>>          }
>>        else if (!tried_mmap)
>>  	{
>> -	  /* We can at least try to use to mmap memory.  */
>> +	  /* We can at least try to use to mmap memory.  If new_heap fails
>> +	     it is unlikely that trying to allocage huge page will succeed.  */
> 
> s/allocage/allocate/

Ack.

> 
> "huge page" should either be "a huge page" or "huge pages"

Ack.

>> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
>> +/* Return the support huge page size from the REQUESTED sizes on PAGESIZE
>> +   along with the required extra mmap flags on FLAGS,  Requesting the value
>> +   of 0 returns the default huge page size, otherwise the value will be
>> +   matched against the supported on by the system.  */
>> +void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
>> +     attribute_hidden;
> 
> s/support/supported/
> s/supported on by/sizes supported by/
> 

Ack.

>> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> index 7497e07260..120c78b42a 100644
>> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> @@ -17,8 +17,10 @@
>>     not, see <https://www.gnu.org/licenses/>.  */
>>  
>>  #include <intprops.h>
>> +#include <dirent.h>
>>  #include <malloc-hugepages.h>
>>  #include <not-cancel.h>
>> +#include <sys/mman.h>
> 
> Ok.
> 
>> @@ -72,3 +74,128 @@ __malloc_thp_mode (void)
>> +static size_t
>> +malloc_default_hugepage_size (void)
>> +{
>> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
>> +  if (fd == -1)
>> +    return 0;
>> +
>> +  size_t hpsize = 0;
>> +
>> +  char buf[512];
>> +  off64_t off = 0;
>> +  while (1)
>> +    {
>> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
>> +      if (r < 0)
>> +	break;
>> +      buf[r - 1] = '\0';
> 
> This always overwrites the last byte of the file, shouldn't this be
> buf[r] ?

Yes, I have fixed it.

Is this patch ok with the above fix?
  
DJ Delorie Dec. 15, 2021, 5:43 p.m. UTC | #3
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>>> +      buf[r - 1] = '\0';
>> 
>> This always overwrites the last byte of the file, shouldn't this be
>> buf[r] ?
>
> Yes, I have fixed it.
>
> Is this patch ok with the above fix?

Yes.

Reviewed-by: DJ Delorie <dj@redhat.com>
  

Patch

diff --git a/NEWS b/NEWS
index 589dea4ac3..1b437a0f3a 100644
--- a/NEWS
+++ b/NEWS
@@ -92,9 +92,11 @@  Major new features:
   configuration.
 
 * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
-  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
-  It might improve performance with Transparent Huge Pages madvise mode
-  depending of the workload.
+  either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
+  calls or use huge pages directly with mmap calls with the MAP_HUGETLB
+  flag.  The former can improve performance when Transparent Huge Pages
+  is set to 'madvise' mode while the latter uses the system reserved
+  huge pages.
 
 Deprecated and removed features, and other changes affecting compatibility:
 
diff --git a/Rules b/Rules
index 471458ad4a..542a37eef0 100644
--- a/Rules
+++ b/Rules
@@ -158,6 +158,7 @@  tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
        $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
        $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
        $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
+       $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \
        $(tests-special) $(tests-printers-out)
 xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
 endif
@@ -170,6 +171,7 @@  else
 tests-expected = $(tests) $(tests-internal) $(tests-printers) \
 	$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
 	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
+	$(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \
 	$(tests-mcheck:%=%-mcheck)
 endif
 tests:
@@ -199,6 +201,7 @@  endif
 binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
 binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
 binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
+binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)
 else
 binaries-all-notests =
 binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
@@ -211,6 +214,7 @@  binaries-pie-notests =
 binaries-mcheck-tests =
 binaries-malloc-check-tests =
 binaries-malloc-hugetlb1-tests =
+binaries-malloc-hugetlb2-tests =
 endif
 
 binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
@@ -259,6 +263,14 @@  $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o
 	$(+link-tests)
 endif
 
+ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
+  $(link-extra-libs-tests) \
+  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+	$(+link-tests)
+endif
+
 ifneq "$(strip $(binaries-pie-tests))" ""
 $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
   $(link-extra-libs-tests) \
@@ -302,6 +314,11 @@  $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
 endef
 $(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
 
+# All malloc-hugetlb2 tests will be run with GLIBC_TUNABLES=glibc.malloc.hugetlb=2
+define malloc-hugetlb2-ENVS
+$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
+endef
+$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))
 
 # mcheck tests need the debug DSO to support -lmcheck.
 define mcheck-ENVS
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index 5e830403b4..14b87cc405 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -93,9 +93,8 @@  glibc {
       security_level: SXID_IGNORE
     }
     hugetlb {
-      type: INT_32
+      type: SIZE_T
       minval: 0
-      maxval: 1
     }
   }
   cpu {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index 2acc296c15..46237aa60f 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,7 +1,7 @@ 
 glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.check: 0 (min: 0, max: 3)
-glibc.malloc.hugetlb: 0 (min: 0, max: 1)
+glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/Makefile b/malloc/Makefile
index e47fd660f6..83de7f2a35 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -78,9 +78,9 @@  tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
 tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
 				  $(tests-static),$(tests))
 
-# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the
-# Transparent Huge Pages support.  We need exclude some tests that define
-# the ENV vars.
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb={1,2} which check
+# the Transparent Huge Pages support (1) or automatic huge page support (2).
+# We need to exclude some tests that define the ENV vars.
 tests-exclude-hugetlb1 = \
 	tst-compathooks-off \
 	tst-compathooks-on \
@@ -93,6 +93,8 @@  tests-exclude-hugetlb1 = \
 	tst-mallocstate
 tests-malloc-hugetlb1 = \
 	$(filter-out $(tests-exclude-hugetlb1), $(tests))
+tests-malloc-hugetlb2 = \
+	$(filter-out $(tests-exclude-hugetlb1), $(tests))
 
 # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
 ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 4151d043a2..3e2f427d94 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1883,6 +1883,10 @@  struct malloc_par
 #if HAVE_TUNABLES
   /* Transparent Large Page support.  */
   INTERNAL_SIZE_T thp_pagesize;
+  /* A value different than 0 means to align mmap allocation to hp_pagesize
+     and add hp_flags on flags.  */
+  INTERNAL_SIZE_T hp_pagesize;
+  int hp_flags;
 #endif
 
   /* Memory map support */
@@ -2440,7 +2444,10 @@  sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
   if (mm == MAP_FAILED)
     return mm;
 
-  madvise_thp (mm, size);
+#ifdef MAP_HUGETLB
+  if (!(extra_flags & MAP_HUGETLB))
+    madvise_thp (mm, size);
+#endif
 
   /*
     The offset to the start of the mmapped region is stored in the prev_size
@@ -2528,7 +2535,18 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
       || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
 	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
     {
-      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
+      char *mm;
+#if HAVE_TUNABLES
+      if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
+	{
+	  /* There is no need to issue the THP madvise call if Huge Pages are
+	     used directly.  */
+	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
+	  if (mm != MAP_FAILED)
+	    return mm;
+	}
+#endif
+      mm = sysmalloc_mmap (nb, pagesize, 0, av);
       if (mm != MAP_FAILED)
 	return mm;
       tried_mmap = true;
@@ -2609,7 +2627,8 @@  sysmalloc (INTERNAL_SIZE_T nb, mstate av)
         }
       else if (!tried_mmap)
 	{
-	  /* We can at least try to use to mmap memory.  */
+	  /* We can at least try to use to mmap memory.  If new_heap fails
+	     it is unlikely that trying to allocate huge pages will succeed.  */
 	  char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
 	  if (mm != MAP_FAILED)
 	    return mm;
@@ -5395,6 +5414,9 @@  do_set_hugetlb (int32_t value)
       if (thp_mode == malloc_thp_mode_madvise)
 	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
     }
+  else if (value >= 2)
+    __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
+			      &mp_.hp_flags);
   return 0;
 }
 #endif
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 7f704e9b37..8a110b2927 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -278,6 +278,13 @@  default value is @code{0}, which disables any additional support on
 Setting its value to @code{1} enables the use of @code{madvise} with
 @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
 only if the system supports Transparent Huge Page (currently only on Linux).
+
+Setting its value to @code{2} enables the use of Huge Pages directly with
+@code{mmap} with the use of the @code{MAP_HUGETLB} flag.  The huge page size
+to use will be the default one provided by the system.  A value larger than
+@code{2} specifies the huge page size, which will be matched against the
+system supported ones.  If the provided value is invalid, @code{MAP_HUGETLB}
+will not be used.
 @end deftp
 
 @node Dynamic Linking Tunables
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
index 8fb459a263..946284a33c 100644
--- a/sysdeps/generic/malloc-hugepages.c
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -29,3 +29,11 @@  __malloc_thp_mode (void)
 {
   return malloc_thp_mode_not_supported;
 }
+
+/* Generic version without huge page support: set PAGESIZE and FLAGS to 0.  */
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+  *pagesize = 0;
+  *flags = 0;
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index f5a442e328..b830ad823e 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -34,4 +34,11 @@  enum malloc_thp_mode_t
 
 enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
 
+/* Return the supported huge page size from the REQUESTED sizes on PAGESIZE
+   along with the required extra mmap flags on FLAGS.  Requesting the value
+   of 0 returns the default huge page size, otherwise the value will be
+   matched against the sizes supported by the system.  */
+void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+     attribute_hidden;
+
 #endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 7497e07260..120c78b42a 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -17,8 +17,10 @@ 
    not, see <https://www.gnu.org/licenses/>.  */
 
 #include <intprops.h>
+#include <dirent.h>
 #include <malloc-hugepages.h>
 #include <not-cancel.h>
+#include <sys/mman.h>
 
 unsigned long int
 __malloc_default_thp_pagesize (void)
@@ -72,3 +74,128 @@  __malloc_thp_mode (void)
     }
   return malloc_thp_mode_not_supported;
 }
+
+/* Return the system default huge page size in bytes, as reported by the
+   Hugepagesize field of /proc/meminfo, or 0 if it cannot be obtained.  */
+static size_t
+malloc_default_hugepage_size (void)
+{
+  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  size_t hpsize = 0;
+  char buf[512];
+  off64_t off = 0;
+  while (1)
+    {
+      /* Stop on error and on end of file; otherwise terminate the string
+	 just after the bytes read so the last one is not dropped.  */
+      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
+      if (r <= 0)
+	break;
+      buf[r] = '\0';
+
+      /* If the tag is not found read the last line again, and if its line
+	 is cut short by the end of BUF read again starting at the tag.  */
+      const char *s = strstr (buf, "Hugepagesize:");
+      if (s == NULL || (s != buf && strchr (s, '\n') == NULL))
+	{
+	  const char *nl = strrchr (buf, '\n');
+	  if (s == NULL && nl == NULL)
+	    break;
+	  off += (s != NULL ? s : nl + 1) - buf;
+	  continue;
+	}
+
+      /* The value follows the tag, padded with spaces, in the form:
+	 Hugepagesize:       NUMBER kB  */
+      s += sizeof ("Hugepagesize:") - 1;
+      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
+	if (s[i] != ' ')
+	  hpsize = hpsize * 10 + (s[i] - '0');
+      hpsize *= 1024;
+      break;
+    }
+
+  __close_nocancel (fd);
+
+  return hpsize;
+}
+
+/* Return the mmap flags selecting huge pages of PAGESIZE bytes, which must
+   not be 0 since __builtin_ctzll is undefined for that value.  */
+static inline int
+hugepage_flags (size_t pagesize)
+{
+  return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
+}
+
+/* Store on PAGESIZE the huge page size to use and on FLAGS the extra mmap
+   flags it requires.  A REQUESTED value of 0 selects the system default
+   size, any other value must match a size listed by the system.  Both
+   outputs are left as 0 if no suitable huge page size is found.  */
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+  *pagesize = 0;
+  *flags = 0;
+
+  if (requested == 0)
+    {
+      *pagesize = malloc_default_hugepage_size ();
+      /* Check the size read, not the always non-null PAGESIZE pointer.  */
+      if (*pagesize != 0)
+	*flags = hugepage_flags (*pagesize);
+      return;
+    }
+
+  /* Each entry represents a supported huge page in the form of:
+     hugepages-<size>kB.  */
+  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
+				 O_RDONLY | O_DIRECTORY, 0);
+  if (dirfd == -1)
+    return;
+
+  char buffer[1024];
+  bool found = false;
+  while (!found)
+    {
+#if !IS_IN(libc)
+# define __getdents64 getdents64
+#endif
+      /* Stop on error (-1) and once no entries are left (0).  */
+      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
+      if (ret <= 0)
+	break;
+
+      char *begin = buffer, *end = buffer + ret;
+      while (begin != end && !found)
+	{
+	  /* memcpy avoids a possibly misaligned read of d_reclen.  */
+	  unsigned short int d_reclen;
+	  memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
+		  sizeof (d_reclen));
+	  const char *dname = begin + offsetof (struct dirent64, d_name);
+	  begin += d_reclen;
+	  if (strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
+	    continue;
+
+	  /* The decimal number after the prefix is the size in kB.  */
+	  size_t hpsize = 0;
+	  const char *sizestr = dname + sizeof ("hugepages-") - 1;
+	  for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
+	    hpsize = hpsize * 10 + (sizestr[i] - '0');
+	  hpsize *= 1024;
+
+	  if (hpsize == requested)
+	    {
+	      *pagesize = hpsize;
+	      *flags = hugepage_flags (*pagesize);
+	      found = true;
+	    }
+	}
+    }
+
+  __close_nocancel (dirfd);
+}