Message ID | 20211214185806.4109231-5-adhemerval.zanella@linaro.org |
---|---|
State | Committed |
Headers | show |
Series | malloc: Improve Huge Page support | expand |

Context | Check | Description |
---|---|---|
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent |
A few comment tweaks.
One logic question.

Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> diff --git a/NEWS b/NEWS
> index 589dea4ac3..1b437a0f3a 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -92,9 +92,11 @@ Major new features:
>    configuration.
>
>  * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
> -  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
> -  It might improve performance with Transparent Huge Pages madvise mode
> -  depending of the workload.
> +  either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
> +  or to use huge pages directly with mmap calls with the MAP_HUGETLB
> +  flags). The former can improve performance when Transparent Huge Pages
> +  is set to 'madvise' mode while the latter uses the system reserved
> +  huge pages.

Ok.

> diff --git a/Rules b/Rules
>    $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
> +  $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \

Ok.

>    $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
> +  $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \

Ok.

> @@ -199,6 +201,7 @@ endif
>  binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
> +binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)

Ok.

>  binaries-malloc-hugetlb1-tests =
> +binaries-malloc-hugetlb2-tests =

Ok.

> +ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
> +$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
> +  $(link-extra-libs-tests) \
> +  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
> +  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
> +  $(+link-tests)
> +endif

Ok.

> +# All malloc-hugetlb2 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=2
> +define malloc-hugetlb2-ENVS
> +$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
> +endef
> +$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))

Ok.

> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
>      hugetlb {
> -      type: INT_32
> +      type: SIZE_T
>        minval: 0
> -      maxval: 1
>      }

Ok.

> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
> -glibc.malloc.hugetlb: 0 (min: 0, max: 1)
> +glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)

Ok.

> diff --git a/malloc/Makefile b/malloc/Makefile
> -# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the
> -# Transparent Huge Pages support. We need exclude some tests that define
> -# the ENV vars.
> +# Run all tests with GLIBC_TUNABLE=glibc.malloc.hugetlb={1,2} which check
> +# the Transparent Huge Pages support (1) or automatic huge page support (2).
> +# We need exclude some tests that define the ENV vars.

Ok.

>    tst-mallocstate
>  tests-malloc-hugetlb1 = \
>    $(filter-out $(tests-exclude-hugetlb1), $(tests))
> +tests-malloc-hugetlb2 = \
> +  $(filter-out $(tests-exclude-hugetlb1), $(tests))

Ok.

> diff --git a/malloc/malloc.c b/malloc/malloc.c
>  #if HAVE_TUNABLES
>    /* Transparent Large Page support. */
>    INTERNAL_SIZE_T thp_pagesize;
> +  /* A value different than 0 means to align mmap allocation to hp_pagesize
> +     add hp_flags on flags. */
> +  INTERNAL_SIZE_T hp_pagesize;
> +  int hp_flags;
>  #endif

Ok.

> -  madvise_thp (mm, size);
> +#ifdef MAP_HUGETLB
> +  if (!(extra_flags & MAP_HUGETLB))
> +    madvise_thp (mm, size);
> +#endif

Ok.

> @@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>        || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
>            && (mp_.n_mmaps < mp_.n_mmaps_max)))
>      {
> -      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
> +      char *mm;
> +#if HAVE_TUNABLES
> +      if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
> +        {
> +          /* There is no need to isse the THP madvise call if Huge Pages are
> +             used directly. */
> +          mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
> +          if (mm != MAP_FAILED)
> +            return mm;
> +        }
> +#endif
> +      mm = sysmalloc_mmap (nb, pagesize, 0, av);

Ok.

> @@ -2609,7 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>      }
>    else if (!tried_mmap)
>      {
> -      /* We can at least try to use to mmap memory. */
> +      /* We can at least try to use to mmap memory. If new_heap fails
> +         it is unlikely that trying to allocage huge page will succeed. */

s/allocage/allocate/

"huge page" should either be "a huge page" or "huge pages"

> @@ -5395,6 +5414,9 @@ do_set_hugetlb (int32_t value)
>        if (thp_mode == malloc_thp_mode_madvise)
>          mp_.thp_pagesize = __malloc_default_thp_pagesize ();
>      }
> +  else if (value >= 2)
> +    __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
> +                              &mp_.hp_flags);
>    return 0;
>  }

Ok.

> diff --git a/manual/tunables.texi b/manual/tunables.texi
>  Setting its value to @code{1} enables the use of @code{madvise} with
>  @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled
>  only if the system supports Transparent Huge Page (currently only on Linux).
> +
> +Setting its value to @code{2} enables the use of Huge Page directly with
> +@code{mmap} with the use of @code{MAP_HUGETLB} flag. The huge page size
> +to use will be the default one provided by the system. A value larger than
> +@code{2} specifies huge page size, which will be matched against the system
> +supported ones. If provided value is invalid, @code{MAP_HUGETLB} will not
> +be used.

Ok.

> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
> @@ -29,3 +29,11 @@ __malloc_thp_mode (void)
>  {
>    return malloc_thp_mode_not_supported;
>  }
> +
> +/* Return the default transparent huge page size. */
> +void
> +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +{
> +  *pagesize = 0;
> +  *flags = 0;
> +}

Ok.

> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
> +/* Return the support huge page size from the REQUESTED sizes on PAGESIZE
> +   along with the required extra mmap flags on FLAGS, Requesting the value
> +   of 0 returns the default huge page size, otherwise the value will be
> +   matched against the supported on by the system. */
> +void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +     attribute_hidden;

s/support/supported/
s/supported on by/sizes supported by/

> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> index 7497e07260..120c78b42a 100644
> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> @@ -17,8 +17,10 @@
>     not, see <https://www.gnu.org/licenses/>. */
>
>  #include <intprops.h>
> +#include <dirent.h>
>  #include <malloc-hugepages.h>
>  #include <not-cancel.h>
> +#include <sys/mman.h>

Ok.

> @@ -72,3 +74,128 @@ __malloc_thp_mode (void)
> +static size_t
> +malloc_default_hugepage_size (void)
> +{
> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
> +  if (fd == -1)
> +    return 0;
> +
> +  size_t hpsize = 0;
> +
> +  char buf[512];
> +  off64_t off = 0;
> +  while (1)
> +    {
> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
> +      if (r < 0)
> +        break;
> +      buf[r - 1] = '\0';

This always overwrites the last byte of the file, shouldn't this be
buf[r] ?

> +      /* If the tag is not found, read the last line again. */
> +      const char *s = strstr (buf, "Hugepagesize:");
> +      if (s == NULL)
> +        {
> +          char *nl = strrchr (buf, '\n');
> +          if (nl == NULL)
> +            break;
> +          off += (nl + 1) - buf;
> +          continue;
> +        }
> +
> +      /* The default huge page size is in the form:
> +         Hugepagesize: NUMBER kB */
> +      s += sizeof ("Hugepagesize: ") - 1;
> +      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
> +        {
> +          if (s[i] == ' ')
> +            continue;
> +          hpsize *= 10;
> +          hpsize += s[i] - '0';
> +        }
> +      hpsize *= 1024;
> +      break;
> +    }
> +
> +  __close_nocancel (fd);
> +
> +  return hpsize;
> +}

Ok.

> +static inline int
> +hugepage_flags (size_t pagesize)
> +{
> +  return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
> +}

Ok.

> +void
> +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
> +{
> +  *pagesize = 0;
> +  *flags = 0;
> +
> +  if (requested == 0)
> +    {
> +      *pagesize = malloc_default_hugepage_size ();
> +      if (pagesize != 0)
> +        *flags = hugepage_flags (*pagesize);
> +      return;
> +    }

Ok.

> +  /* Each entry represents a supported huge page in the form of:
> +     hugepages-<size>kB. */
> +  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
> +                                 O_RDONLY | O_DIRECTORY, 0);
> +  if (dirfd == -1)
> +    return;
> +
> +  char buffer[1024];
> +  while (true)
> +    {
> +#if !IS_IN(libc)
> +# define __getdents64 getdents64
> +#endif
> +      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
> +      if (ret == -1)
> +        break;
> +      else if (ret == 0)
> +        break;

Ok.

> +
> +      bool found = false;
> +      char *begin = buffer, *end = buffer + ret;
> +      while (begin != end)
> +        {
> +          unsigned short int d_reclen;
> +          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
> +                  sizeof (d_reclen));

Because alignment; ok.

> +          const char *dname = begin + offsetof (struct dirent64, d_name);
> +          begin += d_reclen;
> +
> +          if (dname[0] == '.'
> +              || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
> +            continue;
> +

Ok.

> +          size_t hpsize = 0;
> +          const char *sizestr = dname + sizeof ("hugepages-") - 1;
> +          for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
> +            {
> +              hpsize *= 10;
> +              hpsize += sizestr[i] - '0';
> +            }
> +          hpsize *= 1024;

Ok.

> +          if (hpsize == requested)
> +            {
> +              *pagesize = hpsize;
> +              *flags = hugepage_flags (*pagesize);
> +              found = true;
> +              break;
> +            }
> +        }
> +      if (found)
> +        break;
> +    }
> +
> +  __close_nocancel (dirfd);
> +}

Ok.
On 15/12/2021 01:26, DJ Delorie wrote:
>
> A few comment tweaks.
> One logic question.
>
> Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
>
>> @@ -2609,7 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>>      }
>>    else if (!tried_mmap)
>>      {
>> -      /* We can at least try to use to mmap memory. */
>> +      /* We can at least try to use to mmap memory. If new_heap fails
>> +         it is unlikely that trying to allocage huge page will succeed. */
>
> s/allocage/allocate/

Ack.

>
> "huge page" should either be "a huge page" or "huge pages"

Ack.

>> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
>> +/* Return the support huge page size from the REQUESTED sizes on PAGESIZE
>> +   along with the required extra mmap flags on FLAGS, Requesting the value
>> +   of 0 returns the default huge page size, otherwise the value will be
>> +   matched against the supported on by the system. */
>> +void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
>> +     attribute_hidden;
>
> s/support/supported/
> s/supported on by/sizes supported by/
>

Ack.

>> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> index 7497e07260..120c78b42a 100644
>> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> @@ -17,8 +17,10 @@
>>     not, see <https://www.gnu.org/licenses/>. */
>>
>>  #include <intprops.h>
>> +#include <dirent.h>
>>  #include <malloc-hugepages.h>
>>  #include <not-cancel.h>
>> +#include <sys/mman.h>
>
> Ok.
>
>> @@ -72,3 +74,128 @@ __malloc_thp_mode (void)
>> +static size_t
>> +malloc_default_hugepage_size (void)
>> +{
>> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
>> +  if (fd == -1)
>> +    return 0;
>> +
>> +  size_t hpsize = 0;
>> +
>> +  char buf[512];
>> +  off64_t off = 0;
>> +  while (1)
>> +    {
>> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
>> +      if (r < 0)
>> +        break;
>> +      buf[r - 1] = '\0';
>
> This always overwrites the last byte of the file, shouldn't this be
> buf[r] ?

Yes, I have fixed it.

Is this patch ok with the above fix?
Adhemerval Zanella <adhemerval.zanella@linaro.org> writes:
>>> +      buf[r - 1] = '\0';
>>
>> This always overwrites the last byte of the file, shouldn't this be
>> buf[r] ?
>
> Yes, I have fixed it.
>
> Is this patch ok with the above fix?

Yes.

Reviewed-by: DJ Delorie <dj@redhat.com>
diff --git a/NEWS b/NEWS index 589dea4ac3..1b437a0f3a 100644 --- a/NEWS +++ b/NEWS @@ -92,9 +92,11 @@ Major new features: configuration. * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to - make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls. - It might improve performance with Transparent Huge Pages madvise mode - depending of the workload. + either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk + or to use huge pages directly with mmap calls with the MAP_HUGETLB + flags). The former can improve performance when Transparent Huge Pages + is set to 'madvise' mode while the latter uses the system reserved + huge pages. Deprecated and removed features, and other changes affecting compatibility: diff --git a/Rules b/Rules index 471458ad4a..542a37eef0 100644 --- a/Rules +++ b/Rules @@ -158,6 +158,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \ $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \ $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \ + $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \ $(tests-special) $(tests-printers-out) xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special) endif @@ -170,6 +171,7 @@ else tests-expected = $(tests) $(tests-internal) $(tests-printers) \ $(tests-container) $(tests-malloc-check:%=%-malloc-check) \ $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \ + $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \ $(tests-mcheck:%=%-mcheck) endif tests: @@ -199,6 +201,7 @@ endif binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck) binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check) binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) +binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) else binaries-all-notests = binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs) @@ -211,6 +214,7 @@ binaries-pie-notests = binaries-mcheck-tests = 
binaries-malloc-check-tests = binaries-malloc-hugetlb1-tests = +binaries-malloc-hugetlb2-tests = endif binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests) @@ -259,6 +263,14 @@ $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o $(+link-tests) endif +ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" "" +$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \ + $(link-extra-libs-tests) \ + $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \ + $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit) + $(+link-tests) +endif + ifneq "$(strip $(binaries-pie-tests))" "" $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \ $(link-extra-libs-tests) \ @@ -302,6 +314,11 @@ $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1 endef $(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t)))) +# All malloc-hugetlb2 tests will be run with GLIBC_TUNABLE=glibc.malloc.hugetlb=2 +define malloc-hugetlb2-ENVS +$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2 +endef +$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t)))) # mcheck tests need the debug DSO to support -lmcheck. 
define mcheck-ENVS diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list index 5e830403b4..14b87cc405 100644 --- a/elf/dl-tunables.list +++ b/elf/dl-tunables.list @@ -93,9 +93,8 @@ glibc { security_level: SXID_IGNORE } hugetlb { - type: INT_32 + type: SIZE_T minval: 0 - maxval: 1 } } cpu { diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp index 2acc296c15..46237aa60f 100644 --- a/elf/tst-rtld-list-tunables.exp +++ b/elf/tst-rtld-list-tunables.exp @@ -1,7 +1,7 @@ glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+) glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+) glibc.malloc.check: 0 (min: 0, max: 3) -glibc.malloc.hugetlb: 0 (min: 0, max: 1) +glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+) glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647) glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+) glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+) diff --git a/malloc/Makefile b/malloc/Makefile index e47fd660f6..83de7f2a35 100644 --- a/malloc/Makefile +++ b/malloc/Makefile @@ -78,9 +78,9 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \ tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \ $(tests-static),$(tests)) -# Run all testes with GLIBC_TUNABLE=glibc.malloc.hugetlb=1 that check the -# Transparent Huge Pages support. We need exclude some tests that define -# the ENV vars. +# Run all tests with GLIBC_TUNABLE=glibc.malloc.hugetlb={1,2} which check +# the Transparent Huge Pages support (1) or automatic huge page support (2). +# We need exclude some tests that define the ENV vars. tests-exclude-hugetlb1 = \ tst-compathooks-off \ tst-compathooks-on \ @@ -93,6 +93,8 @@ tests-exclude-hugetlb1 = \ tst-mallocstate tests-malloc-hugetlb1 = \ $(filter-out $(tests-exclude-hugetlb1), $(tests)) +tests-malloc-hugetlb2 = \ + $(filter-out $(tests-exclude-hugetlb1), $(tests)) # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24. 
ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes) diff --git a/malloc/malloc.c b/malloc/malloc.c index 4151d043a2..3e2f427d94 100644 --- a/malloc/malloc.c +++ b/malloc/malloc.c @@ -1883,6 +1883,10 @@ struct malloc_par #if HAVE_TUNABLES /* Transparent Large Page support. */ INTERNAL_SIZE_T thp_pagesize; + /* A value different than 0 means to align mmap allocation to hp_pagesize + add hp_flags on flags. */ + INTERNAL_SIZE_T hp_pagesize; + int hp_flags; #endif /* Memory map support */ @@ -2440,7 +2444,10 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av) if (mm == MAP_FAILED) return mm; - madvise_thp (mm, size); +#ifdef MAP_HUGETLB + if (!(extra_flags & MAP_HUGETLB)) + madvise_thp (mm, size); +#endif /* The offset to the start of the mmapped region is stored in the prev_size @@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold) && (mp_.n_mmaps < mp_.n_mmaps_max))) { - char *mm = sysmalloc_mmap (nb, pagesize, 0, av); + char *mm; +#if HAVE_TUNABLES + if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize) + { + /* There is no need to isse the THP madvise call if Huge Pages are + used directly. */ + mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av); + if (mm != MAP_FAILED) + return mm; + } +#endif + mm = sysmalloc_mmap (nb, pagesize, 0, av); if (mm != MAP_FAILED) return mm; tried_mmap = true; @@ -2609,7 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av) } else if (!tried_mmap) { - /* We can at least try to use to mmap memory. */ + /* We can at least try to use to mmap memory. If new_heap fails + it is unlikely that trying to allocage huge page will succeed. */ char *mm = sysmalloc_mmap (nb, pagesize, 0, av); if (mm != MAP_FAILED) return mm; @@ -5395,6 +5414,9 @@ do_set_hugetlb (int32_t value) if (thp_mode == malloc_thp_mode_madvise) mp_.thp_pagesize = __malloc_default_thp_pagesize (); } + else if (value >= 2) + __malloc_hugepage_config (value == 2 ? 
0 : value, &mp_.hp_pagesize, + &mp_.hp_flags); return 0; } #endif diff --git a/manual/tunables.texi b/manual/tunables.texi index 7f704e9b37..8a110b2927 100644 --- a/manual/tunables.texi +++ b/manual/tunables.texi @@ -278,6 +278,13 @@ default value is @code{0}, which disables any additional support on Setting its value to @code{1} enables the use of @code{madvise} with @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}. It is enabled only if the system supports Transparent Huge Page (currently only on Linux). + +Setting its value to @code{2} enables the use of Huge Page directly with +@code{mmap} with the use of @code{MAP_HUGETLB} flag. The huge page size +to use will be the default one provided by the system. A value larger than +@code{2} specifies huge page size, which will be matched against the system +supported ones. If provided value is invalid, @code{MAP_HUGETLB} will not +be used. @end deftp @node Dynamic Linking Tunables diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c index 8fb459a263..946284a33c 100644 --- a/sysdeps/generic/malloc-hugepages.c +++ b/sysdeps/generic/malloc-hugepages.c @@ -29,3 +29,11 @@ __malloc_thp_mode (void) { return malloc_thp_mode_not_supported; } + +/* Return the default transparent huge page size. */ +void +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) +{ + *pagesize = 0; + *flags = 0; +} diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h index f5a442e328..b830ad823e 100644 --- a/sysdeps/generic/malloc-hugepages.h +++ b/sysdeps/generic/malloc-hugepages.h @@ -34,4 +34,11 @@ enum malloc_thp_mode_t enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden; +/* Return the support huge page size from the REQUESTED sizes on PAGESIZE + along with the required extra mmap flags on FLAGS, Requesting the value + of 0 returns the default huge page size, otherwise the value will be + matched against the supported on by the system. 
*/ +void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) + attribute_hidden; + #endif /* _MALLOC_HUGEPAGES_H */ diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c index 7497e07260..120c78b42a 100644 --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c @@ -17,8 +17,10 @@ not, see <https://www.gnu.org/licenses/>. */ #include <intprops.h> +#include <dirent.h> #include <malloc-hugepages.h> #include <not-cancel.h> +#include <sys/mman.h> unsigned long int __malloc_default_thp_pagesize (void) @@ -72,3 +74,128 @@ __malloc_thp_mode (void) } return malloc_thp_mode_not_supported; } + +static size_t +malloc_default_hugepage_size (void) +{ + int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY); + if (fd == -1) + return 0; + + size_t hpsize = 0; + + char buf[512]; + off64_t off = 0; + while (1) + { + ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off); + if (r < 0) + break; + buf[r - 1] = '\0'; + + /* If the tag is not found, read the last line again. 
*/ + const char *s = strstr (buf, "Hugepagesize:"); + if (s == NULL) + { + char *nl = strrchr (buf, '\n'); + if (nl == NULL) + break; + off += (nl + 1) - buf; + continue; + } + + /* The default huge page size is in the form: + Hugepagesize: NUMBER kB */ + s += sizeof ("Hugepagesize: ") - 1; + for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++) + { + if (s[i] == ' ') + continue; + hpsize *= 10; + hpsize += s[i] - '0'; + } + hpsize *= 1024; + break; + } + + __close_nocancel (fd); + + return hpsize; +} + +static inline int +hugepage_flags (size_t pagesize) +{ + return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT); +} + +void +__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags) +{ + *pagesize = 0; + *flags = 0; + + if (requested == 0) + { + *pagesize = malloc_default_hugepage_size (); + if (pagesize != 0) + *flags = hugepage_flags (*pagesize); + return; + } + + /* Each entry represents a supported huge page in the form of: + hugepages-<size>kB. */ + int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages", + O_RDONLY | O_DIRECTORY, 0); + if (dirfd == -1) + return; + + char buffer[1024]; + while (true) + { +#if !IS_IN(libc) +# define __getdents64 getdents64 +#endif + ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer)); + if (ret == -1) + break; + else if (ret == 0) + break; + + bool found = false; + char *begin = buffer, *end = buffer + ret; + while (begin != end) + { + unsigned short int d_reclen; + memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen), + sizeof (d_reclen)); + const char *dname = begin + offsetof (struct dirent64, d_name); + begin += d_reclen; + + if (dname[0] == '.' 
+ || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0) + continue; + + size_t hpsize = 0; + const char *sizestr = dname + sizeof ("hugepages-") - 1; + for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++) + { + hpsize *= 10; + hpsize += sizestr[i] - '0'; + } + hpsize *= 1024; + + if (hpsize == requested) + { + *pagesize = hpsize; + *flags = hugepage_flags (*pagesize); + found = true; + break; + } + } + if (found) + break; + } + + __close_nocancel (dirfd); +}