From e9ce30429e8e7462fd1da34c550c9c9f895682de Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 7 Apr 2026 06:46:19 +0800
Subject: [PATCH] elf: Add transparent huge page marker and
_dl_map_segment_adjust
The current scheme of loading PT_LOAD segments with THP is hard-coded in
the Linux kernel ELF loader, and in ld.so it is either hard-coded or
requires a tunable at run-time.
Add GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE:
/* Set if the object file should be loaded with transparent huge pages
if possible. */
#define GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE (1U << 1)
to GNU_PROPERTY_1_NEEDED. Update ld.so to process PT_GNU_PROPERTY segment
for GNU_PROPERTY_1_NEEDED before loading PT_LOAD segments and enable
THP if the GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE is set in the
binary. This fixes BZ #34056.
Add _dl_map_segment_adjust to enable THP for a binary with program
headers:
Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
LOAD 0x000000 0x00400000 0x00400000 0x16d110 0x16d110 R 0x1000
LOAD 0x16e000 0x0056e000 0x0056e000 0x1055d9 0x1055d9 R E 0x1000
LOAD 0x273ebc 0x00674ebc 0x00674ebc 0x02540 0x039fc RW 0x1000
by combining the first 2 PT_LOAD segments, when the following conditions
are satisfied:
1. The first segment file offset is 0.
2. Both segments are read-only.
3. The first segment load address is aligned to THP page size.
4. There is no address gap between them.
5. The total size >= THP page size.
into a single read-only and executable load segment:
LOAD 0x000000 0x00400000 0x00400000 0x2735d9 0x2735d9 R E 0x200000
LOAD 0x273ebc 0x00674ebc 0x00674ebc 0x02540 0x039fc RW 0x1000
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
---
elf/Makefile | 6 ++
elf/dl-load.c | 78 +++++++++++++++--
elf/dl-misc.c | 58 +++++++++----
elf/elf.h | 4 +
elf/tst-thp-1-mod.c | 24 ++++++
elf/tst-thp-1.c | 28 ++++++
elf/tst-thp-note.S | 36 ++++++++
sysdeps/generic/dl-map-segment-align.h | 16 +++-
sysdeps/generic/ldsodefs.h | 6 ++
.../unix/sysv/linux/dl-map-segment-align.c | 86 +++++++++++++++++--
.../unix/sysv/linux/dl-map-segment-align.h | 8 +-
11 files changed, 310 insertions(+), 40 deletions(-)
create mode 100644 elf/tst-thp-1-mod.c
create mode 100644 elf/tst-thp-1.c
create mode 100644 elf/tst-thp-note.S
@@ -474,6 +474,7 @@ tests += \
tst-sonamemove-link \
tst-startup-errno \
tst-thrlock \
+ tst-thp-1 \
tst-tls-dlinfo \
tst-tls-ie \
tst-tls-ie-dlmopen \
@@ -1009,6 +1010,7 @@ modules-names += \
tst-sonamemove-runmod1 \
tst-sonamemove-runmod2 \
tst-sprof-mod \
+ tst-thp-1-mod \
tst-tls-ie-mod0 \
tst-tls-ie-mod1 \
tst-tls-ie-mod2 \
@@ -2341,6 +2343,10 @@ $(objpfx)tst-thrlock: $(shared-thread-library)
$(objpfx)tst-thrlock.out: $(libm)
$(objpfx)tst-noload.out: $(libm)
+LDFLAGS-tst-thp-1-mod.so = -Wl,-soname,tst-thp-1-mod.so
+$(objpfx)tst-thp-1-mod.so: $(objpfx)tst-thp-note.o
+$(objpfx)tst-thp-1: $(objpfx)tst-thp-1-mod.so
+
tst-tst-dlopen-tlsmodid-no-pie = yes
$(objpfx)tst-dlopen-tlsmodid: $(shared-thread-library)
$(objpfx)tst-dlopen-tlsmodid.out: $(objpfx)tst-dlopen-self
@@ -836,12 +836,22 @@ _dl_init_paths (const char *llp, const char *source,
PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0
note is handled which contains processor specific properties.
FD is -1 for the kernel mapped main executable otherwise it is
- the fd used for loading module L. */
+ the fd used for loading module L. If NOTE isn't NULL, only
+ GNU_PROPERTY_1_NEEDED should be processed. */
-void
-_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+static void
+_dl_process_pt_gnu_property_1 (struct link_map *l, int fd,
+ const ElfW(Phdr) *ph,
+ const ElfW(Nhdr) *note)
{
- const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
+ bool property_1_needed_only;
+ if (note == NULL)
+ {
+ property_1_needed_only = false;
+ note = (const void *) (ph->p_vaddr + l->l_addr);
+ }
+ else
+ property_1_needed_only = true;
const ElfW(Addr) size = ph->p_memsz;
const ElfW(Addr) align = ph->p_align;
@@ -885,8 +895,18 @@ _dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
last_type = type;
+ if (property_1_needed_only)
+ {
+ if (type == GNU_PROPERTY_1_NEEDED)
+ {
+ if (datasz == 4)
+ l->l_1_needed = *(unsigned int *) ptr;
+ return;
+ }
+ }
/* Target specific property processing. */
- if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0)
+ else if (_dl_process_gnu_property (l, fd, type, datasz,
+ ptr) == 0)
return;
/* Check the next property item. */
@@ -904,6 +924,18 @@ _dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
}
}
+/* Process PT_GNU_PROPERTY program header PH in module L after PT_LOAD
+ segments are mapped, reading the note from the mapped segment (NOTE is
+ NULL). Only one NT_GNU_PROPERTY_TYPE_0 note is handled which contains
+ processor specific properties. FD is -1 for the kernel mapped main
+ executable otherwise it is the fd used for loading module L. */
+
+void
+_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+{
+ _dl_process_pt_gnu_property_1 (l, fd, ph, NULL);
+}
+
static void
_dl_notify_new_object (int mode, Lmid_t nsid, struct link_map *l)
{
@@ -1106,6 +1138,34 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
bool empty_dynamic = false;
ElfW(Addr) p_align_max = 0;
+ bool thp_requested
+ = TUNABLE_GET (glibc, elf, thp, int32_t, NULL) != 0;
+
+ /* If transparent huge pages are neither enabled nor set in tunables,
+ process the PT_GNU_PROPERTY segment for GNU_PROPERTY_1_NEEDED before
+ PT_LOAD segments are mapped in. */
+ if (!thp_requested && !TUNABLE_IS_INITIALIZED (glibc, elf, thp))
+ for (ph = phdr; ph < &phdr[l->l_phnum]; ++ph)
+ if (ph->p_type == PT_GNU_PROPERTY)
+ {
+ size_t prop_size = ph->p_offset + ph->p_memsz;
+ void *prop = _dl_sysdep_map_file_from_fd (fd, &prop_size,
+ PROT_READ);
+ if (prop == MAP_FAILED)
+ {
+ errstring = N_("can't map in property segment");
+ goto lose;
+ }
+ const ElfW(Nhdr) *note
+ = (const void *) (prop + ph->p_offset);
+ _dl_process_pt_gnu_property_1 (l, fd, ph, note);
+ thp_requested
+ = ((l->l_1_needed
+ & GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE) != 0);
+ __munmap (prop, prop_size);
+ break;
+ }
+
/* The struct is initialized to zero so this is not necessary:
l->l_ld = 0;
l->l_phdr = 0;
@@ -1174,7 +1234,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
c->prot = pf_to_prot (ph->p_flags);
/* Architecture-specific adjustment of segment alignment. */
- p_align_max = _dl_map_segment_align (c, p_align_max);
+ p_align_max = _dl_map_segment_align (c, p_align_max,
+ thp_requested);
break;
case PT_TLS:
@@ -1229,9 +1290,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
goto lose;
}
- /* Align all PT_LOAD segments to the maximum p_align. */
- for (size_t i = 0; i < nloadcmds; i++)
- loadcmds[i].mapalign = p_align_max;
+ _dl_map_segment_adjust (loadcmds, &nloadcmds, p_align_max,
+ thp_requested);
/* dlopen of an executable is not valid because it is not possible
to perform proper relocations, handle static TLS, or run the
@@ -27,41 +27,61 @@
#include <sys/stat.h>
#include <unistd.h>
-/* Read the whole contents of FILE into new mmap'd space with given
- protections. *SIZEP gets the size of the file. On error MAP_FAILED
- is returned. */
+/* Map in the contents of FD with given protections. If *SIZEP is 0,
+ *SIZEP gets the size of the file. Otherwise, map the first *SIZEP
+ bytes. MAP_FAILED is returned on error or if the file is smaller. */
void *
-_dl_sysdep_read_whole_file (const char *file, size_t *sizep, int prot)
+_dl_sysdep_map_file_from_fd (int fd, size_t *sizep, int prot)
{
void *result = MAP_FAILED;
struct __stat64_t64 st;
- int fd = __open64_nocancel (file, O_RDONLY | O_CLOEXEC);
- if (fd >= 0)
+ if (__fstat64_time64 (fd, &st) >= 0)
{
- if (__fstat64_time64 (fd, &st) >= 0)
- {
- *sizep = st.st_size;
+ size_t size = st.st_size;
+ if (*sizep == 0)
+ *sizep = size;
+ else if (*sizep > size)
+ return result;
+ else
+ size = *sizep;
- /* No need to map the file if it is empty. */
- if (*sizep != 0)
- /* Map a copy of the file contents. */
- result = __mmap (NULL, *sizep, prot,
+ /* No need to map the file if it is empty. */
+ if (size != 0)
+ /* Map a copy of the file contents. */
+ result = __mmap (NULL, size, prot,
#ifdef MAP_COPY
- MAP_COPY
+ MAP_COPY
#else
- MAP_PRIVATE
+ MAP_PRIVATE
#endif
#ifdef MAP_FILE
- | MAP_FILE
+ | MAP_FILE
#endif
- , fd, 0);
- }
- __close_nocancel (fd);
+ , fd, 0);
}
+
return result;
}
+/* Read the whole contents of FILE into new mmap'd space with given
+ protections. *SIZEP gets the size of the file. On error MAP_FAILED
+ is returned. */
+
+void *
+_dl_sysdep_read_whole_file (const char *file, size_t *sizep, int prot)
+{
+ int fd = __open64_nocancel (file, O_RDONLY | O_CLOEXEC);
+ if (fd >= 0)
+ {
+ *sizep = 0;
+ void *result = _dl_sysdep_map_file_from_fd (fd, sizep, prot);
+ __close_nocancel (fd);
+ return result;
+ }
+ return MAP_FAILED;
+}
+
/* Test whether given NAME matches any of the names of the given object. */
int
_dl_name_match_p (const char *name, const struct link_map *map)
@@ -1380,6 +1380,10 @@ typedef struct
cannot be used with copy relocation. */
#define GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS (1U << 0)
+/* Set if the object file should be loaded with transparent huge pages
+ if possible. */
+#define GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE (1U << 1)
+
/* Processor-specific semantics, lo */
#define GNU_PROPERTY_LOPROC 0xc0000000
/* Processor-specific semantics, hi */
new file mode 100644
@@ -0,0 +1,24 @@
+/* Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+
+void
+hello (void)
+{
+ printf ("Hello World\n");
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* Test GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+extern void hello (void);
+
+static int
+do_test (void)
+{
+ hello ();
+ return 0;
+}
+
+#include <support/test-driver.c>
new file mode 100644
@@ -0,0 +1,36 @@
+/* Add a GNU_PROPERTY_1_NEEDED property with
+ GNU_PROPERTY_1_NEEDED_TRANSPARENT_HUGEPAGE.
+ Copyright (C) 2026 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <gcc-macros.h>
+
+ .section ".note.gnu.property", "a"
+ .balign GCCMACRO__SIZEOF_POINTER__
+ .long 1f - 0f /* name length */
+ .long 5f - 2f /* data length */
+ .long 5 /* note type */
+0: .asciz "GNU" /* vendor name */
+1:
+ .balign GCCMACRO__SIZEOF_POINTER__
+2: .long 0xb0008000 /* pr_type. */
+ .long 4f - 3f /* pr_datasz. */
+3:
+ .long 0x00000002
+4:
+ .balign GCCMACRO__SIZEOF_POINTER__
+5:
@@ -1,4 +1,4 @@
-/* _dl_map_segment_align. Generic version.
+/* _dl_map_segment_align and _dl_map_segment_adjust. Generic version.
Copyright (C) 2026 Free Software Foundation, Inc.
Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
@@ -20,7 +20,19 @@
#include <dl-load.h>
static inline ElfW(Addr)
-_dl_map_segment_align (const struct loadcmd *c, ElfW(Addr) p_align_max)
+_dl_map_segment_align (const struct loadcmd *c, ElfW(Addr) p_align_max,
+ bool thp_requested)
{
return p_align_max;
}
+
+static inline void
+_dl_map_segment_adjust (struct loadcmd *load_cmds,
+ size_t *n_load_cmds, ElfW(Addr) max_align,
+ bool thp_requested)
+{
+ /* Align all PT_LOAD segments to the maximum p_align. */
+ size_t n = *n_load_cmds;
+ for (size_t i = 0; i < n; i++)
+ load_cmds[i].mapalign = max_align;
+}
@@ -1154,6 +1154,12 @@ extern void _dl_unload_cache (void) attribute_hidden;
extern void *_dl_sysdep_read_whole_file (const char *file, size_t *sizep,
int prot) attribute_hidden;
+/* System-dependent function to map in a file's contents. If *SIZEP is
+ 0, *SIZEP gets the size of the file. Otherwise, map the first *SIZEP
+ bytes. MAP_FAILED is returned on error or if the file is smaller. */
+extern void * _dl_sysdep_map_file_from_fd (int fd, size_t *sizep,
+ int prot) attribute_hidden;
+
/* System-specific function to do initial startup for the dynamic linker.
After this, file access calls and getenv must work. This is responsible
for setting __libc_enable_secure if we need to be secure (e.g. setuid),
@@ -1,4 +1,4 @@
-/* _dl_map_segment_align. Linux version.
+/* _dl_map_segment_align and _dl_map_segment_adjust. Linux version.
Copyright (C) 2026 Free Software Foundation, Inc.
Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
@@ -18,16 +18,16 @@
<https://www.gnu.org/licenses/>. */
#include <dl-map-segment-align.h>
-#include <dl-tunables.h>
#include <hugepages.h>
+static enum thp_mode_t thp_mode = thp_mode_not_supported;
+static size_t thp_pagesize;
+
ElfW (Addr)
-_dl_map_segment_align (const struct loadcmd *c, ElfW (Addr) p_align_max)
+_dl_map_segment_align (const struct loadcmd *c, ElfW (Addr) p_align_max,
+ bool thp_requested)
{
- static enum thp_mode_t thp_mode = thp_mode_not_supported;
- static unsigned long int thp_pagesize;
-
- if (TUNABLE_GET (glibc, elf, thp, int32_t, NULL) == 0)
+ if (!thp_requested)
return p_align_max;
if (__glibc_unlikely (thp_mode == thp_mode_not_supported
@@ -51,3 +51,75 @@ _dl_map_segment_align (const struct loadcmd *c, ElfW (Addr) p_align_max)
return p_align_max;
}
+
+void
+_dl_map_segment_adjust (struct loadcmd *load_cmds, size_t *n_load_cmds,
+ ElfW(Addr) max_align, bool thp_requested)
+
+{
+ size_t n = *n_load_cmds;
+
+ if (n == 1)
+ {
+ load_cmds[0].mapalign = max_align;
+ return;
+ }
+
+ if (!thp_requested || thp_pagesize == 0 || max_align == thp_pagesize)
+ {
+ /* If THP isn't enabled or the maximum p_align is the same as THP
+ page size, align all PT_LOAD segments to the maximum p_align. */
+thp_disabled:
+ for (size_t i = 0; i < n; i++)
+ load_cmds[i].mapalign = max_align;
+ return;
+ }
+
+ /* For a binary with program headers:
+
+ Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align
+ LOAD 0x000000 0x00400000 0x00400000 0x16d110 0x16d110 R 0x1000
+ LOAD 0x16e000 0x0056e000 0x0056e000 0x1055d9 0x1055d9 R E 0x1000
+ LOAD 0x273ebc 0x00674ebc 0x00674ebc 0x02540 0x039fc RW 0x1000
+
+ combine the first 2 PT_LOAD segments, when the following conditions
+ are satisfied:
+
+ 1. The first segment file offset is 0.
+ 2. Both segments are read-only.
+ 3. The first segment load address is aligned to THP page size.
+ 4. There is no address gap between them.
+ 5. The total size >= THP page size.
+
+ into a single read-only and executable load segment:
+
+ LOAD 0x000000 0x00400000 0x00400000 0x2735d9 0x2735d9 R E 0x200000
+ LOAD 0x273ebc 0x00674ebc 0x00674ebc 0x02540 0x039fc RW 0x1000
+ */
+
+ ElfW(Addr) mapstart = load_cmds[0].mapstart;
+ ElfW(Addr) mapend = load_cmds[1].mapend;
+ int prot = load_cmds[0].prot | load_cmds[1].prot;
+ if (load_cmds[0].mapoff != 0
+ || (prot & PROT_WRITE) != 0
+ || (mapstart & (thp_pagesize - 1)) != 0
+ || load_cmds[0].mapend != load_cmds[1].mapstart
+ || load_cmds[1].mapoff != (load_cmds[0].mapend - mapstart)
+ || (mapend - mapstart) < thp_pagesize)
+ goto thp_disabled;
+
+ /* Combine the first 2 PT_LOAD segments. */
+ load_cmds[0].mapend = mapend;
+ load_cmds[0].dataend = load_cmds[1].dataend;
+ load_cmds[0].allocend = load_cmds[1].allocend;
+ load_cmds[0].mapalign = thp_pagesize;
+ load_cmds[0].prot = prot;
+
+ n--;
+ *n_load_cmds = n;
+ for (size_t i = 1; i < n; i++)
+ {
+ load_cmds[i] = load_cmds[i + 1];
+ load_cmds[i].mapalign = max_align;
+ }
+}
@@ -1,4 +1,4 @@
-/* _dl_map_segment_align. Linux version.
+/* _dl_map_segment_align and _dl_map_segment_adjust. Linux version.
Copyright (C) 2026 Free Software Foundation, Inc.
Copyright The GNU Toolchain Authors.
This file is part of the GNU C Library.
@@ -23,5 +23,7 @@
# define DL_MAP_DEFAULT_THP_PAGESIZE 0
#endif
-ElfW (Addr)
- _dl_map_segment_align (const struct loadcmd *c, ElfW (Addr) p_align_max);
+extern ElfW (Addr) _dl_map_segment_align
+ (const struct loadcmd *, ElfW (Addr), bool) attribute_hidden;
+extern void _dl_map_segment_adjust
+ (struct loadcmd *, size_t *, ElfW(Addr), bool) attribute_hidden;
--
2.53.0