[v2,1/1] elf: align the mapping address of LOAD segments with p_align
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
Currently, ld.so always maps the LOAD segments aligned only to the base
page size (e.g. 4k on x86, or 4k, 16k and 64k on arm64). This
is a bug, and it has been reported:
https://sourceware.org/bugzilla/show_bug.cgi?id=28676
This patch fixes it. With this patch, ld.so aligns
the mapping address of the first LOAD segment with p_align
when p_align is greater than the current base page size.
A testcase:
main.c:
extern void dso_test(void);
int main(void)
{
dso_test();
getchar();
return 0;
}
load.c, used to generate libload.so:
int foo __attribute__((aligned(0x200000))) = 1;
void dso_test(void)
{
printf("dso test\n");
printf("foo: %p\n", &foo);
}
The steps:
$ gcc -O2 -fPIC -c -o load.o load.c
$ gcc -shared -Wl,-z,max-page-size=0x200000 -o libload.so load.o
$ gcc -no-pie -Wl,-z,max-page-size=0x200000 -O2 -o dso main.c libload.so -Wl,-R,.
Before the fix:
$ ./dso
dso test
foo: 0xffff88ae2000
After the fix:
$ ./dso
dso test
foo: 0xffff9e000000
This fix also makes it simple and feasible for code segments
to be backed by huge pages.
Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
---
elf/dl-load.c | 1 +
elf/dl-map-segments.h | 63 +++++++++++++++++++++++++++++++++++++++----
include/link.h | 3 +++
3 files changed, 62 insertions(+), 5 deletions(-)
Comments
On Wed, Dec 8, 2021 at 9:57 PM Rongwei Wang
<rongwei.wang@linux.alibaba.com> wrote:
>
> Now, ld.so always map the LOAD segments and aligned by base
> page size (e.g. 4k in x86 or 4k, 16k and 64k in arm64). This
> is a bug, and had been reported:
>
> https://sourceware.org/bugzilla/show_bug.cgi?id=28676
>
> This patch mainly to fix it. In this patch, ld.so can align
> the mapping address of the first LOAD segment with p_align
> when p_align is greater than the current base page size.
>
> A testcase:
> main.c:
>
> extern void dso_test(void);
> int main(void)
> {
> dso_test();
> getchar();
>
> return 0;
> }
>
> load.c, used to generate libload.so:
>
> int foo __attribute__((aligned(0x200000))) = 1;
> void dso_test(void)
> {
> printf("dso test\n");
> printf("foo: %p\n", &foo);
> }
>
> The steps:
> $ gcc -O2 -fPIC -c -o load.o load.c
> $ gcc -shared -Wl,-z,max-page-size=0x200000 -o libload.so load.o
> $ gcc -no-pie -Wl,-z,max-page-size=0x200000 -O2 -o dso main.c libload.so -Wl,-R,.
>
> Before fixing:
> $ ./dso
> dso test
> foo: 0xffff88ae2000
>
> After fixed:
> $ ./dso
> dso test
> foo: 0xffff9e000000
>
> And this fix can help code segments use huge pages become
> simple and available.
Please include a testcase, like
https://gitlab.com/x86-glibc/glibc/-/commits/users/hjl/pr28676/master
> Signed-off-by: Xu Yu <xuyu@linux.alibaba.com>
> Signed-off-by: Rongwei Wang <rongwei.wang@linux.alibaba.com>
> ---
> elf/dl-load.c | 1 +
> elf/dl-map-segments.h | 63 +++++++++++++++++++++++++++++++++++++++----
> include/link.h | 3 +++
> 3 files changed, 62 insertions(+), 5 deletions(-)
>
> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index e39980fb19..136cfe2fa8 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -1154,6 +1154,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
> c->dataend = ph->p_vaddr + ph->p_filesz;
> c->allocend = ph->p_vaddr + ph->p_memsz;
> c->mapoff = ALIGN_DOWN (ph->p_offset, GLRO(dl_pagesize));
> + l->l_load_align = ph->p_align;
Can you add an alignment field to
/* This structure describes one PT_LOAD command.
Its details have been expanded out and converted. */
struct loadcmd
{
ElfW(Addr) mapstart, mapend, dataend, allocend;
ElfW(Off) mapoff;
int prot; /* PROT_* bits. */
};
instead?
>
> /* Determine whether there is a gap between the last segment
> and this one. */
> diff --git a/elf/dl-map-segments.h b/elf/dl-map-segments.h
> index ac9f09ab4c..fad98eb984 100644
> --- a/elf/dl-map-segments.h
> +++ b/elf/dl-map-segments.h
> @@ -18,6 +18,48 @@
>
> #include <dl-load.h>
>
> +static __always_inline void *
> +_dl_map_segments_align (const struct loadcmd *c,
> + ElfW(Addr) mappref, int fd, size_t alignment,
> + const size_t maplength)
> +{
> + unsigned long map_start, map_start_align, map_end;
Use ElfW(Addr) instead of long.
> + unsigned long maplen = (maplength >= alignment) ?
> + (maplength + alignment) : (2 * alignment);
> +
> + /* Allocate enough space to ensure that address aligned by
> + p_align is included. */
> + map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplen,
> + PROT_NONE,
> + MAP_ANONYMOUS | MAP_PRIVATE,
> + -1, 0);
> + if (__glibc_unlikely ((void *) map_start == MAP_FAILED))
> + {
> + /* If mapping a aligned address failed, then ... */
an aligned
> + map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
> + c->prot,
> + MAP_COPY|MAP_FILE,
> + fd, c->mapoff);
> +
> + return (void *) map_start;
> + }
> + map_start_align = ALIGN_UP(map_start, alignment);
> + map_end = map_start_align + maplength;
> +
> + /* Remember which part of the address space this object uses. */
> + map_start_align = (ElfW(Addr)) __mmap ((void *) map_start_align, maplength,
> + c->prot,
> + MAP_COPY|MAP_FILE|MAP_FIXED,
> + fd, c->mapoff);
> + if (__glibc_unlikely ((void *) map_start_align == MAP_FAILED))
> + return MAP_FAILED;
> + if (map_start_align > map_start)
> + __munmap((void *)map_start, map_start_align - map_start);
> + __munmap((void *)map_end, map_start + maplen - map_end);
> +
> + return (void *) map_start_align;
> +}
> +
> /* This implementation assumes (as does the corresponding implementation
> of _dl_unmap_segments, in dl-unmap-segments.h) that shared objects
> are always laid out with all segments contiguous (or with gaps
> @@ -52,11 +94,22 @@ _dl_map_segments (struct link_map *l, int fd,
> c->mapstart & GLRO(dl_use_load_bias))
> - MAP_BASE_ADDR (l));
>
> - /* Remember which part of the address space this object uses. */
> - l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
> - c->prot,
> - MAP_COPY|MAP_FILE,
> - fd, c->mapoff);
> + /* During mapping, align the mapping address of the LOAD segments
> + according to own p_align. This helps OS map its code segment to
> + huge pages. */
> + if (l->l_load_align > GLRO(dl_pagesize))
> + {
> + l->l_map_start = (ElfW(Addr)) _dl_map_segments_align (c, mappref,
> + fd, l->l_load_align, maplength);
> + }
> + else
> + {
> + /* Remember which part of the address space this object uses. */
> + l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
> + c->prot,
> + MAP_COPY|MAP_FILE,
> + fd, c->mapoff);
> + }
Don't use {} for a single statement.
> if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
> return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
>
> diff --git a/include/link.h b/include/link.h
> index aea268439c..fc6ce29fab 100644
> --- a/include/link.h
> +++ b/include/link.h
> @@ -298,6 +298,9 @@ struct link_map
>
> /* Thread-local storage related info. */
>
> + /* Alignment requirement of the LOAD block. */
> + size_t l_load_align;
> +
> /* Start of the initialization image. */
> void *l_tls_initimage;
> /* Size of the initialization image. */
> --
> 2.27.0
>
@@ -1154,6 +1154,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
c->dataend = ph->p_vaddr + ph->p_filesz;
c->allocend = ph->p_vaddr + ph->p_memsz;
c->mapoff = ALIGN_DOWN (ph->p_offset, GLRO(dl_pagesize));
+ l->l_load_align = ph->p_align;
/* Determine whether there is a gap between the last segment
and this one. */
@@ -18,6 +18,48 @@
#include <dl-load.h>
+static __always_inline void *
+_dl_map_segments_align (const struct loadcmd *c,
+ ElfW(Addr) mappref, int fd, size_t alignment,
+ const size_t maplength)
+{
+ unsigned long map_start, map_start_align, map_end;
+ unsigned long maplen = (maplength >= alignment) ?
+ (maplength + alignment) : (2 * alignment);
+
+ /* Allocate enough space to ensure that address aligned by
+ p_align is included. */
+ map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplen,
+ PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (__glibc_unlikely ((void *) map_start == MAP_FAILED))
+ {
+ /* If mapping a aligned address failed, then ... */
+ map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+ c->prot,
+ MAP_COPY|MAP_FILE,
+ fd, c->mapoff);
+
+ return (void *) map_start;
+ }
+ map_start_align = ALIGN_UP(map_start, alignment);
+ map_end = map_start_align + maplength;
+
+ /* Remember which part of the address space this object uses. */
+ map_start_align = (ElfW(Addr)) __mmap ((void *) map_start_align, maplength,
+ c->prot,
+ MAP_COPY|MAP_FILE|MAP_FIXED,
+ fd, c->mapoff);
+ if (__glibc_unlikely ((void *) map_start_align == MAP_FAILED))
+ return MAP_FAILED;
+ if (map_start_align > map_start)
+ __munmap((void *)map_start, map_start_align - map_start);
+ __munmap((void *)map_end, map_start + maplen - map_end);
+
+ return (void *) map_start_align;
+}
+
/* This implementation assumes (as does the corresponding implementation
of _dl_unmap_segments, in dl-unmap-segments.h) that shared objects
are always laid out with all segments contiguous (or with gaps
@@ -52,11 +94,22 @@ _dl_map_segments (struct link_map *l, int fd,
c->mapstart & GLRO(dl_use_load_bias))
- MAP_BASE_ADDR (l));
- /* Remember which part of the address space this object uses. */
- l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
- c->prot,
- MAP_COPY|MAP_FILE,
- fd, c->mapoff);
+ /* During mapping, align the mapping address of the LOAD segments
+ according to own p_align. This helps OS map its code segment to
+ huge pages. */
+ if (l->l_load_align > GLRO(dl_pagesize))
+ {
+ l->l_map_start = (ElfW(Addr)) _dl_map_segments_align (c, mappref,
+ fd, l->l_load_align, maplength);
+ }
+ else
+ {
+ /* Remember which part of the address space this object uses. */
+ l->l_map_start = (ElfW(Addr)) __mmap ((void *) mappref, maplength,
+ c->prot,
+ MAP_COPY|MAP_FILE,
+ fd, c->mapoff);
+ }
if (__glibc_unlikely ((void *) l->l_map_start == MAP_FAILED))
return DL_MAP_SEGMENTS_ERROR_MAP_SEGMENT;
@@ -298,6 +298,9 @@ struct link_map
/* Thread-local storage related info. */
+ /* Alignment requirement of the LOAD block. */
+ size_t l_load_align;
+
/* Start of the initialization image. */
void *l_tls_initimage;
/* Size of the initialization image. */