x86_64: Simplify elf_machine_{load_address,dynamic}
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
and drop reliance on _GLOBAL_OFFSET_TABLE_[0].
&__ehdr_start is a better way to get the load address.
---
sysdeps/x86_64/dl-machine.h | 25 +++++++++----------------
1 file changed, 9 insertions(+), 16 deletions(-)
Comments
On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha
<libc-alpha@sourceware.org> wrote:
>
> and drop reliance on _GLOBAL_OFFSET_TABLE_[0].
>
> &__ehdr_start is a better way to get the load address.
> ---
> sysdeps/x86_64/dl-machine.h | 25 +++++++++----------------
> 1 file changed, 9 insertions(+), 16 deletions(-)
>
> diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> index a8596aa3fa..ceee50734e 100644
> --- a/sysdeps/x86_64/dl-machine.h
> +++ b/sysdeps/x86_64/dl-machine.h
> @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> }
>
>
> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
> - first element of the GOT. This must be inlined in a function which
> - uses global data. */
> -static inline ElfW(Addr) __attribute__ ((unused))
> -elf_machine_dynamic (void)
> -{
> - /* This produces an IP-relative reloc which is resolved at link time. */
> - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
> - return _GLOBAL_OFFSET_TABLE_[0];
> -}
> -
> -
> /* Return the run-time load address of the shared object. */
> static inline ElfW(Addr) __attribute__ ((unused))
> elf_machine_load_address (void)
> {
> - /* Compute the difference between the runtime address of _DYNAMIC as seen
> - by an IP-relative reference, and the link-time address found in the
> - special unrelocated first GOT entry. */
> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
> + return (ElfW(Addr)) &__ehdr_start;
> +}
> +
> +/* Return the link-time address of _DYNAMIC. */
> +static inline ElfW(Addr) __attribute__ ((unused))
> +elf_machine_dynamic (void)
> +{
> extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
> - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic ();
> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
> }
>
> /* Set up the loaded object described by L so its unrelocated PLT
> --
> 2.33.0.rc1.237.g0d66db33f3-goog
>
Please provide comparison of ld.so with and without the change, in
terms of code size, code sequence as well as dynamic relocation.
Thanks.
On 2021-08-17, H.J. Lu wrote:
>On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha
><libc-alpha@sourceware.org> wrote:
>>
>> and drop reliance on _GLOBAL_OFFSET_TABLE_[0].
>>
>> &__ehdr_start is a better way to get the load address.
>> ---
>> sysdeps/x86_64/dl-machine.h | 25 +++++++++----------------
>> 1 file changed, 9 insertions(+), 16 deletions(-)
>>
>> diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
>> index a8596aa3fa..ceee50734e 100644
>> --- a/sysdeps/x86_64/dl-machine.h
>> +++ b/sysdeps/x86_64/dl-machine.h
>> @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
>> }
>>
>>
>> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
>> - first element of the GOT. This must be inlined in a function which
>> - uses global data. */
>> -static inline ElfW(Addr) __attribute__ ((unused))
>> -elf_machine_dynamic (void)
>> -{
>> - /* This produces an IP-relative reloc which is resolved at link time. */
>> - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
>> - return _GLOBAL_OFFSET_TABLE_[0];
>> -}
>> -
>> -
>> /* Return the run-time load address of the shared object. */
>> static inline ElfW(Addr) __attribute__ ((unused))
>> elf_machine_load_address (void)
>> {
>> - /* Compute the difference between the runtime address of _DYNAMIC as seen
>> - by an IP-relative reference, and the link-time address found in the
>> - special unrelocated first GOT entry. */
>> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
>> + return (ElfW(Addr)) &__ehdr_start;
>> +}
>> +
>> +/* Return the link-time address of _DYNAMIC. */
>> +static inline ElfW(Addr) __attribute__ ((unused))
>> +elf_machine_dynamic (void)
>> +{
>> extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
>> - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic ();
>> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
>> }
>>
>> /* Set up the loaded object described by L so its unrelocated PLT
>> --
>> 2.33.0.rc1.237.g0d66db33f3-goog
>>
>
>Please provide comparison of ld.so with and without the change, in
>terms of code size, code sequence as well as dynamic relocation.
>
>Thanks.
Neither form has dynamic relocations.
If both elf_machine_{load_address,dynamic} are emitted out of line, the
new form is two bytes longer but has fewer GOT loads (old form first,
new form after the "---"):
0000000000000050 <elf_machine_dynamic>:
50: movq (%rip), %rax # 0x57 <elf_machine_dynamic+0x7>
0000000000000053: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4
57: retq
58: nopl (%rax,%rax)
0000000000000060 <elf_machine_load_address>:
60: leaq (%rip), %rax # 0x67 <elf_machine_load_address+0x7>
0000000000000063: R_X86_64_PC32 _DYNAMIC-0x4
67: subq (%rip), %rax # 0x6e <elf_machine_load_address+0xe>
000000000000006a: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4
6e: retq
---
0000000000000050 <elf_machine_load_address>:
50: leaq (%rip), %rax # 0x57 <elf_machine_load_address+0x7>
0000000000000053: R_X86_64_PC32 __ehdr_start-0x4
57: retq
58: nopl (%rax,%rax)
0000000000000060 <elf_machine_dynamic>:
60: leaq (%rip), %rdx # 0x67 <elf_machine_dynamic+0x7>
0000000000000063: R_X86_64_PC32 __ehdr_start-0x4
67: leaq (%rip), %rax # 0x6e <elf_machine_dynamic+0xe>
000000000000006a: R_X86_64_PC32 _DYNAMIC-0x4
6e: subq %rdx, %rax
71: retq
If inlined, which is what actually happens when GCC generates elf/rtld.os,
the new form is 3 bytes shorter. The difference all but disappears
once an align directive is added (old form first, new form after the "---"):
cf6: shlq $32, %rdx
cfa: orq %rdx, %rax
cfd: leaq (%rip), %rdx # 0xd04 <_dl_start+0x24>
0000000000000d00: R_X86_64_PC32 _DYNAMIC-0x4
d04: movq %rax, (%rip) # 0xd0b <_dl_start+0x2b>
0000000000000d07: R_X86_64_PC32 .data.rel.ro+0x7c
d0b: movq (%rip), %rax # 0xd12 <_dl_start+0x32>
0000000000000d0e: R_X86_64_PC32 _DYNAMIC-0x4
d12: movq %rdx, %r12
d15: subq (%rip), %r12 # 0xd1c <_dl_start+0x3c>
0000000000000d18: R_X86_64_GOTPC32 _GLOBAL_OFFSET_TABLE_-0x4
d1c: movq %rdx, (%rip) # 0xd23 <_dl_start+0x43>
0000000000000d1f: R_X86_64_PC32 _rtld_local+0xa14
d23: movq %r12, (%rip) # 0xd2a <_dl_start+0x4a>
0000000000000d26: R_X86_64_PC32 _rtld_local+0xa04
d2a: testq %rax, %rax
d2d: je 0xdd0 <_dl_start+0xf0>
d33: movl $1879048191, %edi # imm = 0x6FFFFFFF
d38: movl $1879047679, %r10d # imm = 0x6FFFFDFF
d3e: movl $1879047935, %ebx # imm = 0x6FFFFEFF
d43: movl $1879048001, %r13d # imm = 0x6FFFFF41
d49: leaq (%rip), %rcx # 0xd50 <_dl_start+0x70>
0000000000000d4c: R_X86_64_PC32 _rtld_local+0xa44
d50: movl $1879047733, %r11d # imm = 0x6FFFFE35
d56: movl $50, %r9d
d5c: movl $1879048226, %r8d # imm = 0x70000022
d62: jmp 0xd82 <_dl_start+0xa2>
---
cf6: leaq (%rip), %r13 # 0xcfd <_dl_start+0x1d>
0000000000000cf9: R_X86_64_PC32 __ehdr_start-0x4
cfd: movq %r13, (%rip) # 0xd04 <_dl_start+0x24>
0000000000000d00: R_X86_64_PC32 _rtld_local+0xa04
d04: shlq $32, %rdx
d08: orq %rdx, %rax
d0b: leaq (%rip), %rdx # 0xd12 <_dl_start+0x32>
0000000000000d0e: R_X86_64_PC32 _DYNAMIC-0x4
d12: movq %rax, (%rip) # 0xd19 <_dl_start+0x39>
0000000000000d15: R_X86_64_PC32 .data.rel.ro+0x7c
d19: movq (%rip), %rax # 0xd20 <_dl_start+0x40>
0000000000000d1c: R_X86_64_PC32 _DYNAMIC-0x4
d20: movq %rdx, (%rip) # 0xd27 <_dl_start+0x47>
0000000000000d23: R_X86_64_PC32 _rtld_local+0xa14
d27: testq %rax, %rax
d2a: je 0xdd0 <_dl_start+0xf0>
d30: movl $1879048191, %edi # imm = 0x6FFFFFFF
d35: movl $1879047679, %r10d # imm = 0x6FFFFDFF
d3b: movl $1879047935, %ebx # imm = 0x6FFFFEFF
d40: movl $1879048001, %r14d # imm = 0x6FFFFF41
d46: leaq (%rip), %rcx # 0xd4d <_dl_start+0x6d>
0000000000000d49: R_X86_64_PC32 _rtld_local+0xa44
d4d: movl $1879047733, %r11d # imm = 0x6FFFFE35
d53: movl $50, %r9d
d59: movl $1879048226, %r8d # imm = 0x70000022
d5f: jmp 0xd82 <_dl_start+0xa2>
I think the main merit is dropping the reliance on _GLOBAL_OFFSET_TABLE_[0].
(Newer ports can learn from the existing x86-64/aarch64 ports that _GLOBAL_OFFSET_TABLE_[0] doesn't need to do anything special.)
On Tue, Aug 17, 2021 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Aug 16, 2021 at 11:24 PM Fangrui Song via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
> >
> > and drop reliance on _GLOBAL_OFFSET_TABLE_[0].
> >
> > &__ehdr_start is a better way to get the load address.
> > ---
> > sysdeps/x86_64/dl-machine.h | 25 +++++++++----------------
> > 1 file changed, 9 insertions(+), 16 deletions(-)
> >
> > diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
> > index a8596aa3fa..ceee50734e 100644
> > --- a/sysdeps/x86_64/dl-machine.h
> > +++ b/sysdeps/x86_64/dl-machine.h
> > @@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
> > }
> >
> >
> > -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
> > - first element of the GOT. This must be inlined in a function which
> > - uses global data. */
> > -static inline ElfW(Addr) __attribute__ ((unused))
> > -elf_machine_dynamic (void)
> > -{
> > - /* This produces an IP-relative reloc which is resolved at link time. */
> > - extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
> > - return _GLOBAL_OFFSET_TABLE_[0];
> > -}
> > -
> > -
> > /* Return the run-time load address of the shared object. */
> > static inline ElfW(Addr) __attribute__ ((unused))
> > elf_machine_load_address (void)
> > {
> > - /* Compute the difference between the runtime address of _DYNAMIC as seen
> > - by an IP-relative reference, and the link-time address found in the
> > - special unrelocated first GOT entry. */
> > + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
> > + return (ElfW(Addr)) &__ehdr_start;
> > +}
> > +
> > +/* Return the link-time address of _DYNAMIC. */
> > +static inline ElfW(Addr) __attribute__ ((unused))
> > +elf_machine_dynamic (void)
> > +{
> > extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
> > - return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic ();
> > + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
> > }
> >
> > /* Set up the loaded object described by L so its unrelocated PLT
> > --
> > 2.33.0.rc1.237.g0d66db33f3-goog
> >
>
> Please provide comparison of ld.so with and without the change, in
> terms of code size, code sequence as well as dynamic relocation.
LGTM.
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
Thanks.
@@ -35,27 +35,20 @@ elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
}
-/* Return the link-time address of _DYNAMIC. Conveniently, this is the
- first element of the GOT. This must be inlined in a function which
- uses global data. */
-static inline ElfW(Addr) __attribute__ ((unused))
-elf_machine_dynamic (void)
-{
- /* This produces an IP-relative reloc which is resolved at link time. */
- extern const ElfW(Addr) _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
- return _GLOBAL_OFFSET_TABLE_[0];
-}
-
-
/* Return the run-time load address of the shared object. */
static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_load_address (void)
{
- /* Compute the difference between the runtime address of _DYNAMIC as seen
- by an IP-relative reference, and the link-time address found in the
- special unrelocated first GOT entry. */
+ extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
+ return (ElfW(Addr)) &__ehdr_start;
+}
+
+/* Return the link-time address of _DYNAMIC. */
+static inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_dynamic (void)
+{
extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
- return (ElfW(Addr)) &_DYNAMIC - elf_machine_dynamic ();
+ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
}
/* Set up the loaded object described by L so its unrelocated PLT