i386: Port elf_machine_{load_address,dynamic} from x86-64
Checks
Context | Check | Description
dj/TryBot-apply_patch | success | Patch applied to master at the time it was sent
dj/TryBot-32bit | success | Build for i686
Commit Message
This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time
address of _DYNAMIC.
The code sequence length does not change.
---
sysdeps/i386/dl-machine.h | 29 +++++++++++------------------
1 file changed, 11 insertions(+), 18 deletions(-)
Comments
On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote:
>
> This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time
> address of _DYNAMIC.
>
> The code sequence length does not change.
> ---
> sysdeps/i386/dl-machine.h | 29 +++++++++++------------------
> 1 file changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
> index 590b41d8d7..9f0eeaf66e 100644
> --- a/sysdeps/i386/dl-machine.h
> +++ b/sysdeps/i386/dl-machine.h
> @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr)
> }
>
>
> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
> - first element of the GOT, a special entry that is never relocated. */
> -static inline Elf32_Addr __attribute__ ((unused, const))
> -elf_machine_dynamic (void)
> -{
> - /* This produces a GOTOFF reloc that resolves to zero at link time, so in
> - fact just loads from the GOT register directly. By doing it without
> - an asm we can let the compiler choose any register. */
> - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
> - return _GLOBAL_OFFSET_TABLE_[0];
> -}
> -
> /* Return the run-time load address of the shared object. */
> -static inline Elf32_Addr __attribute__ ((unused))
> +static inline ElfW(Addr) __attribute__ ((unused))
> elf_machine_load_address (void)
> {
> - /* Compute the difference between the runtime address of _DYNAMIC as seen
> - by a GOTOFF reference, and the link-time address found in the special
> - unrelocated first GOT entry. */
> - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
> - return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
> + return (ElfW(Addr)) &__ehdr_start;
> +}
> +
> +/* Return the link-time address of _DYNAMIC. */
> +static inline ElfW(Addr) __attribute__ ((unused))
> +elf_machine_dynamic (void)
> +{
> + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
> }
>
> /* Set up the loaded object described by L so its unrelocated PLT
> --
> 2.33.0.685.g46640cef36-goog
>
What are the code differences before and after?
On 2021-09-23, H.J. Lu wrote:
>On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote:
>>
>> This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time
>> address of _DYNAMIC.
>>
>> The code sequence length does not change.
>> ---
>> sysdeps/i386/dl-machine.h | 29 +++++++++++------------------
>> 1 file changed, 11 insertions(+), 18 deletions(-)
>>
>> diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
>> index 590b41d8d7..9f0eeaf66e 100644
>> --- a/sysdeps/i386/dl-machine.h
>> +++ b/sysdeps/i386/dl-machine.h
>> @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr)
>> }
>>
>>
>> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
>> - first element of the GOT, a special entry that is never relocated. */
>> -static inline Elf32_Addr __attribute__ ((unused, const))
>> -elf_machine_dynamic (void)
>> -{
>> - /* This produces a GOTOFF reloc that resolves to zero at link time, so in
>> - fact just loads from the GOT register directly. By doing it without
>> - an asm we can let the compiler choose any register. */
>> - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
>> - return _GLOBAL_OFFSET_TABLE_[0];
>> -}
>> -
>> /* Return the run-time load address of the shared object. */
>> -static inline Elf32_Addr __attribute__ ((unused))
>> +static inline ElfW(Addr) __attribute__ ((unused))
>> elf_machine_load_address (void)
>> {
>> - /* Compute the difference between the runtime address of _DYNAMIC as seen
>> - by a GOTOFF reference, and the link-time address found in the special
>> - unrelocated first GOT entry. */
>> - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
>> - return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
>> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
>> + return (ElfW(Addr)) &__ehdr_start;
>> +}
>> +
>> +/* Return the link-time address of _DYNAMIC. */
>> +static inline ElfW(Addr) __attribute__ ((unused))
>> +elf_machine_dynamic (void)
>> +{
>> + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
>> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
>> }
>>
>> /* Set up the loaded object described by L so its unrelocated PLT
>> --
>> 2.33.0.685.g46640cef36-goog
>>
>
>what are the code differences before and after?
/* Demo of the approach the patch switches to: return the address of the
   __ehdr_start symbol, which the patched elf_machine_load_address uses as
   the run-time load address.  The symbol is declared with hidden
   visibility; in the listing that follows this compiles to a single
   `leal __ehdr_start@GOTOFF(%eax), %eax', i.e. an address computation
   with no memory load.  */
long ehdr(void) {
extern char __ehdr_start[] __attribute__((visibility("hidden")));
return (long)__ehdr_start;
}
/* Demo of the approach the patch removes: read the first word of the
   global offset table, which the old elf_machine_dynamic treated as the
   link-time address of _DYNAMIC.  In the listing that follows this
   compiles to `movl _GLOBAL_OFFSET_TABLE_@GOTOFF(%eax), %eax', i.e. a
   load from memory rather than a pure address computation.  */
long got(void) {
extern long _GLOBAL_OFFSET_TABLE_[] __attribute__((visibility("hidden")));
return _GLOBAL_OFFSET_TABLE_[0];
}
ehdr:
call __x86.get_pc_thunk.ax
addl $_GLOBAL_OFFSET_TABLE_, %eax
leal __ehdr_start@GOTOFF(%eax), %eax
ret
got:
call __x86.get_pc_thunk.ax
addl $_GLOBAL_OFFSET_TABLE_, %eax
movl _GLOBAL_OFFSET_TABLE_@GOTOFF(%eax), %eax
ret
In the GCC-generated elf/rtld.os, the local code sequence related to __ehdr_start/_GLOBAL_OFFSET_TABLE_
does not change in size, but globally the new code triggers some code motion
that ends up making the file smaller.
FWIW ld.so:.text is 48 bytes smaller.
The new code doesn't have a memory load. I guess that may allow GCC to optimize more.
On Thu, Sep 23, 2021 at 9:26 PM Fangrui Song <maskray@google.com> wrote:
>
> This drops reliance on _GLOBAL_OFFSET_TABLE_[0] being the link-time
> address of _DYNAMIC.
>
> The code sequence length does not change.
> ---
> sysdeps/i386/dl-machine.h | 29 +++++++++++------------------
> 1 file changed, 11 insertions(+), 18 deletions(-)
>
> diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
> index 590b41d8d7..9f0eeaf66e 100644
> --- a/sysdeps/i386/dl-machine.h
> +++ b/sysdeps/i386/dl-machine.h
> @@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr)
> }
>
>
> -/* Return the link-time address of _DYNAMIC. Conveniently, this is the
> - first element of the GOT, a special entry that is never relocated. */
> -static inline Elf32_Addr __attribute__ ((unused, const))
> -elf_machine_dynamic (void)
> -{
> - /* This produces a GOTOFF reloc that resolves to zero at link time, so in
> - fact just loads from the GOT register directly. By doing it without
> - an asm we can let the compiler choose any register. */
> - extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
> - return _GLOBAL_OFFSET_TABLE_[0];
> -}
> -
> /* Return the run-time load address of the shared object. */
> -static inline Elf32_Addr __attribute__ ((unused))
> +static inline ElfW(Addr) __attribute__ ((unused))
> elf_machine_load_address (void)
> {
> - /* Compute the difference between the runtime address of _DYNAMIC as seen
> - by a GOTOFF reference, and the link-time address found in the special
> - unrelocated first GOT entry. */
> - extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
> - return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
> + extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
> + return (ElfW(Addr)) &__ehdr_start;
> +}
> +
> +/* Return the link-time address of _DYNAMIC. */
> +static inline ElfW(Addr) __attribute__ ((unused))
> +elf_machine_dynamic (void)
> +{
> + extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
> + return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
> }
Please use Elf32 instead of ElfW.
> /* Set up the loaded object described by L so its unrelocated PLT
> --
> 2.33.0.685.g46640cef36-goog
>
@@ -34,27 +34,20 @@ elf_machine_matches_host (const Elf32_Ehdr *ehdr)
}
-/* Return the link-time address of _DYNAMIC. Conveniently, this is the
- first element of the GOT, a special entry that is never relocated. */
-static inline Elf32_Addr __attribute__ ((unused, const))
-elf_machine_dynamic (void)
-{
- /* This produces a GOTOFF reloc that resolves to zero at link time, so in
- fact just loads from the GOT register directly. By doing it without
- an asm we can let the compiler choose any register. */
- extern const Elf32_Addr _GLOBAL_OFFSET_TABLE_[] attribute_hidden;
- return _GLOBAL_OFFSET_TABLE_[0];
-}
-
/* Return the run-time load address of the shared object. */
-static inline Elf32_Addr __attribute__ ((unused))
+static inline ElfW(Addr) __attribute__ ((unused))
elf_machine_load_address (void)
{
- /* Compute the difference between the runtime address of _DYNAMIC as seen
- by a GOTOFF reference, and the link-time address found in the special
- unrelocated first GOT entry. */
- extern Elf32_Dyn bygotoff[] asm ("_DYNAMIC") attribute_hidden;
- return (Elf32_Addr) &bygotoff - elf_machine_dynamic ();
+ extern const ElfW(Ehdr) __ehdr_start attribute_hidden;
+ return (ElfW(Addr)) &__ehdr_start;
+}
+
+/* Return the link-time address of _DYNAMIC. */
+static inline ElfW(Addr) __attribute__ ((unused))
+elf_machine_dynamic (void)
+{
+ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden;
+ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address ();
}
/* Set up the loaded object described by L so its unrelocated PLT