[v3] elf: Rewrite long RESOLVE_MAP macro to a debug friendly function
Checks
Context |
Check |
Description |
dj/TryBot-apply_patch |
success
|
Patch applied to master at the time it was sent
|
dj/TryBot-32bit |
success
|
Build for i686
|
Commit Message
On Вт, 2022-05-03 at 13:12 -0700, Fāng-ruì Sòng wrote:
> The function has internal linkage (static) and it seems that the
> prevailing style doesn't use `_dl_` prefix for such functions.
Ok, I renamed the function by removing the prefix. I also added keyword
`inline` so that GCC could really inline this new function. GCC relies
on `--param max-inline-insns-auto` (which defaults to 15) when the
compiler decides to inline ordinary function, and `--param max-inline-
insns-single` (70 by default) is used for functions with the keyword.
-- >8 --
A static function that may be inlined is way better to find where exactly a
crash happens. So one can step into the function with GDB. The macro still
remains for compatibility with other code.
Signed-off-by: Nicholas Guriev <nicholas@guriev.su>
---
elf/dl-reloc.c | 56 ++++++++++++++++++++++++++++++--------------------
1 file changed, 34 insertions(+), 22 deletions(-)
Comments
On 07/05/2022 19:03, Nicholas Guriev wrote:
> On Вт, 2022-05-03 at 13:12 -0700, Fāng-ruì Sòng wrote:
>> The function has internal linkage (static) and it seems that the
>> prevailing style doesn't use `_dl_` prefix for such functions.
>
> Ok, I renamed the function by removing the prefix. I also added keyword
> `inline` so that GCC could really inline this new function. GCC relies
> on `--param max-inline-insns-auto` (which defaults to 15) when the
> compiler decides to inline ordinary function, and `--param max-inline-
> insns-single` (70 by default) is used for functions with the keyword.
(Not a full review, leaving that for someone else; just a nit I noticed)
Please use __always_inline instead, which adds the inline keyword as
well as __attribute__ ((inline)). That way gcc ensures that the
function is *always* inlined.
Thanks,
Siddhesh
> -- >8 --
>
> A static function that may be inlined is way better to find where exactly a
> crash happens. So one can step into the function with GDB. The macro still
> remains for compatibility with other code.
>
> Signed-off-by: Nicholas Guriev <nicholas@guriev.su>
> ---
> elf/dl-reloc.c | 56 ++++++++++++++++++++++++++++++--------------------
> 1 file changed, 34 insertions(+), 22 deletions(-)
>
> diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c
> index 771a34bd14..10affc4d48 100644
> --- a/elf/dl-reloc.c
> +++ b/elf/dl-reloc.c
> @@ -162,29 +162,41 @@ _dl_nothread_init_static_tls (struct link_map *map)
> }
> #endif /* !PTHREAD_IN_LIBC */
>
> +static inline lookup_t
> +resolve_map (lookup_t l, struct r_scope_elem *scope[], const ElfW(Sym) **ref,
> + const struct r_found_version *version, unsigned long int r_type)
> +{
> + if (ELFW(ST_BIND) ((*ref)->st_info) == STB_LOCAL
> + || __glibc_unlikely (dl_symbol_visibility_binds_local_p (*ref)))
> + return l;
> +
> + if (__glibc_unlikely (*ref == l->l_lookup_cache.sym)
> + && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class)
> + {
> + bump_num_cache_relocations ();
> + *ref = l->l_lookup_cache.ret;
> + }
> + else
> + {
> + int tc = elf_machine_type_class (r_type);
> + l->l_lookup_cache.type_class = tc;
> + l->l_lookup_cache.sym = *ref;
> + const char *undef_name
> + = (const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name;
> + const struct r_found_version *v = NULL;
> + if (version != NULL && version->hash != 0)
> + v = version;
> + lookup_t lr = _dl_lookup_symbol_x (undef_name, l, ref, scope, v, tc,
> + DL_LOOKUP_ADD_DEPENDENCY
> + | DL_LOOKUP_FOR_RELOCATE, NULL);
> + l->l_lookup_cache.ret = *ref;
> + l->l_lookup_cache.value = lr;
> + }
> + return l->l_lookup_cache.value;
> +}
> +
> /* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */
> -#define RESOLVE_MAP(l, scope, ref, version, r_type) \
> - ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \
> - && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \
> - ? ((__glibc_unlikely ((*ref) == l->l_lookup_cache.sym) \
> - && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \
> - ? (bump_num_cache_relocations (), \
> - (*ref) = l->l_lookup_cache.ret, \
> - l->l_lookup_cache.value) \
> - : ({ lookup_t _lr; \
> - int _tc = elf_machine_type_class (r_type); \
> - l->l_lookup_cache.type_class = _tc; \
> - l->l_lookup_cache.sym = (*ref); \
> - const struct r_found_version *v = NULL; \
> - if ((version) != NULL && (version)->hash != 0) \
> - v = (version); \
> - _lr = _dl_lookup_symbol_x ((const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name, \
> - l, (ref), scope, v, _tc, \
> - DL_LOOKUP_ADD_DEPENDENCY \
> - | DL_LOOKUP_FOR_RELOCATE, NULL); \
> - l->l_lookup_cache.ret = (*ref); \
> - l->l_lookup_cache.value = _lr; })) \
> - : l)
> +#define RESOLVE_MAP resolve_map
>
> #include "dynamic-link.h"
>
On Пн, 2022-05-09 at 19:08 +0530, Siddhesh Poyarekar wrote:
> Please use __always_inline instead, which adds the inline keyword as
> well as __attribute__ ((inline)). That way gcc ensures that the
> function is *always* inlined.
That is not necessary. My original intention was to improve the
debugging experience at this point. And the standard `inline` keyword
allows one to control optimizations with the -O flag.
On 2022-05-14, Nicholas Guriev wrote:
>On Пн, 2022-05-09 at 19:08 +0530, Siddhesh Poyarekar wrote:
>> Please use __always_inline instead, which adds the inline keyword as
>> well as __attribute__ ((inline)). That way gcc ensures that the
>> function is *always* inlined.
>
>That is not necessary. My original intention was to improve the
>debugging experience at this point. And the standard `inline` keyword
>allows one to control optimizations with the -O flag.
>
Looks good to me, I think the always_inline here does not affect
performance, as shown in my testing with a large PIE with many
R_*_RELATIVE (function call overhead is larger with a non-R_*_RELATIVE).
I will wait a bit and can push the commit in case Nicholas does not have
write access.
Reviewed-by: Fangrui Song <maskray@google.com>
On 15/05/2022 04:18, Fangrui Song wrote:
> On 2022-05-14, Nicholas Guriev wrote:
>> On Пн, 2022-05-09 at 19:08 +0530, Siddhesh Poyarekar wrote:
>>> Please use __always_inline instead, which adds the inline keyword as
>>> well as __attribute__ ((inline)). That way gcc ensures that the
>>> function is *always* inlined.
>>
>> That is not necessary. My original intention was to improve the
>> debugging experience at this point. And the standard `inline` keyword
>> allows one to control optimizations with the -O flag.
You can't build glibc with anything less than -O2, so that's a moot
point. If you don't use __always_inline here then you're not only
changing the debugging experience, but also potentially adding a
performance overhead. gdb can do well enough in most cases of inlined
functions, so __always_inlined should not hamper that.
> Looks good to me, I think the always_inline here does not affect
> performance, as shown in my testing with a large PIE with many
> R_*_RELATIVE (function call overhead is larger with a non-R_*_RELATIVE).
If you want to make a decision based on this then please quantify it
more precisely with a microbenchmark added to benchtests. Otherwise
just add __always_inline because it is a more minimal change. TBH I'd
prefer the former because it then adds a new microbenchmark that we can
measure dynamic linker changes in future but I can live with the latter.
Thanks,
Siddhesh
On 2022-05-15, Siddhesh Poyarekar wrote:
>On 15/05/2022 04:18, Fangrui Song wrote:
>>On 2022-05-14, Nicholas Guriev wrote:
>>>On Пн, 2022-05-09 at 19:08 +0530, Siddhesh Poyarekar wrote:
>>>>Please use __always_inline instead, which adds the inline keyword as
>>>>well as __attribute__ ((inline)). That way gcc ensures that the
>>>>function is *always* inlined.
>>>
>>>That is not necessary. My original intention was to improve the
>>>debugging experience at this point. And the standard `inline` keyword
>>>allows one to control optimizations with the -O flag.
>
>You can't build glibc with anything less than -O2, so that's a moot
>point. If you don't use __always_inline here then you're not only
>changing the debugging experience, but also potentially adding a
>performance overhead. gdb can do well enough in most cases of inlined
>functions, so __always_inlined should not hamper that.
Hope that the -O2 requirement can be lifted:)
The original decision might be related to guaranteed inlining and relocations in -O0
but it should be revisited nowadays... (Both FreeBSD rtld and musl rtld allow -O0).
>>Looks good to me, I think the always_inline here does not affect
>>performance, as shown in my testing with a large PIE with many
>>R_*_RELATIVE (function call overhead is larger with a non-R_*_RELATIVE).
>
>If you want to make a decision based on this then please quantify it
>more precisely with a microbenchmark added to benchtests. Otherwise
>just add __always_inline because it is a more minimal change. TBH I'd
>prefer the former because it then adds a new microbenchmark that we
>can measure dynamic linker changes in future but I can live with the
>latter.
Thanks. I can add __always_inline.
(I don't think it matters but I guess I just want to avoid a debate :))
On 23/05/2022 03:54, Fangrui Song wrote:
> Hope that the -O2 requirement can be lifted:)
>
> The original decision might be related to guaranteed inlining and
> relocations in -O0
> but it should be revisited nowadays... (Both FreeBSD rtld and musl rtld
> allow -O0).
I don't know TBH, maybe it's something Andreas, Carlos or someone else
who's been around longer may know.
>>> Looks good to me, I think the always_inline here does not affect
>>> performance, as shown in my testing with a large PIE with many
>>> R_*_RELATIVE (function call overhead is larger with a non-R_*_RELATIVE).
>>
>> If you want to make a decision based on this then please quantify it
>> more precisely with a microbenchmark added to benchtests. Otherwise
>> just add __always_inline because it is a more minimal change. TBH I'd
>> prefer the former because it then adds a new microbenchmark that we
>> can measure dynamic linker changes in future but I can live with the
>> latter.
>
> Thanks. I can add __always_inline.
> (I don't think it matters but I guess I just want to avoid a debate :))
>
Bummer, I was hoping you'd write the benchmark; we don't nearly have
enough microbenchmarks for the linker :)
Siddhesh
On 22/05/2022 19:24, Fangrui Song via Libc-alpha wrote:
> On 2022-05-15, Siddhesh Poyarekar wrote:
>> On 15/05/2022 04:18, Fangrui Song wrote:
>>> On 2022-05-14, Nicholas Guriev wrote:
>>>> On Пн, 2022-05-09 at 19:08 +0530, Siddhesh Poyarekar wrote:
>>>>> Please use __always_inline instead, which adds the inline keyword as
>>>>> well as __attribute__ ((inline)). That way gcc ensures that the
>>>>> function is *always* inlined.
>>>>
>>>> That is not necessary. My original intention was to improve the
>>>> debugging experience at this point. And the standard `inline` keyword
>>>> allows one to control optimizations with the -O flag.
>>
>> You can't build glibc with anything less than -O2, so that's a moot point. If you don't use __always_inline here then you're not only changing the debugging experience, but also potentially adding a performance overhead. gdb can do well enough in most cases of inlined functions, so __always_inlined should not hamper that.
>
> Hope that the -O2 requirement can be lifted:)
>
> The original decision might be related to guaranteed inlining and relocations in -O0
> but it should be revisited nowadays... (Both FreeBSD rtld and musl rtld allow -O0).
I tried some time ago, before the recent refactors, and the loader still fails at
bootstrapping (I have not dig into to pinpoint exactly what has failed). But
I agree it would be good to support it, we also might need to fix some objecting
pulling from loader (since -O0 messes with symbols and the script to decide what
to pull).
>
>>> Looks good to me, I think the always_inline here does not affect
>>> performance, as shown in my testing with a large PIE with many
>>> R_*_RELATIVE (function call overhead is larger with a non-R_*_RELATIVE).
>>
>> If you want to make a decision based on this then please quantify it more precisely with a microbenchmark added to benchtests. Otherwise just add __always_inline because it is a more minimal change. TBH I'd prefer the former because it then adds a new microbenchmark that we can measure dynamic linker changes in future but I can live with the latter.
>
> Thanks. I can add __always_inline.
> (I don't think it matters but I guess I just want to avoid a debate :))
I think the main issue is we build glibc with -fgnu89-inline, which is required
due some optimizations where a function is defined without inline, but then
it has an inline definition to be used internally.
Also, since we don't use -Winline we can't assure that compiler won't emit the
function definition where gcc documents it might [1]. So I think one exercise
we might do it to remove all __always_inline__, and add -Winline to see which
functions, if any, won't be inline by compiler.
I would like also to eventually remove -fgnu89-inline, since I think we can
restructure the code to not rely on extern inlines nor on the internal inline
optimizations. Also, it seems that although clang seems to support
-fgnu89-inline, it has subtle different semantics that breaks some internal
glibc assumptions.
[1] https://gcc.gnu.org/onlinedocs/gcc/Inline.html
On 23/05/2022 18:05, Adhemerval Zanella wrote:
> I think the main issue is we build glibc with -fgnu89-inline, which is required
> due some optimizations where a function is defined without inline, but then
> it has an inline definition to be used internally.
I'm not sure I understand, could you please elaborate?
> Also, since we don't use -Winline we can't assure that compiler won't emit the
> function definition where gcc documents it might [1]. So I think one exercise
> we might do it to remove all __always_inline__, and add -Winline to see which
> functions, if any, won't be inline by compiler.
How would that help though? That output is bound to change as the
compiler or even the code base changes since the decision to inline
(when __always_inline__ is not specified) is determined heuristically.
In my understanding, the point of __always_inline__ use in the sources
is to make inlining deterministic.
> I would like also to eventually remove -fgnu89-inline, since I think we can
> restructure the code to not rely on extern inlines nor on the internal inline
> optimizations. Also, it seems that although clang seems to support
> -fgnu89-inline, it has subtle different semantics that breaks some internal
> glibc assumptions.
Could you elaborate on this too?
Thanks,
Siddhesh
On 23/05/2022 10:02, Siddhesh Poyarekar wrote:
> On 23/05/2022 18:05, Adhemerval Zanella wrote:
>> I think the main issue is we build glibc with -fgnu89-inline, which is required
>> due some optimizations where a function is defined without inline, but then
>> it has an inline definition to be used internally.
>
> I'm not sure I understand, could you please elaborate?
I was in fact referring to another issues (the -fgnu89-inline) where it would
only affect either 'extern inline' or 'inline' functions.
>
>> Also, since we don't use -Winline we can't assure that compiler won't emit the
>> function definition where gcc documents it might [1]. So I think one exercise
>> we might do it to remove all __always_inline__, and add -Winline to see which
>> functions, if any, won't be inline by compiler.
>
> How would that help though? That output is bound to change as the compiler or even the code base changes since the decision to inline (when __always_inline__ is not specified) is determined heuristically. In my understanding, the point of __always_inline__ use in the sources is to make inlining deterministic.
From ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 I take that even always_inline
might fail for some reason. So we need to keep using always_inline on function
that need to be inline to get a compiler warning if it can not be done.
I think by using -Winline with stardand C99 static inline, besides being
slight more portable code is also less error prone (since it is one less
annotation to take care of). It would be slight better to setup -Winline
per TU, instead of global since we only need always_inline semantic for
specific usages.
In the end I think both a pretty equivalent.
>
>> I would like also to eventually remove -fgnu89-inline, since I think we can
>> restructure the code to not rely on extern inlines nor on the internal inline
>> optimizations. Also, it seems that although clang seems to support
>> -fgnu89-inline, it has subtle different semantics that breaks some internal
>> glibc assumptions.
>
> Could you elaborate on this too?
In fact revising my clang branch, I disable extern inlines because another clang
issues; not because it handles -fgnu89-inline differently. Sorry for the noise.
On 23/05/2022 21:52, Adhemerval Zanella wrote:
> From ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 I take that even always_inline
> might fail for some reason. So we need to keep using always_inline on function
> that need to be inline to get a compiler warning if it can not be done.
>
> I think by using -Winline with stardand C99 static inline, besides being
> slight more portable code is also less error prone (since it is one less
> annotation to take care of). It would be slight better to setup -Winline
> per TU, instead of global since we only need always_inline semantic for
> specific usages.
>
> In the end I think both a pretty equivalent.
That is surprising, and kinda defeats the "always" in the always_inline.
The documentation for the attribute also states that failure to inline
a function is diagnosed as an error[1], except when the function is
called indirectly and maybe some more context is needed for
ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 to understand the nature of the
warnings.
So I can't agree yet that it's equivalent, but I do agree that the
always_inline semantic is for specific use cases.
Thanks,
Siddhesh
[1] https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
On 23/05/2022 13:33, Siddhesh Poyarekar wrote:
> On 23/05/2022 21:52, Adhemerval Zanella wrote:
>> From ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 I take that even always_inline
>> might fail for some reason. So we need to keep using always_inline on function
>> that need to be inline to get a compiler warning if it can not be done.
>>
>> I think by using -Winline with stardand C99 static inline, besides being
>> slight more portable code is also less error prone (since it is one less
>> annotation to take care of). It would be slight better to setup -Winline
>> per TU, instead of global since we only need always_inline semantic for
>> specific usages.
>>
>> In the end I think both a pretty equivalent.
>
> That is surprising, and kinda defeats the "always" in the always_inline. The documentation for the attribute also states that failure to inline a function is diagnosed as an error[1], except when the function is called indirectly and maybe some more context is needed for ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 to understand the nature of the warnings.
I think ed159672eb3cd650a32b7e5cb4d5ec1fe0e63802 change makes sense once we
commit to use __always_inline where applicable.
>
> So I can't agree yet that it's equivalent, but I do agree that the always_inline semantic is for specific use cases.
They are not indeed, it seems that __always_inline will try harder to inline
different even for specific usages that 'static inline' might fail:
$ cat inline.c
extern int bar (char *, int *);
static inline
#ifdef USE_ALWAYS_INLINE
__attribute__ ((__always_inline__))
#endif
int func (int *a)
{
char *p = __builtin_alloca (128);
return bar (p, a);
}
int foo (int *a)
{
return func (a);
}
$ gcc -Wall -O2 -std=gnu11 inline.c -c -S -o inline.S
$ gcc -Wall -O2 -std=gnu11 inline.c -c -S -o always_inline.S -DUSE_ALWAYS_INLINE
$ diff -u inline.S always_inline.S
--- inline.S 2022-05-23 13:52:50.617859866 -0300
+++ always_inline.S 2022-05-23 13:52:55.157865832 -0300
@@ -1,10 +1,12 @@
.file "inline.c"
.text
.p2align 4
- .type func, @function
-func:
-.LFB0:
+ .globl foo
+ .type foo, @function
+foo:
+.LFB1:
.cfi_startproc
+ endbr64
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
@@ -40,17 +42,6 @@
.cfi_restore_state
call __stack_chk_fail@PLT
.cfi_endproc
-.LFE0:
- .size func, .-func
- .p2align 4
- .globl foo
- .type foo, @function
-foo:
-.LFB1:
- .cfi_startproc
- endbr64
- jmp func
- .cfi_endproc
.LFE1:
.size foo, .-foo
.ident "GCC: (Ubuntu 11.2.0-19ubuntu1) 11.2.0"
So '__attribute__ ((__always_inline__))' will force inline in such cases where
static inline might not. So I agree that we need __always_inline and using the
C99 static inline might not be suffice.
>
> Thanks,
> Siddhesh
>
> [1] https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html
@@ -162,29 +162,41 @@ _dl_nothread_init_static_tls (struct link_map *map)
}
#endif /* !PTHREAD_IN_LIBC */
+static inline lookup_t
+resolve_map (lookup_t l, struct r_scope_elem *scope[], const ElfW(Sym) **ref,
+ const struct r_found_version *version, unsigned long int r_type)
+{
+ if (ELFW(ST_BIND) ((*ref)->st_info) == STB_LOCAL
+ || __glibc_unlikely (dl_symbol_visibility_binds_local_p (*ref)))
+ return l;
+
+ if (__glibc_unlikely (*ref == l->l_lookup_cache.sym)
+ && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class)
+ {
+ bump_num_cache_relocations ();
+ *ref = l->l_lookup_cache.ret;
+ }
+ else
+ {
+ int tc = elf_machine_type_class (r_type);
+ l->l_lookup_cache.type_class = tc;
+ l->l_lookup_cache.sym = *ref;
+ const char *undef_name
+ = (const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name;
+ const struct r_found_version *v = NULL;
+ if (version != NULL && version->hash != 0)
+ v = version;
+ lookup_t lr = _dl_lookup_symbol_x (undef_name, l, ref, scope, v, tc,
+ DL_LOOKUP_ADD_DEPENDENCY
+ | DL_LOOKUP_FOR_RELOCATE, NULL);
+ l->l_lookup_cache.ret = *ref;
+ l->l_lookup_cache.value = lr;
+ }
+ return l->l_lookup_cache.value;
+}
+
/* This macro is used as a callback from the ELF_DYNAMIC_RELOCATE code. */
-#define RESOLVE_MAP(l, scope, ref, version, r_type) \
- ((ELFW(ST_BIND) ((*ref)->st_info) != STB_LOCAL \
- && __glibc_likely (!dl_symbol_visibility_binds_local_p (*ref))) \
- ? ((__glibc_unlikely ((*ref) == l->l_lookup_cache.sym) \
- && elf_machine_type_class (r_type) == l->l_lookup_cache.type_class) \
- ? (bump_num_cache_relocations (), \
- (*ref) = l->l_lookup_cache.ret, \
- l->l_lookup_cache.value) \
- : ({ lookup_t _lr; \
- int _tc = elf_machine_type_class (r_type); \
- l->l_lookup_cache.type_class = _tc; \
- l->l_lookup_cache.sym = (*ref); \
- const struct r_found_version *v = NULL; \
- if ((version) != NULL && (version)->hash != 0) \
- v = (version); \
- _lr = _dl_lookup_symbol_x ((const char *) D_PTR (l, l_info[DT_STRTAB]) + (*ref)->st_name, \
- l, (ref), scope, v, _tc, \
- DL_LOOKUP_ADD_DEPENDENCY \
- | DL_LOOKUP_FOR_RELOCATE, NULL); \
- l->l_lookup_cache.ret = (*ref); \
- l->l_lookup_cache.value = _lr; })) \
- : l)
+#define RESOLVE_MAP resolve_map
#include "dynamic-link.h"