[v2,1/4] misc: Optimize internal usage of __libc_single_threaded

Message ID 20220610163552.3587064-2-adhemerval.zanella@linaro.org
State Superseded
Headers
Series Simplify internal single-threaded usage |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Adhemerval Zanella June 10, 2022, 4:35 p.m. UTC
  By adding an internal hidden_def alias to avoid the GOT indirection.
On some architecture, __libc_single_thread may be accessed through
copy relocations and thus it requires to update both copies.

To obtain the correct address of the __libc_single_thread,
__libc_dlsym is extended to support RTLD_DEFAULT.  It searches
through all scope instead of default local ones.

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 elf/dl-libc.c                 | 20 ++++++++++++++++++--
 elf/libc_early_init.c         |  9 +++++++++
 include/sys/single_threaded.h | 11 +++++++++++
 misc/single_threaded.c        |  2 ++
 nptl/pthread_create.c         |  6 +++++-
 5 files changed, 45 insertions(+), 3 deletions(-)
  

Comments

Fangrui Song June 16, 2022, 7:15 a.m. UTC | #1
On 2022-06-10, Adhemerval Zanella via Libc-alpha wrote:
>By adding an internal hidden_def alias to avoid the GOT indirection.
>On some architecture, __libc_single_thread may be accessed through
>copy relocations and thus it requires to update both copies.
>
>To obtain the correct address of the __libc_single_thread,
>__libc_dlsym is extended to support RTLD_DEFAULT.  It searches
>through all scope instead of default local ones.
>
>Checked on x86_64-linux-gnu and i686-linux-gnu.
>---
> elf/dl-libc.c                 | 20 ++++++++++++++++++--
> elf/libc_early_init.c         |  9 +++++++++
> include/sys/single_threaded.h | 11 +++++++++++
> misc/single_threaded.c        |  2 ++
> nptl/pthread_create.c         |  6 +++++-
> 5 files changed, 45 insertions(+), 3 deletions(-)
>
>diff --git a/elf/dl-libc.c b/elf/dl-libc.c
>index 266e068da6..e64f4b9910 100644
>--- a/elf/dl-libc.c
>+++ b/elf/dl-libc.c
>@@ -16,6 +16,7 @@
>    License along with the GNU C Library; if not, see
>    <https://www.gnu.org/licenses/>.  */
>
>+#include <assert.h>
> #include <dlfcn.h>
> #include <stdlib.h>
> #include <ldsodefs.h>
>@@ -72,6 +73,7 @@ struct do_dlsym_args
>   /* Arguments to do_dlsym.  */
>   struct link_map *map;
>   const char *name;
>+  const void *caller_dlsym;
>
>   /* Return values of do_dlsym.  */
>   lookup_t loadbase;
>@@ -102,8 +104,21 @@ do_dlsym (void *ptr)
> {
>   struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
>   args->ref = NULL;
>-  args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
>-					     args->map->l_local_scope, NULL, 0,
>+  struct link_map *match = args->map;
>+  struct r_scope_elem **scope;
>+  if (args->map == RTLD_DEFAULT)
>+    {
>+      ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
>+      match = _dl_find_dso_for_object (caller);
>+      /* It is only used internally, so caller should be always recognized.  */
>+      assert (match != NULL);
>+      scope = match->l_scope;
>+    }
>+  else
>+    scope = args->map->l_local_scope;
>+
>+  args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
>+					     scope, NULL, 0,
> 					     DL_LOOKUP_RETURN_NEWEST, NULL);
> }
>
>@@ -182,6 +197,7 @@ __libc_dlsym (void *map, const char *name)
>   struct do_dlsym_args args;
>   args.map = map;
>   args.name = name;
>+  args.caller_dlsym = RETURN_ADDRESS (0);
>
> #ifdef SHARED
>   if (GLRO (dl_dlfcn_hook) != NULL)
>diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
>index 3c4a19cf6b..7cc2997122 100644
>--- a/elf/libc_early_init.c
>+++ b/elf/libc_early_init.c
>@@ -16,7 +16,9 @@
>    License along with the GNU C Library; if not, see
>    <https://www.gnu.org/licenses/>.  */
>
>+#include <assert.h>
> #include <ctype.h>
>+#include <dlfcn.h>
> #include <elision-conf.h>
> #include <libc-early-init.h>
> #include <libc-internal.h>
>@@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>   __libc_single_threaded = initial;
>
> #ifdef SHARED
>+  /* __libc_single_threaded can be accessed through copy relocations, so it
>+     requires to update the external copy.  */
>+  __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
>+						  "__libc_single_threaded");
>+  assert (__libc_external_single_threaded != NULL);
>+  *__libc_external_single_threaded = initial;
>+
>   __libc_initial = initial;
> #endif

I think this whole scheme can be greatly simplified.

Under the hood,

extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __asm__("" "__libc_single_threaded"); extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __attribute__((alias ("" "__GI___libc_single_threaded"))) __attribute__ ((__copy__ (__libc_single_threaded))); 

* __libc_single_threaded is the STV_HIDDEN symbol with the asm name "__GI___libc_single_threaded"
* __EI___libc_single_threaded is the STV_DEFAULT symbol with the asm name "__libc_single_threaded". It aliases __libc_single_threaded.

We can just access __EI___libc_single_threaded which will lead to a GOT
indirection (R_*_GLOB_DAT).  This can avoid a __libc_dlsym call.

I can see that accessing the __EI declaration is currently inconvenient
because include/libc-symbols.h does not seem to provide a convenient
macro, but that be added.

>diff --git a/include/sys/single_threaded.h b/include/sys/single_threaded.h
>index 18f6972482..258b01e0b2 100644
>--- a/include/sys/single_threaded.h
>+++ b/include/sys/single_threaded.h
>@@ -1 +1,12 @@
> #include <misc/sys/single_threaded.h>
>+
>+#ifndef _ISOMAC
>+
>+libc_hidden_proto (__libc_single_threaded);
>+
>+# ifdef SHARED
>+extern __typeof (__libc_single_threaded) *__libc_external_single_threaded
>+  attribute_hidden;
>+# endif
>+
>+#endif
>diff --git a/misc/single_threaded.c b/misc/single_threaded.c
>index 96ada9137b..201d86a273 100644
>--- a/misc/single_threaded.c
>+++ b/misc/single_threaded.c
>@@ -22,6 +22,8 @@
>    __libc_early_init (as false for inner libcs).  */
> #ifdef SHARED
> char __libc_single_threaded;
>+__typeof (__libc_single_threaded) *__libc_external_single_threaded;
> #else
> char __libc_single_threaded = 1;
> #endif
>+libc_hidden_data_def (__libc_single_threaded)
>diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
>index e7a099acb7..5633d01c62 100644
>--- a/nptl/pthread_create.c
>+++ b/nptl/pthread_create.c
>@@ -627,7 +627,11 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>   if (__libc_single_threaded)
>     {
>       late_init ();
>-      __libc_single_threaded = 0;
>+      __libc_single_threaded =
>+#ifdef SHARED
>+        *__libc_external_single_threaded =
>+#endif
>+	0;
>     }
>
>   const struct pthread_attr *iattr = (struct pthread_attr *) attr;
>-- 
>2.34.1
>
  
Adhemerval Zanella June 16, 2022, 10:06 p.m. UTC | #2
> On 16 Jun 2022, at 00:15, Fangrui Song <maskray@google.com> wrote:
> 
> On 2022-06-10, Adhemerval Zanella via Libc-alpha wrote:
>> By adding an internal hidden_def alias to avoid the GOT indirection.
>> On some architecture, __libc_single_thread may be accessed through
>> copy relocations and thus it requires to update both copies.
>> 
>> To obtain the correct address of the __libc_single_thread,
>> __libc_dlsym is extended to support RTLD_DEFAULT. It searches
>> through all scope instead of default local ones.
>> 
>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>> ---
>> elf/dl-libc.c | 20 ++++++++++++++++++--
>> elf/libc_early_init.c | 9 +++++++++
>> include/sys/single_threaded.h | 11 +++++++++++
>> misc/single_threaded.c | 2 ++
>> nptl/pthread_create.c | 6 +++++-
>> 5 files changed, 45 insertions(+), 3 deletions(-)
>> 
>> diff --git a/elf/dl-libc.c b/elf/dl-libc.c
>> index 266e068da6..e64f4b9910 100644
>> --- a/elf/dl-libc.c
>> +++ b/elf/dl-libc.c
>> @@ -16,6 +16,7 @@
>> License along with the GNU C Library; if not, see
>> <https://www.gnu.org/licenses/>. */
>> 
>> +#include <assert.h>
>> #include <dlfcn.h>
>> #include <stdlib.h>
>> #include <ldsodefs.h>
>> @@ -72,6 +73,7 @@ struct do_dlsym_args
>> /* Arguments to do_dlsym. */
>> struct link_map *map;
>> const char *name;
>> + const void *caller_dlsym;
>> 
>> /* Return values of do_dlsym. */
>> lookup_t loadbase;
>> @@ -102,8 +104,21 @@ do_dlsym (void *ptr)
>> {
>> struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
>> args->ref = NULL;
>> - args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
>> -					 args->map->l_local_scope, NULL, 0,
>> + struct link_map *match = args->map;
>> + struct r_scope_elem **scope;
>> + if (args->map == RTLD_DEFAULT)
>> + {
>> + ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
>> + match = _dl_find_dso_for_object (caller);
>> + /* It is only used internally, so caller should be always recognized. */
>> + assert (match != NULL);
>> + scope = match->l_scope;
>> + }
>> + else
>> + scope = args->map->l_local_scope;
>> +
>> + args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
>> +					 scope, NULL, 0,
>> 					 DL_LOOKUP_RETURN_NEWEST, NULL);
>> }
>> 
>> @@ -182,6 +197,7 @@ __libc_dlsym (void *map, const char *name)
>> struct do_dlsym_args args;
>> args.map = map;
>> args.name = name;
>> + args.caller_dlsym = RETURN_ADDRESS (0);
>> 
>> #ifdef SHARED
>> if (GLRO (dl_dlfcn_hook) != NULL)
>> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
>> index 3c4a19cf6b..7cc2997122 100644
>> --- a/elf/libc_early_init.c
>> +++ b/elf/libc_early_init.c
>> @@ -16,7 +16,9 @@
>> License along with the GNU C Library; if not, see
>> <https://www.gnu.org/licenses/>. */
>> 
>> +#include <assert.h>
>> #include <ctype.h>
>> +#include <dlfcn.h>
>> #include <elision-conf.h>
>> #include <libc-early-init.h>
>> #include <libc-internal.h>
>> @@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>> __libc_single_threaded = initial;
>> 
>> #ifdef SHARED
>> + /* __libc_single_threaded can be accessed through copy relocations, so it
>> + requires to update the external copy. */
>> + __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
>> +						 "__libc_single_threaded");
>> + assert (__libc_external_single_threaded != NULL);
>> + *__libc_external_single_threaded = initial;
>> +
>> __libc_initial = initial;
>> #endif
> 
> I think this whole scheme can be greatly simplified.
> 
> Under the hood,
> 
> extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __asm__("" "__libc_single_threaded"); extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __attribute__((alias ("" "__GI___libc_single_threaded"))) __attribute__ ((__copy__ (__libc_single_threaded))); 
> * __libc_single_threaded is the STV_HIDDEN symbol with the asm name "__GI___libc_single_threaded"
> * __EI___libc_single_threaded is the STV_DEFAULT symbol with the asm name "__libc_single_threaded". It aliases __libc_single_threaded.

In fact libc_hidden_proto for SHARED will result in:

extern __typeof (__libc_single_threaded) __libc_single_threaded __asm__ ("" "__GI___libc_single_threaded") __attribute__ ((visibility ("hidden")));;

So using __libc_single_threaded will be the most simple way.

> 
> We can just access __EI___libc_single_threaded which will lead to a GOT
> indirection (R_*_GLOB_DAT). This can avoid a __libc_dlsym call.

The idea is exactly to avoid the GOT indirection.

> 
> I can see that accessing the __EI declaration is currently inconvenient
> because include/libc-symbols.h does not seem to provide a convenient
> macro, but that be added.
> 
>> diff --git a/include/sys/single_threaded.h b/include/sys/single_threaded.h
>> index 18f6972482..258b01e0b2 100644
>> --- a/include/sys/single_threaded.h
>> +++ b/include/sys/single_threaded.h
>> @@ -1 +1,12 @@
>> #include <misc/sys/single_threaded.h>
>> +
>> +#ifndef _ISOMAC
>> +
>> +libc_hidden_proto (__libc_single_threaded);
>> +
>> +# ifdef SHARED
>> +extern __typeof (__libc_single_threaded) *__libc_external_single_threaded
>> + attribute_hidden;
>> +# endif
>> +
>> +#endif
>> diff --git a/misc/single_threaded.c b/misc/single_threaded.c
>> index 96ada9137b..201d86a273 100644
>> --- a/misc/single_threaded.c
>> +++ b/misc/single_threaded.c
>> @@ -22,6 +22,8 @@
>> __libc_early_init (as false for inner libcs). */
>> #ifdef SHARED
>> char __libc_single_threaded;
>> +__typeof (__libc_single_threaded) *__libc_external_single_threaded;
>> #else
>> char __libc_single_threaded = 1;
>> #endif
>> +libc_hidden_data_def (__libc_single_threaded)
>> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
>> index e7a099acb7..5633d01c62 100644
>> --- a/nptl/pthread_create.c
>> +++ b/nptl/pthread_create.c
>> @@ -627,7 +627,11 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>> if (__libc_single_threaded)
>> {
>> late_init ();
>> - __libc_single_threaded = 0;
>> + __libc_single_threaded =
>> +#ifdef SHARED
>> + *__libc_external_single_threaded =
>> +#endif
>> +	0;
>> }
>> 
>> const struct pthread_attr *iattr = (struct pthread_attr *) attr;
>> -- 
>> 2.34.1
  
Fangrui Song June 16, 2022, 10:30 p.m. UTC | #3
On 2022-06-16, Adhemerval Zanella wrote:
>
>
>> On 16 Jun 2022, at 00:15, Fangrui Song <maskray@google.com> wrote:
>>
>> On 2022-06-10, Adhemerval Zanella via Libc-alpha wrote:
>>> By adding an internal hidden_def alias to avoid the GOT indirection.
>>> On some architecture, __libc_single_thread may be accessed through
>>> copy relocations and thus it requires to update both copies.
>>>
>>> To obtain the correct address of the __libc_single_thread,
>>> __libc_dlsym is extended to support RTLD_DEFAULT. It searches
>>> through all scope instead of default local ones.
>>>
>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>> ---
>>> elf/dl-libc.c | 20 ++++++++++++++++++--
>>> elf/libc_early_init.c | 9 +++++++++
>>> include/sys/single_threaded.h | 11 +++++++++++
>>> misc/single_threaded.c | 2 ++
>>> nptl/pthread_create.c | 6 +++++-
>>> 5 files changed, 45 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/elf/dl-libc.c b/elf/dl-libc.c
>>> index 266e068da6..e64f4b9910 100644
>>> --- a/elf/dl-libc.c
>>> +++ b/elf/dl-libc.c
>>> @@ -16,6 +16,7 @@
>>> License along with the GNU C Library; if not, see
>>> <https://www.gnu.org/licenses/>. */
>>>
>>> +#include <assert.h>
>>> #include <dlfcn.h>
>>> #include <stdlib.h>
>>> #include <ldsodefs.h>
>>> @@ -72,6 +73,7 @@ struct do_dlsym_args
>>> /* Arguments to do_dlsym. */
>>> struct link_map *map;
>>> const char *name;
>>> + const void *caller_dlsym;
>>>
>>> /* Return values of do_dlsym. */
>>> lookup_t loadbase;
>>> @@ -102,8 +104,21 @@ do_dlsym (void *ptr)
>>> {
>>> struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
>>> args->ref = NULL;
>>> - args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
>>> -					 args->map->l_local_scope, NULL, 0,
>>> + struct link_map *match = args->map;
>>> + struct r_scope_elem **scope;
>>> + if (args->map == RTLD_DEFAULT)
>>> + {
>>> + ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
>>> + match = _dl_find_dso_for_object (caller);
>>> + /* It is only used internally, so caller should be always recognized. */
>>> + assert (match != NULL);
>>> + scope = match->l_scope;
>>> + }
>>> + else
>>> + scope = args->map->l_local_scope;
>>> +
>>> + args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
>>> +					 scope, NULL, 0,
>>> 					 DL_LOOKUP_RETURN_NEWEST, NULL);
>>> }
>>>
>>> @@ -182,6 +197,7 @@ __libc_dlsym (void *map, const char *name)
>>> struct do_dlsym_args args;
>>> args.map = map;
>>> args.name = name;
>>> + args.caller_dlsym = RETURN_ADDRESS (0);
>>>
>>> #ifdef SHARED
>>> if (GLRO (dl_dlfcn_hook) != NULL)
>>> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
>>> index 3c4a19cf6b..7cc2997122 100644
>>> --- a/elf/libc_early_init.c
>>> +++ b/elf/libc_early_init.c
>>> @@ -16,7 +16,9 @@
>>> License along with the GNU C Library; if not, see
>>> <https://www.gnu.org/licenses/>. */
>>>
>>> +#include <assert.h>
>>> #include <ctype.h>
>>> +#include <dlfcn.h>
>>> #include <elision-conf.h>
>>> #include <libc-early-init.h>
>>> #include <libc-internal.h>
>>> @@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>>> __libc_single_threaded = initial;
>>>
>>> #ifdef SHARED
>>> + /* __libc_single_threaded can be accessed through copy relocations, so it
>>> + requires to update the external copy. */
>>> + __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
>>> +						 "__libc_single_threaded");
>>> + assert (__libc_external_single_threaded != NULL);
>>> + *__libc_external_single_threaded = initial;
>>> +
>>> __libc_initial = initial;
>>> #endif
>>
>> I think this whole scheme can be greatly simplified.
>>
>> Under the hood,
>>
>> extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __asm__("" "__libc_single_threaded"); extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __attribute__((alias ("" "__GI___libc_single_threaded"))) __attribute__ ((__copy__ (__libc_single_threaded)));
>> * __libc_single_threaded is the STV_HIDDEN symbol with the asm name "__GI___libc_single_threaded"
>> * __EI___libc_single_threaded is the STV_DEFAULT symbol with the asm name "__libc_single_threaded". It aliases __libc_single_threaded.
>
>In fact libc_hidden_proto for SHARED will result in:
>
>extern __typeof (__libc_single_threaded) __libc_single_threaded __asm__ ("" "__GI___libc_single_threaded") __attribute__ ((visibility ("hidden")));;
>
>So using __libc_single_threaded will be the most simple way.

The code tries to read the initial value of the copy relocated __libc_single_threaded.
Accessing the STV_DEFAULT symbol does the trick and is simpler than __libc_dlsym.
(Accessing the STV_HIDDEN symbol just accesses libc.so's own copy.)

Note: The copy relocated __libc_single_threaded likely has a value of zero and is read-only from the executable.
I assume that this is a conservative and safe value.

>>
>> We can just access __EI___libc_single_threaded which will lead to a GOT
>> indirection (R_*_GLOB_DAT). This can avoid a __libc_dlsym call.
>
>The idea is exactly to avoid the GOT indirection.
>
>>
>> I can see that accessing the __EI declaration is currently inconvenient
>> because include/libc-symbols.h does not seem to provide a convenient
>> macro, but that be added.
>>
>>> diff --git a/include/sys/single_threaded.h b/include/sys/single_threaded.h
>>> index 18f6972482..258b01e0b2 100644
>>> --- a/include/sys/single_threaded.h
>>> +++ b/include/sys/single_threaded.h
>>> @@ -1 +1,12 @@
>>> #include <misc/sys/single_threaded.h>
>>> +
>>> +#ifndef _ISOMAC
>>> +
>>> +libc_hidden_proto (__libc_single_threaded);
>>> +
>>> +# ifdef SHARED
>>> +extern __typeof (__libc_single_threaded) *__libc_external_single_threaded
>>> + attribute_hidden;
>>> +# endif
>>> +
>>> +#endif
>>> diff --git a/misc/single_threaded.c b/misc/single_threaded.c
>>> index 96ada9137b..201d86a273 100644
>>> --- a/misc/single_threaded.c
>>> +++ b/misc/single_threaded.c
>>> @@ -22,6 +22,8 @@
>>> __libc_early_init (as false for inner libcs). */
>>> #ifdef SHARED
>>> char __libc_single_threaded;
>>> +__typeof (__libc_single_threaded) *__libc_external_single_threaded;
>>> #else
>>> char __libc_single_threaded = 1;
>>> #endif
>>> +libc_hidden_data_def (__libc_single_threaded)
>>> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
>>> index e7a099acb7..5633d01c62 100644
>>> --- a/nptl/pthread_create.c
>>> +++ b/nptl/pthread_create.c
>>> @@ -627,7 +627,11 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>>> if (__libc_single_threaded)
>>> {
>>> late_init ();
>>> - __libc_single_threaded = 0;
>>> + __libc_single_threaded =
>>> +#ifdef SHARED
>>> + *__libc_external_single_threaded =
>>> +#endif
>>> +	0;
>>> }
>>>
>>> const struct pthread_attr *iattr = (struct pthread_attr *) attr;
>>> --
>>> 2.34.1
>
  
Adhemerval Zanella June 17, 2022, 12:35 a.m. UTC | #4
> On 16 Jun 2022, at 15:30, Fangrui Song <maskray@google.com> wrote:
> 
> On 2022-06-16, Adhemerval Zanella wrote:
>> 
>> 
>>> On 16 Jun 2022, at 00:15, Fangrui Song <maskray@google.com> wrote:
>>> 
>>> On 2022-06-10, Adhemerval Zanella via Libc-alpha wrote:
>>>> By adding an internal hidden_def alias to avoid the GOT indirection.
>>>> On some architecture, __libc_single_thread may be accessed through
>>>> copy relocations and thus it requires to update both copies.
>>>> 
>>>> To obtain the correct address of the __libc_single_thread,
>>>> __libc_dlsym is extended to support RTLD_DEFAULT. It searches
>>>> through all scope instead of default local ones.
>>>> 
>>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>>> ---
>>>> elf/dl-libc.c | 20 ++++++++++++++++++--
>>>> elf/libc_early_init.c | 9 +++++++++
>>>> include/sys/single_threaded.h | 11 +++++++++++
>>>> misc/single_threaded.c | 2 ++
>>>> nptl/pthread_create.c | 6 +++++-
>>>> 5 files changed, 45 insertions(+), 3 deletions(-)
>>>> 
>>>> diff --git a/elf/dl-libc.c b/elf/dl-libc.c
>>>> index 266e068da6..e64f4b9910 100644
>>>> --- a/elf/dl-libc.c
>>>> +++ b/elf/dl-libc.c
>>>> @@ -16,6 +16,7 @@
>>>> License along with the GNU C Library; if not, see
>>>> <https://www.gnu.org/licenses/>. */
>>>> 
>>>> +#include <assert.h>
>>>> #include <dlfcn.h>
>>>> #include <stdlib.h>
>>>> #include <ldsodefs.h>
>>>> @@ -72,6 +73,7 @@ struct do_dlsym_args
>>>> /* Arguments to do_dlsym. */
>>>> struct link_map *map;
>>>> const char *name;
>>>> + const void *caller_dlsym;
>>>> 
>>>> /* Return values of do_dlsym. */
>>>> lookup_t loadbase;
>>>> @@ -102,8 +104,21 @@ do_dlsym (void *ptr)
>>>> {
>>>> struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
>>>> args->ref = NULL;
>>>> - args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
>>>> -					 args->map->l_local_scope, NULL, 0,
>>>> + struct link_map *match = args->map;
>>>> + struct r_scope_elem **scope;
>>>> + if (args->map == RTLD_DEFAULT)
>>>> + {
>>>> + ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
>>>> + match = _dl_find_dso_for_object (caller);
>>>> + /* It is only used internally, so caller should be always recognized. */
>>>> + assert (match != NULL);
>>>> + scope = match->l_scope;
>>>> + }
>>>> + else
>>>> + scope = args->map->l_local_scope;
>>>> +
>>>> + args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
>>>> +					 scope, NULL, 0,
>>>> 					 DL_LOOKUP_RETURN_NEWEST, NULL);
>>>> }
>>>> 
>>>> @@ -182,6 +197,7 @@ __libc_dlsym (void *map, const char *name)
>>>> struct do_dlsym_args args;
>>>> args.map = map;
>>>> args.name = name;
>>>> + args.caller_dlsym = RETURN_ADDRESS (0);
>>>> 
>>>> #ifdef SHARED
>>>> if (GLRO (dl_dlfcn_hook) != NULL)
>>>> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
>>>> index 3c4a19cf6b..7cc2997122 100644
>>>> --- a/elf/libc_early_init.c
>>>> +++ b/elf/libc_early_init.c
>>>> @@ -16,7 +16,9 @@
>>>> License along with the GNU C Library; if not, see
>>>> <https://www.gnu.org/licenses/>. */
>>>> 
>>>> +#include <assert.h>
>>>> #include <ctype.h>
>>>> +#include <dlfcn.h>
>>>> #include <elision-conf.h>
>>>> #include <libc-early-init.h>
>>>> #include <libc-internal.h>
>>>> @@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>>>> __libc_single_threaded = initial;
>>>> 
>>>> #ifdef SHARED
>>>> + /* __libc_single_threaded can be accessed through copy relocations, so it
>>>> + requires to update the external copy. */
>>>> + __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
>>>> +						 "__libc_single_threaded");
>>>> + assert (__libc_external_single_threaded != NULL);
>>>> + *__libc_external_single_threaded = initial;
>>>> +
>>>> __libc_initial = initial;
>>>> #endif
>>> 
>>> I think this whole scheme can be greatly simplified.
>>> 
>>> Under the hood,
>>> 
>>> extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __asm__("" "__libc_single_threaded"); extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __attribute__((alias ("" "__GI___libc_single_threaded"))) __attribute__ ((__copy__ (__libc_single_threaded)));
>>> * __libc_single_threaded is the STV_HIDDEN symbol with the asm name "__GI___libc_single_threaded"
>>> * __EI___libc_single_threaded is the STV_DEFAULT symbol with the asm name "__libc_single_threaded". It aliases __libc_single_threaded.
>> 
>> In fact libc_hidden_proto for SHARED will result in:
>> 
>> extern __typeof (__libc_single_threaded) __libc_single_threaded __asm__ ("" "__GI___libc_single_threaded") __attribute__ ((visibility ("hidden")));;
>> 
>> So using __libc_single_threaded will be the most simple way.
> 
> The code tries to read the initial value of the copy relocated __libc_single_threaded.
> Accessing the STV_DEFAULT symbol does the trick and is simpler than __libc_dlsym.
> (Accessing the STV_HIDDEN symbol just accesses libc.so's own copy.)

The glibc will still continue to use the __libc_single_threaded value internally, the 
__libc_dlsym is used to initialize __libc_external_single_threaded which is used to 
update the external one solely to avoid update not being visible on architectures that
use copy relocations.

This is actually not needed on some architectures, aarch64 for instance, but without
it x86 does not see the __libc_single_thread being updated.

> 
> Note: The copy relocated __libc_single_threaded likely has a value of zero and is read-only from the executable.
> I assume that this is a conservative and safe value.
> 

It can not be a read-only value because it should be updated by pthread_create.
  
Fangrui Song June 17, 2022, 8:35 p.m. UTC | #5
On 2022-06-16, Adhemerval Zanella wrote:
>
>
>> On 16 Jun 2022, at 15:30, Fangrui Song <maskray@google.com> wrote:
>>
>> On 2022-06-16, Adhemerval Zanella wrote:
>>>
>>>
>>>> On 16 Jun 2022, at 00:15, Fangrui Song <maskray@google.com> wrote:
>>>>
>>>> On 2022-06-10, Adhemerval Zanella via Libc-alpha wrote:
>>>>> By adding an internal hidden_def alias to avoid the GOT indirection.
>>>>> On some architecture, __libc_single_thread may be accessed through
>>>>> copy relocations and thus it requires to update both copies.
>>>>>
>>>>> To obtain the correct address of the __libc_single_thread,
>>>>> __libc_dlsym is extended to support RTLD_DEFAULT. It searches
>>>>> through all scope instead of default local ones.
>>>>>
>>>>> Checked on x86_64-linux-gnu and i686-linux-gnu.
>>>>> ---
>>>>> elf/dl-libc.c | 20 ++++++++++++++++++--
>>>>> elf/libc_early_init.c | 9 +++++++++
>>>>> include/sys/single_threaded.h | 11 +++++++++++
>>>>> misc/single_threaded.c | 2 ++
>>>>> nptl/pthread_create.c | 6 +++++-
>>>>> 5 files changed, 45 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/elf/dl-libc.c b/elf/dl-libc.c
>>>>> index 266e068da6..e64f4b9910 100644
>>>>> --- a/elf/dl-libc.c
>>>>> +++ b/elf/dl-libc.c
>>>>> @@ -16,6 +16,7 @@
>>>>> License along with the GNU C Library; if not, see
>>>>> <https://www.gnu.org/licenses/>. */
>>>>>
>>>>> +#include <assert.h>
>>>>> #include <dlfcn.h>
>>>>> #include <stdlib.h>
>>>>> #include <ldsodefs.h>
>>>>> @@ -72,6 +73,7 @@ struct do_dlsym_args
>>>>> /* Arguments to do_dlsym. */
>>>>> struct link_map *map;
>>>>> const char *name;
>>>>> + const void *caller_dlsym;
>>>>>
>>>>> /* Return values of do_dlsym. */
>>>>> lookup_t loadbase;
>>>>> @@ -102,8 +104,21 @@ do_dlsym (void *ptr)
>>>>> {
>>>>> struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
>>>>> args->ref = NULL;
>>>>> - args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
>>>>> -					 args->map->l_local_scope, NULL, 0,
>>>>> + struct link_map *match = args->map;
>>>>> + struct r_scope_elem **scope;
>>>>> + if (args->map == RTLD_DEFAULT)
>>>>> + {
>>>>> + ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
>>>>> + match = _dl_find_dso_for_object (caller);
>>>>> + /* It is only used internally, so caller should be always recognized. */
>>>>> + assert (match != NULL);
>>>>> + scope = match->l_scope;
>>>>> + }
>>>>> + else
>>>>> + scope = args->map->l_local_scope;
>>>>> +
>>>>> + args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
>>>>> +					 scope, NULL, 0,
>>>>> 					 DL_LOOKUP_RETURN_NEWEST, NULL);
>>>>> }
>>>>>
>>>>> @@ -182,6 +197,7 @@ __libc_dlsym (void *map, const char *name)
>>>>> struct do_dlsym_args args;
>>>>> args.map = map;
>>>>> args.name = name;
>>>>> + args.caller_dlsym = RETURN_ADDRESS (0);
>>>>>
>>>>> #ifdef SHARED
>>>>> if (GLRO (dl_dlfcn_hook) != NULL)
>>>>> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
>>>>> index 3c4a19cf6b..7cc2997122 100644
>>>>> --- a/elf/libc_early_init.c
>>>>> +++ b/elf/libc_early_init.c
>>>>> @@ -16,7 +16,9 @@
>>>>> License along with the GNU C Library; if not, see
>>>>> <https://www.gnu.org/licenses/>. */
>>>>>
>>>>> +#include <assert.h>
>>>>> #include <ctype.h>
>>>>> +#include <dlfcn.h>
>>>>> #include <elision-conf.h>
>>>>> #include <libc-early-init.h>
>>>>> #include <libc-internal.h>
>>>>> @@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>>>>> __libc_single_threaded = initial;
>>>>>
>>>>> #ifdef SHARED
>>>>> + /* __libc_single_threaded can be accessed through copy relocations, so it
>>>>> + requires to update the external copy. */
>>>>> + __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
>>>>> +						 "__libc_single_threaded");
>>>>> + assert (__libc_external_single_threaded != NULL);
>>>>> + *__libc_external_single_threaded = initial;
>>>>> +
>>>>> __libc_initial = initial;
>>>>> #endif
>>>>
>>>> I think this whole scheme can be greatly simplified.
>>>>
>>>> Under the hood,
>>>>
>>>> extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __asm__("" "__libc_single_threaded"); extern __typeof (__libc_single_threaded) __EI___libc_single_threaded __attribute__((alias ("" "__GI___libc_single_threaded"))) __attribute__ ((__copy__ (__libc_single_threaded)));
>>>> * __libc_single_threaded is the STV_HIDDEN symbol with the asm name "__GI___libc_single_threaded"
>>>> * __EI___libc_single_threaded is the STV_DEFAULT symbol with the asm name "__libc_single_threaded". It aliases __libc_single_threaded.
>>>
>>> In fact libc_hidden_proto for SHARED will result in:
>>>
>>> extern __typeof (__libc_single_threaded) __libc_single_threaded __asm__ ("" "__GI___libc_single_threaded") __attribute__ ((visibility ("hidden")));;
>>>
>>> So using __libc_single_threaded will be the most simple way.
>>
>> The code tries to read the initial value of the copy relocated __libc_single_threaded.
>> Accessing the STV_DEFAULT symbol does the trick and is simpler than __libc_dlsym.
>> (Accessing the STV_HIDDEN symbol just accesses libc.so's own copy.)
>
>The glibc will still continue to use the __libc_single_threaded value internally, the
>__libc_dlsym is used to initialize __libc_external_single_threaded which is used to
>update the external one solely to avoid update not being visible on architectures that
>use copy relocations.

Yes, I know.
My point is that to access the possibly external definition (in the
presence of a copy relocation), we can access the STV_DEFAULT symbol,
instead of using __libc_dlsym.

The compiler generates a GOT-generating relocation in -fPIC mode and the
linker will create a GLOB_DAT dynamic relocation because the symbol is
preemptible/interposable (see
https://maskray.me/blog/2020-11-15-explain-gnu-linker-options#bsymbolic-and---dynamic-list
how a symbol is considered preemptible). ld.so resolves GLOB_DAT to the
copy relocated definition in the executable (if exists).

>This is actually not needed on some architectures, aarch64 for instance, but without
>it x86 does not see the __libc_single_thread being updated.

__libc_single_thread is in a public header sys/single_threaded.h and is
used by libstdc++ ext/atomicity.h.
A user program compiled with -fno-pic will typically access __libc_single_thread 
with an absolute or PC-relative relocation. The linker will resolve this relocation
to a copy relocation if __libc_single_thread is defined in libc.so.6.

aarch64 -fno-pic uses PC-relative relocations. I think some variants of
mips avoid -fno-pic code generation which may lead to copy relocation.

clang -fno-direct-access-external-data and gcc -mno-direct-extern-access
avoids absolute/PC-relative relocation in -fno-pic/-fpie mode, but the
options are by no means popular.

>>
>> Note: The copy relocated __libc_single_threaded likely has a value of zero and is read-only from the executable.
>> I assume that this is a conservative and safe value.
>>
>
>It can not be a read-only value because it should be updated by pthread_create.
>
I mean that the executable does not write to __libc_single_threaded.
Its write to __libc_single_threaded will not be visible to libc.so.6.
If libc.so.6 decides to write the STV_HIDDEN __libc_single_threaded,
it needs to write the STV_DEFAULT __libc_single_threaded to update the
possibly copy relocated definition.
  
Wilco Dijkstra June 18, 2022, 1:20 p.m. UTC | #6
Hi,

So long story short, what happens is that the magic macros effectively emit both global
and hidden versions of the same symbol which can be accessed without using dl_sym.

>>>> I think this whole scheme can be greatly simplified.

Yes, I would suggest to keep it simple and just explicitly add a separate hidden symbol
for use inside GLIBC (eg. __libc_single_thread_internal).

Cheers,
Wilco
  
Florian Weimer June 20, 2022, 8:37 a.m. UTC | #7
* Adhemerval Zanella via Libc-alpha:

> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
> index 3c4a19cf6b..7cc2997122 100644
> --- a/elf/libc_early_init.c
> +++ b/elf/libc_early_init.c
> @@ -16,7 +16,9 @@
>     License along with the GNU C Library; if not, see
>     <https://www.gnu.org/licenses/>.  */
>  
> +#include <assert.h>
>  #include <ctype.h>
> +#include <dlfcn.h>
>  #include <elision-conf.h>
>  #include <libc-early-init.h>
>  #include <libc-internal.h>
> @@ -38,6 +40,13 @@ __libc_early_init (_Bool initial)
>    __libc_single_threaded = initial;
>  
>  #ifdef SHARED
> +  /* __libc_single_threaded can be accessed through copy relocations, so it
> +     requires to update the external copy.  */
> +  __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
> +						  "__libc_single_threaded");
> +  assert (__libc_external_single_threaded != NULL);
> +  *__libc_external_single_threaded = initial;
> +
>    __libc_initial = initial;
>  #endif

I wrote this before, but:

There is no need to cache the pointer because we need it at most once.
We can just do the lookup in pthread_create.

RTLD_DEFAULT is wrong here because we need the scope of the main
program, where the copy relocations are, so:

  __libc_dlsym (GL (dl_ns)[LM_ID_BASE]._ns_loaded, "__libc_single_threaded");

and no __libc_dlsym changes are needed.

Thanks,
Florian
  

Patch

diff --git a/elf/dl-libc.c b/elf/dl-libc.c
index 266e068da6..e64f4b9910 100644
--- a/elf/dl-libc.c
+++ b/elf/dl-libc.c
@@ -16,6 +16,7 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <assert.h>
 #include <dlfcn.h>
 #include <stdlib.h>
 #include <ldsodefs.h>
@@ -72,6 +73,7 @@  struct do_dlsym_args
   /* Arguments to do_dlsym.  */
   struct link_map *map;
   const char *name;
+  const void *caller_dlsym;
 
   /* Return values of do_dlsym.  */
   lookup_t loadbase;
@@ -102,8 +104,21 @@  do_dlsym (void *ptr)
 {
   struct do_dlsym_args *args = (struct do_dlsym_args *) ptr;
   args->ref = NULL;
-  args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, args->map, &args->ref,
-					     args->map->l_local_scope, NULL, 0,
+  struct link_map *match = args->map;
+  struct r_scope_elem **scope;
+  if (args->map == RTLD_DEFAULT)
+    {
+      ElfW(Addr) caller = (ElfW(Addr)) args->caller_dlsym;
+      match = _dl_find_dso_for_object (caller);
+      /* It is only used internally, so caller should be always recognized.  */
+      assert (match != NULL);
+      scope = match->l_scope;
+    }
+  else
+    scope = args->map->l_local_scope;
+
+  args->loadbase = GLRO(dl_lookup_symbol_x) (args->name, match, &args->ref,
+					     scope, NULL, 0,
 					     DL_LOOKUP_RETURN_NEWEST, NULL);
 }
 
@@ -182,6 +197,7 @@  __libc_dlsym (void *map, const char *name)
   struct do_dlsym_args args;
   args.map = map;
   args.name = name;
+  args.caller_dlsym = RETURN_ADDRESS (0);
 
 #ifdef SHARED
   if (GLRO (dl_dlfcn_hook) != NULL)
diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
index 3c4a19cf6b..7cc2997122 100644
--- a/elf/libc_early_init.c
+++ b/elf/libc_early_init.c
@@ -16,7 +16,9 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
+#include <assert.h>
 #include <ctype.h>
+#include <dlfcn.h>
 #include <elision-conf.h>
 #include <libc-early-init.h>
 #include <libc-internal.h>
@@ -38,6 +40,13 @@  __libc_early_init (_Bool initial)
   __libc_single_threaded = initial;
 
 #ifdef SHARED
+  /* __libc_single_threaded can be accessed through copy relocations, so it
+     requires to update the external copy.  */
+  __libc_external_single_threaded = __libc_dlsym (RTLD_DEFAULT,
+						  "__libc_single_threaded");
+  assert (__libc_external_single_threaded != NULL);
+  *__libc_external_single_threaded = initial;
+
   __libc_initial = initial;
 #endif
 
diff --git a/include/sys/single_threaded.h b/include/sys/single_threaded.h
index 18f6972482..258b01e0b2 100644
--- a/include/sys/single_threaded.h
+++ b/include/sys/single_threaded.h
@@ -1 +1,12 @@ 
 #include <misc/sys/single_threaded.h>
+
+#ifndef _ISOMAC
+
+libc_hidden_proto (__libc_single_threaded);
+
+# ifdef SHARED
+extern __typeof (__libc_single_threaded) *__libc_external_single_threaded
+  attribute_hidden;
+# endif
+
+#endif
diff --git a/misc/single_threaded.c b/misc/single_threaded.c
index 96ada9137b..201d86a273 100644
--- a/misc/single_threaded.c
+++ b/misc/single_threaded.c
@@ -22,6 +22,8 @@ 
    __libc_early_init (as false for inner libcs).  */
 #ifdef SHARED
 char __libc_single_threaded;
+__typeof (__libc_single_threaded) *__libc_external_single_threaded;
 #else
 char __libc_single_threaded = 1;
 #endif
+libc_hidden_data_def (__libc_single_threaded)
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index e7a099acb7..5633d01c62 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -627,7 +627,11 @@  __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
   if (__libc_single_threaded)
     {
       late_init ();
-      __libc_single_threaded = 0;
+      __libc_single_threaded =
+#ifdef SHARED
+        *__libc_external_single_threaded =
+#endif
+	0;
     }
 
   const struct pthread_attr *iattr = (struct pthread_attr *) attr;