Patchwork Reserve static TLS for dynamically loaded initial-exec TLS only [BZ #25051]

login
register
mail settings
Submitter Szabolcs Nagy
Date Jan. 7, 2020, 11:54 a.m.
Message ID <44eaccc2-f760-88c0-989a-e413e328b051@arm.com>
Download mbox | patch
Permalink /patch/37240/
State New
Headers show

Comments

Szabolcs Nagy - Jan. 7, 2020, 11:54 a.m.
I forgot about this; I guess it's too late for 2.31.
Please comment if you disagree with the approach.
Florian Weimer - Jan. 7, 2020, 12:09 p.m.
* Szabolcs Nagy:

> This patch reserves 128 bytes of the surplus TLS that is not used
> opportunistically. TLS_STATIC_SURPLUS is currently 1664, so this still
> allows 1536 bytes for opportunistic use. A new test is added to verify
> this ABI contract: dynamic loading of libraries with initial-exec TLS
> is supported up to 128 bytes in total on all targets. This should be
> enough for system libraries such as libgomp.

I'm not sure if it's enough for loading another libc.so.6 via dlmopen.
Have you tested this, by chance?

Thanks,
Florian
Szabolcs Nagy - Jan. 7, 2020, 12:50 p.m.
On 07/01/2020 12:09, Florian Weimer wrote:
> * Szabolcs Nagy:
>
>> This patch reserves 128 bytes of the surplus TLS that is not used
>> opportunistically. TLS_STATIC_SURPLUS is currently 1664, so this still
>> allows 1536 bytes for opportunistic use. A new test is added to verify
>> this ABI contract: dynamic loading of libraries with initial-exec TLS
>> is supported up to 128 bytes in total on all targets. This should be
>> enough for system libraries such as libgomp.
>
> I'm not sure if it's enough for loading another libc.so.6 via dlmopen.
> Have you tested this, by chance?


I haven't tested, but that won't work reliably with
this patch.

libc.so on aarch64 has 144 bytes of TLS (8-byte aligned),
so the reserved 128-byte surplus TLS is not enough
(it can be increased to 144 though).

However, if a lib with IE TLS is loaded before the static
TLS runs out then that works: the 128-byte reserve will be
kept available until the 'opportunistic' part of TLS runs
out, and then the reserve can only be used for IE TLS.
(So an early dlmopen of libc.so.6 and a later dlopen of libgomp
works.)

This may not be an ideal solution: if all the IE TLS libs
are loaded early then reserving 128 bytes at the end is not
that useful.

But using dlmopen to load multiple instances of libc,
libgomp, libopengl, etc. will not work reliably because
we can't keep enough static TLS reserve for that: if that's
the preferred behaviour then glibc should not use static
TLS opportunistically for TLSDESC and the ppc TLS optimization.

Patch

From c0ab4649026d19bda77818946e349369b12384ee Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date: Tue, 31 Dec 2019 16:01:41 +0000
Subject: [PATCH] Reserve static TLS for dynamically loaded initial-exec TLS
 only [BZ #25051]

On some targets static TLS surplus area can be used opportunistically
for dynamically loaded modules such that the TLS access then becomes
faster (TLSDESC and powerpc TLS optimization). However we don't want
all surplus TLS to be used for this optimization because dynamically
loaded modules with initial-exec TLS can only use surplus TLS.

This patch reserves 128 bytes of the surplus TLS that is not used
opportunistically. TLS_STATIC_SURPLUS is currently 1664, so this still
allows 1536 bytes for opportunistic use. A new test is added to verify
this ABI contract: dynamic loading of libraries with initial-exec TLS
is supported up to 128 bytes in total on all targets. This should be
enough for system libraries such as libgomp.

Discussed at https://sourceware.org/ml/libc-alpha/2019-09/msg00533.html

Tested on aarch64-linux-gnu and x86_64-linux-gnu.
---

 elf/Makefile          | 17 +++++++-
 elf/dl-reloc.c        | 17 ++++----
 elf/dynamic-link.h    | 10 ++++-
 elf/tst-tls-ie-mod.h  | 40 ++++++++++++++++++
 elf/tst-tls-ie-mod0.c |  4 ++
 elf/tst-tls-ie-mod1.c |  4 ++
 elf/tst-tls-ie-mod2.c |  4 ++
 elf/tst-tls-ie-mod3.c |  4 ++
 elf/tst-tls-ie-mod4.c |  4 ++
 elf/tst-tls-ie-mod5.c |  4 ++
 elf/tst-tls-ie.c      | 98 +++++++++++++++++++++++++++++++++++++++++++
 11 files changed, 194 insertions(+), 12 deletions(-)
 create mode 100644 elf/tst-tls-ie-mod.h
 create mode 100644 elf/tst-tls-ie-mod0.c
 create mode 100644 elf/tst-tls-ie-mod1.c
 create mode 100644 elf/tst-tls-ie-mod2.c
 create mode 100644 elf/tst-tls-ie-mod3.c
 create mode 100644 elf/tst-tls-ie-mod4.c
 create mode 100644 elf/tst-tls-ie-mod5.c
 create mode 100644 elf/tst-tls-ie.c

diff --git a/elf/Makefile b/elf/Makefile

index f861126b2f..6ec23ac874 100644

--- a/elf/Makefile

+++ b/elf/Makefile

@@ -194,17 +194,18 @@  tests += restest1 preloadtest loadfail multiload origtest resolvfail \

 	 tst-nodelete tst-dlopen-nodelete-reloc) \
 	 tst-initorder tst-initorder2 tst-relsort1 tst-null-argv \
 	 tst-tlsalign tst-tlsalign-extern tst-nodelete-opened \
 	 tst-nodelete2 tst-audit11 tst-audit12 tst-dlsym-error tst-noload \
 	 tst-latepthread tst-tls-manydynamic tst-nodelete-dlclose \
 	 tst-debug1 tst-main1 tst-absolute-sym tst-absolute-zero tst-big-note \
 	 tst-unwind-ctor tst-unwind-main tst-audit13 \
 	 tst-sonamemove-link tst-sonamemove-dlopen tst-dlopen-tlsmodid \
-	 tst-dlopen-self tst-auditmany tst-initfinilazyfail tst-dlopenfail

+	 tst-dlopen-self tst-auditmany tst-initfinilazyfail tst-dlopenfail \

+	 tst-tls-ie

 #	 reldep9
 tests-internal += loadtest unload unload2 circleload1 \
 	 neededtest neededtest2 neededtest3 neededtest4 \
 	 tst-tls3 tst-tls6 tst-tls7 tst-tls8 tst-dlmopen2 \
 	 tst-ptrguard1 tst-stackguard1 tst-libc_dlvsym \
 	 tst-create_format1
 tests-container += tst-pldd tst-dlopen-tlsmodid-container \
   tst-dlopen-self-container
@@ -304,17 +305,20 @@  modules-names = testobj1 testobj2 testobj3 testobj4 testobj5 testobj6 \

 		tst-main1mod tst-libc_dlvsym-dso tst-absolute-sym-lib \
 		tst-absolute-zero-lib tst-big-note-lib tst-unwind-ctor-lib \
 		tst-audit13mod1 tst-sonamemove-linkmod1 \
 		tst-sonamemove-runmod1 tst-sonamemove-runmod2 \
 		tst-auditmanymod1 tst-auditmanymod2 tst-auditmanymod3 \
 		tst-auditmanymod4 tst-auditmanymod5 tst-auditmanymod6 \
 		tst-auditmanymod7 tst-auditmanymod8 tst-auditmanymod9 \
 		tst-initlazyfailmod tst-finilazyfailmod \
-		tst-dlopenfailmod1 tst-dlopenfaillinkmod tst-dlopenfailmod2

+		tst-dlopenfailmod1 tst-dlopenfaillinkmod tst-dlopenfailmod2 \

+		tst-tls-ie-mod0 tst-tls-ie-mod1 tst-tls-ie-mod2 \

+		tst-tls-ie-mod3 tst-tls-ie-mod4 tst-tls-ie-mod5

+

 # Most modules build with _ISOMAC defined, but those filtered out
 # depend on internal headers.
 modules-names-tests = $(filter-out ifuncmod% tst-libc_dlvsym-dso tst-tlsmod%,\
 				   $(modules-names))
 
 ifeq (yes,$(have-mtls-dialect-gnu2))
 tests += tst-gnu2-tls1
 modules-names += tst-gnu2-tls1mod
@@ -1683,8 +1687,17 @@  $(objpfx)tst-dlopen-nodelete-reloc-mod15.so: \

 tst-dlopen-nodelete-reloc-mod16.so-no-z-defs = yes
 $(objpfx)tst-dlopen-nodelete-reloc-mod16.so: \
   $(objpfx)tst-dlopen-nodelete-reloc-mod15.so
 LDFLAGS-tst-dlopen-nodelete-reloc-mod16.so = -Wl,--no-as-needed
 $(objpfx)tst-dlopen-nodelete-reloc-mod17.so: \
   $(objpfx)tst-dlopen-nodelete-reloc-mod15.so \
   $(objpfx)tst-dlopen-nodelete-reloc-mod16.so
 LDFLAGS-tst-dlopen-nodelete-reloc-mod17.so = -Wl,--no-as-needed
+

+$(objpfx)tst-tls-ie: $(libdl) $(shared-thread-library)

+$(objpfx)tst-tls-ie.out: \

+  $(objpfx)tst-tls-ie-mod0.so \

+  $(objpfx)tst-tls-ie-mod1.so \

+  $(objpfx)tst-tls-ie-mod2.so \

+  $(objpfx)tst-tls-ie-mod3.so \

+  $(objpfx)tst-tls-ie-mod4.so \

+  $(objpfx)tst-tls-ie-mod5.so

diff --git a/elf/dl-reloc.c b/elf/dl-reloc.c

index 7f201fe184..fb284cc455 100644

--- a/elf/dl-reloc.c

+++ b/elf/dl-reloc.c

@@ -33,39 +33,39 @@ 

 # define bump_num_cache_relocations() ++GL(dl_num_cache_relocations)
 #else
 # define bump_num_cache_relocations() ((void) 0)
 #endif
 
 
 /* We are trying to perform a static TLS relocation in MAP, but it was
    dynamically loaded.  This can only work if there is enough surplus in
-   the static TLS area already allocated for each running thread.  If this

-   object's TLS segment is too big to fit, we fail.  If it fits,

-   we set MAP->l_tls_offset and return.

+   the static TLS area already allocated for each running thread.  If this

+   object's TLS segment is too big to fit or less than RESERVED bytes

+   remain free, we fail.  If it fits, we set MAP->l_tls_offset and return.

    This function intentionally does not return any value but signals error
    directly, as static TLS should be rare and code handling it should
    not be inlined as much as possible.  */
 int
-_dl_try_allocate_static_tls (struct link_map *map)

+_dl_try_allocate_static_tls (struct link_map *map, size_t reserved)

 {
   /* If we've already used the variable with dynamic access, or if the
      alignment requirements are too high, fail.  */
   if (map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET
       || map->l_tls_align > GL(dl_tls_static_align))
     {
     fail:
       return -1;
     }
 
 #if TLS_TCB_AT_TP
   size_t freebytes = GL(dl_tls_static_size) - GL(dl_tls_static_used);
-  if (freebytes < TLS_TCB_SIZE)

+  if (freebytes < TLS_TCB_SIZE + reserved)

     goto fail;
-  freebytes -= TLS_TCB_SIZE;

+  freebytes -= TLS_TCB_SIZE + reserved;

 
   size_t blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset;
   if (freebytes < blsize)
     goto fail;
 
   size_t n = (freebytes - blsize) / map->l_tls_align;
 
   size_t offset = GL(dl_tls_static_used) + (freebytes - n * map->l_tls_align
@@ -75,17 +75,18 @@  _dl_try_allocate_static_tls (struct link_map *map)

 #elif TLS_DTV_AT_TP
   /* dl_tls_static_used includes the TCB at the beginning.  */
   size_t offset = (ALIGN_UP(GL(dl_tls_static_used)
 			    - map->l_tls_firstbyte_offset,
 			    map->l_tls_align)
 		   + map->l_tls_firstbyte_offset);
   size_t used = offset + map->l_tls_blocksize;
 
-  if (used > GL(dl_tls_static_size))

+  if (GL(dl_tls_static_size) < reserved

+      || used > GL(dl_tls_static_size) - reserved)

     goto fail;
 
   map->l_tls_offset = offset;
   map->l_tls_firstbyte_offset = GL(dl_tls_static_used);
   GL(dl_tls_static_used) = used;
 #else
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
 #endif
@@ -110,17 +111,17 @@  _dl_try_allocate_static_tls (struct link_map *map)

   return 0;
 }
 
 void
 __attribute_noinline__
 _dl_allocate_static_tls (struct link_map *map)
 {
   if (map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET
-      || _dl_try_allocate_static_tls (map))

+      || _dl_try_allocate_static_tls (map, 0))

     {
       _dl_signal_error (0, map->l_name, NULL, N_("\
 cannot allocate memory in static TLS block"));
     }
 }
 
 /* Initialize static TLS area and DTV for current (only) thread.
    libpthread implementations should provide their own hook
diff --git a/elf/dynamic-link.h b/elf/dynamic-link.h

index 5d9ef492ac..013a859ca3 100644

--- a/elf/dynamic-link.h

+++ b/elf/dynamic-link.h

@@ -35,19 +35,25 @@ 

       if (!HAVE_STATIC_TLS (map, sym_map))				\
 	_dl_allocate_static_tls (sym_map);				\
     } while (0)
 
 #define TRY_STATIC_TLS(map, sym_map)					\
     (__builtin_expect ((sym_map)->l_tls_offset				\
 		       != FORCED_DYNAMIC_TLS_OFFSET, 1)			\
      && (__builtin_expect ((sym_map)->l_tls_offset != NO_TLS_OFFSET, 1)	\
-	 || _dl_try_allocate_static_tls (sym_map) == 0))

+	 || _dl_try_allocate_static_tls (sym_map, TLS_RESERVED_FOR_IE) == 0))

 
-int _dl_try_allocate_static_tls (struct link_map *map) attribute_hidden;

+/* When the static TLS surplus area is opportunistically used, e.g. by

+   TRY_STATIC_TLS, reserve at least this much for dynamically loaded

+   modules with initial-exec TLS which can only use the surplus TLS.  */

+#define TLS_RESERVED_FOR_IE 128

+

+int _dl_try_allocate_static_tls (struct link_map *map, size_t reserved)

+  attribute_hidden;

 
 #include <elf.h>
 
 #ifdef RESOLVE_MAP
 /* We pass reloc_addr as a pointer to void, as opposed to a pointer to
    ElfW(Addr), because not all architectures can assume that the
    relocated address is properly aligned, whereas the compiler is
    entitled to assume that a pointer to a type is properly aligned for
diff --git a/elf/tst-tls-ie-mod.h b/elf/tst-tls-ie-mod.h

new file mode 100644
index 0000000000..46b362a9b7

--- /dev/null

+++ b/elf/tst-tls-ie-mod.h

@@ -0,0 +1,40 @@ 

+/* Module with specified TLS size and model.

+   Copyright (C) 2020 Free Software Foundation, Inc.

+   This file is part of the GNU C Library.

+

+   The GNU C Library is free software; you can redistribute it and/or

+   modify it under the terms of the GNU Lesser General Public

+   License as published by the Free Software Foundation; either

+   version 2.1 of the License, or (at your option) any later version.

+

+   The GNU C Library is distributed in the hope that it will be useful,

+   but WITHOUT ANY WARRANTY; without even the implied warranty of

+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

+   Lesser General Public License for more details.

+

+   You should have received a copy of the GNU Lesser General Public

+   License along with the GNU C Library; if not, see

+   <https://www.gnu.org/licenses/>.  */

+

+/* This file is parameterized by macros N, SIZE and MODEL.  */

+

+#include <stdio.h>

+#include <string.h>

+

+#define CONCATX(x, y) x ## y

+#define CONCAT(x, y) CONCATX (x, y)

+#define STRX(x) #x

+#define STR(x) STRX (x)

+

+#define VAR CONCAT (var, N)

+

+__attribute__ ((aligned (8), tls_model (MODEL)))

+__thread char VAR[SIZE];

+

+void

+CONCAT (access, N) (void)

+{

+  printf (STR (VAR) "[%d]:\t %p .. %p " MODEL "\n", SIZE, VAR, VAR + SIZE);

+  fflush (stdout);

+  memset (VAR, 1, SIZE);

+}

diff --git a/elf/tst-tls-ie-mod0.c b/elf/tst-tls-ie-mod0.c

new file mode 100644
index 0000000000..a822af1b3c

--- /dev/null

+++ b/elf/tst-tls-ie-mod0.c

@@ -0,0 +1,4 @@ 

+#define N 0

+#define SIZE 1520

+#define MODEL "global-dynamic"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie-mod1.c b/elf/tst-tls-ie-mod1.c

new file mode 100644
index 0000000000..849ff91e53

--- /dev/null

+++ b/elf/tst-tls-ie-mod1.c

@@ -0,0 +1,4 @@ 

+#define N 1

+#define SIZE 120

+#define MODEL "global-dynamic"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie-mod2.c b/elf/tst-tls-ie-mod2.c

new file mode 100644
index 0000000000..70f8e81e05

--- /dev/null

+++ b/elf/tst-tls-ie-mod2.c

@@ -0,0 +1,4 @@ 

+#define N 2

+#define SIZE 48

+#define MODEL "global-dynamic"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie-mod3.c b/elf/tst-tls-ie-mod3.c

new file mode 100644
index 0000000000..5395f844a5

--- /dev/null

+++ b/elf/tst-tls-ie-mod3.c

@@ -0,0 +1,4 @@ 

+#define N 3

+#define SIZE 16

+#define MODEL "global-dynamic"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie-mod4.c b/elf/tst-tls-ie-mod4.c

new file mode 100644
index 0000000000..d6a1998d6d

--- /dev/null

+++ b/elf/tst-tls-ie-mod4.c

@@ -0,0 +1,4 @@ 

+#define N 4

+#define SIZE 120

+#define MODEL "initial-exec"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie-mod5.c b/elf/tst-tls-ie-mod5.c

new file mode 100644
index 0000000000..3bb4dbcbfb

--- /dev/null

+++ b/elf/tst-tls-ie-mod5.c

@@ -0,0 +1,4 @@ 

+#define N 5

+#define SIZE 8

+#define MODEL "initial-exec"

+#include "tst-tls-ie-mod.h"

diff --git a/elf/tst-tls-ie.c b/elf/tst-tls-ie.c

new file mode 100644
index 0000000000..d17f1fc59e

--- /dev/null

+++ b/elf/tst-tls-ie.c

@@ -0,0 +1,98 @@ 

+/* Test dlopen of modules with initial-exec TLS.

+   Copyright (C) 2016-2020 Free Software Foundation, Inc.

+   This file is part of the GNU C Library.

+

+   The GNU C Library is free software; you can redistribute it and/or

+   modify it under the terms of the GNU Lesser General Public

+   License as published by the Free Software Foundation; either

+   version 2.1 of the License, or (at your option) any later version.

+

+   The GNU C Library is distributed in the hope that it will be useful,

+   but WITHOUT ANY WARRANTY; without even the implied warranty of

+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

+   Lesser General Public License for more details.

+

+   You should have received a copy of the GNU Lesser General Public

+   License along with the GNU C Library; if not, see

+   <https://www.gnu.org/licenses/>.  */

+

+/* This test tries to ensure that at least 128 bytes of surplus TLS is

+   available for dlopening modules with initial-exec TLS.  */

+

+#include <errno.h>

+#include <pthread.h>

+#include <stdio.h>

+#include <stdlib.h>

+#include <string.h>

+

+static int do_test (void);

+#include <support/xthread.h>

+#include <support/xdlfcn.h>

+#include <support/test-driver.c>

+

+/* Have some big TLS in the main exe: should not use surplus TLS.  */

+__thread char maintls[1000];

+

+static pthread_barrier_t barrier;

+

+/* Forces multi-threaded behaviour.  */

+static void *

+blocked_thread_func (void *closure)

+{

+  xpthread_barrier_wait (&barrier);

+  /* TLS load and access tests run here in the main thread.  */

+  xpthread_barrier_wait (&barrier);

+  return NULL;

+}

+

+static void *

+load_and_access (const char *mod, const char *func)

+{

+  /* Load module with TLS.  */

+  void *p = xdlopen (mod, RTLD_NOW);

+  /* Access the TLS variable to ensure it is allocated.  */

+  void (*f) (void) = (void (*) (void))xdlsym (p, func);

+  f ();

+  return p;

+}

+

+static int

+do_test (void)

+{

+  void *mods[6];

+

+  {

+    int ret = pthread_barrier_init (&barrier, NULL, 2);

+    if (ret != 0)

+      {

+        errno = ret;

+        printf ("error: pthread_barrier_init: %m\n");

+        exit (1);

+      }

+  }

+

+  pthread_t blocked_thread = xpthread_create (NULL, blocked_thread_func, NULL);

+  xpthread_barrier_wait (&barrier);

+

+  printf ("maintls[%zu]:\t %p .. %p\n",

+	   sizeof maintls, maintls, maintls + sizeof maintls);

+  memset (maintls, 1, sizeof maintls);

+

+  /* Load modules with dynamic TLS (may use surplus TLS opportunistically).  */

+  mods[0] = load_and_access ("tst-tls-ie-mod0.so", "access0");

+  mods[1] = load_and_access ("tst-tls-ie-mod1.so", "access1");

+  mods[2] = load_and_access ("tst-tls-ie-mod2.so", "access2");

+  mods[3] = load_and_access ("tst-tls-ie-mod3.so", "access3");

+  /* Load modules with initial-exec TLS (can only use surplus TLS).  */

+  mods[4] = load_and_access ("tst-tls-ie-mod4.so", "access4");

+  mods[5] = load_and_access ("tst-tls-ie-mod5.so", "access5");

+

+  xpthread_barrier_wait (&barrier);

+  xpthread_join (blocked_thread);

+

+  /* Close the modules.  */

+  for (int i = 0; i < 6; ++i)

+    xdlclose (mods[i]);

+

+  return 0;

+}

-- 

2.17.1