[v14,3/9] Add generic 'extra TLS'

Message ID 20241121190924.837446-4-mjeanson@efficios.com
State Under Review
Delegated to: Florian Weimer
Series: Add rseq extensible ABI support

Checks

Context                            Check    Description
redhat-pt-bot/TryBot-apply_patch   success  Patch applied to master at the time it was sent

Commit Message

Michael Jeanson Nov. 21, 2024, 7:08 p.m. UTC
  Add the logic to append an 'extra TLS' block in the TLS block allocator,
with a generic stub implementation.  The logic is duplicated in
'csu/libc-tls.c' and 'elf/dl-tls.c' to handle both statically linked
applications and the ELF dynamic loader.

Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
---
Changes since v13:
- Improve commit message
---
 csu/libc-tls.c                 | 59 ++++++++++++++++++++++++++++------
 elf/dl-tls.c                   | 59 ++++++++++++++++++++++++++++++++++
 sysdeps/generic/dl-extra_tls.h | 45 ++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 9 deletions(-)
 create mode 100644 sysdeps/generic/dl-extra_tls.h
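
Note on the layout arithmetic: the sketch below is not part of the patch;
it is a standalone illustration of the 'roundup' expressions added to
csu/libc-tls.c, using made-up example values for memsz, align,
TLS_INIT_TCB_SIZE and the extra TLS size/alignment.  It shows where the
extra block lands relative to the thread pointer under each TLS layout.

#include <stdio.h>
#include <stddef.h>

/* Same rounding helper as roundup() from <sys/param.h>.  */
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))

int
main (void)
{
  size_t memsz = 120;          /* PT_TLS memory size (example value).  */
  size_t align = 16;           /* PT_TLS alignment (example value; the
                                  patch guards zero with 'align ?: 1').  */
  size_t extra_tls_size = 32;  /* Example extra TLS block parameters.  */
  size_t extra_tls_align = 32;

  /* TLS_TCB_AT_TP (e.g. x86_64): the TLS blocks sit below the thread
     pointer, so the extra block gets the most negative offset.  */
  size_t blocks_tcb = roundup (roundup (memsz, align) + extra_tls_size,
                               extra_tls_align);
  printf ("TLS_TCB_AT_TP: tls_blocks_size=%zu offset=%td\n",
          blocks_tcb, -(ptrdiff_t) blocks_tcb);

  /* TLS_DTV_AT_TP (e.g. aarch64): the TLS blocks sit above the thread
     pointer, so the extra block ends up last, at a positive offset.  */
  size_t init_tcb_size = 16;   /* Stand-in for TLS_INIT_TCB_SIZE.  */
  size_t tcb_offset = roundup (init_tcb_size, align);
  size_t blocks_dtv = roundup (tcb_offset + memsz + extra_tls_size,
                               extra_tls_align);
  printf ("TLS_DTV_AT_TP: tls_blocks_size=%zu offset=%td\n",
          blocks_dtv, (ptrdiff_t) (blocks_dtv - extra_tls_size));
  return 0;
}

With these example values the extra block sits at offset -160 from the
thread pointer in the first layout and +160 in the second.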
  

Patch

diff --git a/csu/libc-tls.c b/csu/libc-tls.c
index b7682bdf43..03a000c499 100644
--- a/csu/libc-tls.c
+++ b/csu/libc-tls.c
@@ -26,6 +26,7 @@ 
 #include <array_length.h>
 #include <pthreadP.h>
 #include <dl-call_tls_init_tp.h>
+#include <dl-extra_tls.h>
 
 #ifdef SHARED
  #error makefile bug, this file is for static only
@@ -110,6 +111,7 @@  __libc_setup_tls (void)
   size_t filesz = 0;
   void *initimage = NULL;
   size_t align = 0;
+  size_t tls_blocks_size = 0;
   size_t max_align = TCB_ALIGNMENT;
   size_t tcb_offset;
   const ElfW(Phdr) *phdr;
@@ -135,22 +137,67 @@  __libc_setup_tls (void)
   /* Calculate the size of the static TLS surplus, with 0 auditors.  */
   _dl_tls_static_surplus_init (0);
 
+  /* Extra TLS block for internal usage to append at the end of the TLS blocks
+     (in allocation order).  On Linux systems this is where the rseq area will
+     be allocated.  On other systems it is currently unused and both values
+     will be '0'.  */
+  size_t extra_tls_size = _dl_extra_tls_get_size ();
+  size_t extra_tls_align = _dl_extra_tls_get_align ();
+
+  /* Increase the maximum alignment with the extra TLS alignment requirements
+     if necessary.  */
+  max_align = MAX (max_align, extra_tls_align);
+
   /* We have to set up the TCB block which also (possibly) contains
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
     Instead we use 'sbrk' which would only use 'errno' if it fails.
      In this case we are right away out of memory and the user gets
      what she/he deserves.  */
 #if TLS_TCB_AT_TP
+  /* Before the thread pointer, add the aligned TLS block size and then
+     align the extra TLS block on top.  */
+  tls_blocks_size = roundup (roundup (memsz, align ?: 1) + extra_tls_size,
+                             extra_tls_align);
+
+  /* Record the extra TLS block offset from the thread pointer.
+
+     With TLS_TCB_AT_TP the TLS blocks are allocated before the thread pointer
+     in reverse order.  Our block is added last which results in it being the
+     first in the static TLS block, thus record the most negative offset.
+
+     The alignment requirements of the pointer resulting from this offset and
+     the thread pointer are enforced by 'max_align' which is used to align the
+     tcb_offset.  */
+  _dl_extra_tls_set_offset (-tls_blocks_size);
+
   /* Align the TCB offset to the maximum alignment, as
      _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
      and dl_tls_static_align.  */
-  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
+  tcb_offset = roundup (tls_blocks_size + GLRO(dl_tls_static_surplus), max_align);
   tlsblock = _dl_early_allocate (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
   if (tlsblock == NULL)
     _startup_fatal_tls_error ();
 #elif TLS_DTV_AT_TP
+  /* Align the TLS block on top of the initial TCB.  */
   tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
-  tlsblock = _dl_early_allocate (tcb_offset + memsz + max_align
+
+  /* After the thread pointer, add the initial TCB plus the TLS block size
+     and then align the extra TLS block on top.  */
+  tls_blocks_size = roundup (tcb_offset + memsz + extra_tls_size, extra_tls_align);
+
+  /* Record the extra TLS block offset from the thread pointer.
+
+     With TLS_DTV_AT_TP the TLS blocks are allocated after the thread pointer
+     in order.  Our block is added last, making it the last in the static TLS
+     block, thus record the offset as the size of the static TLS block minus
+     the size of our block.  The resulting offset will be positive.
+
+     The alignment requirements of the pointer resulting from this offset and
+     the thread pointer are enforced by 'max_align' which is used to align the
+     tcb_offset.  */
+  _dl_extra_tls_set_offset (tls_blocks_size - extra_tls_size);
+
+  tlsblock = _dl_early_allocate (tls_blocks_size + max_align
 				 + TLS_PRE_TCB_SIZE
 				 + GLRO(dl_tls_static_surplus));
   if (tlsblock == NULL)
@@ -209,11 +256,5 @@  __libc_setup_tls (void)
   /* static_slotinfo.slotinfo[1].gen = 0; -- Already zero.  */
   static_slotinfo.slotinfo[1].map = main_map;
 
-  memsz = roundup (memsz, align ?: 1);
-
-#if TLS_DTV_AT_TP
-  memsz += tcb_offset;
-#endif
-
-  init_static_tls (memsz, MAX (TCB_ALIGNMENT, max_align));
+  init_static_tls (tls_blocks_size, MAX (TCB_ALIGNMENT, max_align));
 }
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 3d529b722c..57e72be4f2 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -36,6 +36,8 @@ 
 #define TUNABLE_NAMESPACE rtld
 #include <dl-tunables.h>
 
+#include <dl-extra_tls.h>
+
 /* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
 
    - IE TLS in libc.so for all dlmopen namespaces except in the initial
@@ -323,6 +325,33 @@  _dl_determine_tlsoffset (void)
       slotinfo[cnt].map->l_tls_offset = off;
     }
 
+  /* Insert the extra TLS block after the last TLS block.  */
+
+  /* Extra TLS block for internal usage to append at the end of the TLS blocks
+     (in allocation order).  On Linux systems this is where the rseq area will
+     be allocated.  On other systems it is currently unused and both values
+     will be '0'.  */
+  size_t extra_tls_size = _dl_extra_tls_get_size ();
+  size_t extra_tls_align = _dl_extra_tls_get_align ();
+
+  /* Align and add the extra TLS block to the global offset.  */
+  offset = roundup (offset, extra_tls_align) + extra_tls_size;
+
+  /* Increase the maximum alignment with the extra TLS alignment requirements
+     if necessary.  */
+  max_align = MAX (max_align, extra_tls_align);
+
+  /* Record the extra TLS offset.
+
+     With TLS_TCB_AT_TP the TLS blocks are allocated before the thread pointer
+     in reverse order.  Our block is added last which results in it being the
+     first in the static TLS block, thus record the most negative offset.
+
+     The alignment requirements of the pointer resulting from this offset and
+     the thread pointer are enforced by 'max_align' which is used to align the
+     static TLS block.  */
+  _dl_extra_tls_set_offset (-offset);
+
   GL(dl_tls_static_used) = offset;
   GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
 					max_align)
@@ -368,6 +397,36 @@  _dl_determine_tlsoffset (void)
       offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
     }
 
+  /* Insert the extra TLS block after the last TLS block.  */
+
+  /* Extra TLS block for internal usage to append at the end of the TLS blocks
+     (in allocation order).  On Linux systems this is where the rseq area will
+     be allocated.  On other systems it is currently unused and both values
+     will be '0'.  */
+  size_t extra_tls_size = _dl_extra_tls_get_size ();
+  size_t extra_tls_align = _dl_extra_tls_get_align ();
+
+  /* Align the global offset to the beginning of the extra TLS block.  */
+  offset = roundup (offset, extra_tls_align);
+
+  /* Record the extra TLS offset.
+
+     With TLS_DTV_AT_TP the TLS blocks are allocated after the thread pointer
+     in order.  Our block is added last, making it the last in the static TLS
+     block, thus record the offset as the size of the static TLS block minus
+     the size of our block.  The resulting offset will be positive.
+
+     The alignment requirements of the pointer resulting from this offset and
+     the thread pointer are enforced by 'max_align' which is used to align the
+     static TLS block.  */
+  _dl_extra_tls_set_offset (offset);
+
+  /* Add the extra TLS block to the global offset.  */
+  offset += extra_tls_size;
+
+  /* Increase the max_align if necessary.  */
+  max_align = MAX (max_align, extra_tls_align);
+
   GL(dl_tls_static_used) = offset;
   GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
 				       TCB_ALIGNMENT);
diff --git a/sysdeps/generic/dl-extra_tls.h b/sysdeps/generic/dl-extra_tls.h
new file mode 100644
index 0000000000..c699e88e85
--- /dev/null
+++ b/sysdeps/generic/dl-extra_tls.h
@@ -0,0 +1,45 @@ 
+/* Extra TLS utilities for the dynamic linker.  Generic stub version.
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_EXTRA_TLS_H
+#define _DL_EXTRA_TLS_H 1
+#include <stddef.h>
+
+/* In this generic version, the extra TLS block is unused.  */
+
+/* Returns the size of the extra TLS block.  */
+static inline size_t
+_dl_extra_tls_get_size (void)
+{
+  return 0;
+}
+
+/* Returns the alignment requirements of the extra TLS block.  */
+static inline size_t
+_dl_extra_tls_get_align (void)
+{
+  return 0;
+}
+
+/* Record the offset of the extra TLS block from the thread pointer.  */
+static inline void
+_dl_extra_tls_set_offset (ptrdiff_t tls_offset __attribute__ ((unused)))
+{
+}
+
+#endif
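
For reference, later patches in this series are expected to provide a
Linux version of this header that sizes the block for the rseq area; the
header below is only a hypothetical sketch of how a port could override
the generic stub.  The constant values and the '_hypothetical_rseq_offset'
variable are invented for illustration and are not the names used by the
actual series.

#ifndef _DL_EXTRA_TLS_H
#define _DL_EXTRA_TLS_H 1
#include <stddef.h>

/* Placeholder size and alignment for the per-thread area a port wants
   carved out of the static TLS block (e.g. the rseq area on Linux).  */
#define HYPOTHETICAL_EXTRA_TLS_SIZE 32
#define HYPOTHETICAL_EXTRA_TLS_ALIGN 32

/* Offset of the extra TLS block from the thread pointer, as recorded by
   the TLS allocator; consumers would read it back to locate the block.  */
static ptrdiff_t _hypothetical_rseq_offset;

/* Returns the size of the extra TLS block.  */
static inline size_t
_dl_extra_tls_get_size (void)
{
  return HYPOTHETICAL_EXTRA_TLS_SIZE;
}

/* Returns the alignment requirements of the extra TLS block.  */
static inline size_t
_dl_extra_tls_get_align (void)
{
  return HYPOTHETICAL_EXTRA_TLS_ALIGN;
}

/* Record the offset of the extra TLS block from the thread pointer.  */
static inline void
_dl_extra_tls_set_offset (ptrdiff_t tls_offset)
{
  _hypothetical_rseq_offset = tls_offset;
}

#endif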