On 06/19/2017 12:18 PM, Florian Weimer wrote:
> This is a follow-up to bug 15128, where an unaligned stack was
> compensated for in the dynamic linker relocation trampoline.
> __tls_get_addr itself was not fixed, though, and the function can reach
> deeply into libc due to its malloc dependency, so it can result in
> crashes with an unaligned stack, too.
>
> The attached patch adds a compatibility implementation of __tls_get_addr
> which aligns the stack, but only on the slow path. Internal calls go to
> the default implementation (from elf/dl-tls.c) which does not perform
> stack realignment.
>
> I plan to submit a follow-up patch which adds a new symbol version for
> __tls_get_addr which bypasses the stack alignment for new binaries.
>
> In my patch, the CFI annotations need review. I have never written
> those before.
I went over the CFI annotations with Jakub. He suggested some
simplifications, incorporated in the attached patch. We don't need
.cfi_restore because the %rbp value at the specified offset is protected
by the red zone until the function returns.
Thanks,
Florian
x86-64: Align the stack in __tls_get_addr [BZ #21609]
This change forces realignment of the stack pointer in
__tls_get_addr, so that old GCC-compiled binaries continue to work
even if glibc uses vector instructions which require the stack
alignment mandated by the ABI.
The new assembler implementation of __tls_get_addr calls the default
implementation (from elf/dl-tls.c, now called __tls_get_addr_default)
after realigning the stack (but only does so on the slow path).
Internal calls go directly to __tls_get_addr_default because they do not
need stack realignment.
2017-06-20 Florian Weimer <fweimer@redhat.com>
[BZ #21609]
__tls_get_addr implementation with stack alignment for older GCC.
* sysdeps/x86_64/dl-tls.h: Add multiple inclusion guards.
* sysdeps/x86_64/tls_get_addr_compat.S: New file.
* sysdeps/x86_64/dl-tls.c: Likewise.
* sysdeps/x86_64/rtld-offsets.sym: Likewise.
* sysdeps/x86_64/dl-tlsdesc.h: Include <dl-tls.h>.
(tls_index): Remove definition.
* sysdeps/x86_64/tlsdesc.sym (TI_MODULE_OFFSET, TI_OFFSET_OFFSET):
Add.
* sysdeps/x86_64/Makefile (sysdep-dl-routines): Add
tls_get_addr_compat.
(gen-as-const-headers): Add rtld-offsets.sym.
@@ -27,7 +27,7 @@ ifeq ($(subdir),elf)
CFLAGS-.os += $(if $(filter $(@F),$(patsubst %,%.os,$(all-rtld-routines))),\
-mno-mmx)
-sysdep-dl-routines += tlsdesc dl-tlsdesc
+sysdep-dl-routines += tlsdesc dl-tlsdesc tls_get_addr_compat
tests += ifuncmain8
modules-names += ifuncmod8
@@ -120,5 +120,5 @@ endif
endif
ifeq ($(subdir),csu)
-gen-as-const-headers += tlsdesc.sym
+gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
endif
new file mode 100644
@@ -0,0 +1,42 @@
+/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+/* Work around GCC PR58066, due to which __tls_get_addr may be called
+   with an unaligned stack.  The stack-realigning compat entry point is
+   implemented in assembly, in tls_get_addr_compat.S.  */
+
+# include <shlib-compat.h>
+# include <dl-tls.h>
+
+/* Build the generic implementation from elf/dl-tls.c under the name
+   __tls_get_addr_default by renaming it for the duration of the include.  */
+extern __typeof__ (__tls_get_addr) __tls_get_addr_default;
+
+# define __tls_get_addr __tls_get_addr_default
+# include <elf/dl-tls.c>
+/* Drop the rename macro so that __tls_get_addr below names the real symbol.  */
+# undef __tls_get_addr
+hidden_ver (__tls_get_addr_default, __tls_get_addr)
+
+#else
+
+/* No compatibility symbol needed.  */
+# include <elf/dl-tls.c>
+
+#endif
@@ -16,6 +16,9 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef _X86_64_DL_TLS_H
+#define _X86_64_DL_TLS_H
+
#include <stdint.h>
/* Type used for the representation of TLS information in the GOT. */
@@ -27,3 +30,5 @@ typedef struct dl_tls_index
extern void *__tls_get_addr (tls_index *ti);
+
+#endif /* _X86_64_DL_TLS_H */
@@ -17,10 +17,11 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <stdint.h>
-
#ifndef _X86_64_DL_TLSDESC_H
-# define _X86_64_DL_TLSDESC_H 1
+#define _X86_64_DL_TLSDESC_H
+
+#include <stdint.h>
+#include <dl-tls.h>
/* Type used to represent a TLS descriptor in the GOT. */
struct tlsdesc
@@ -39,12 +40,6 @@ struct tlsdesc
};
};
-typedef struct dl_tls_index
-{
- uint64_t ti_module;
- uint64_t ti_offset;
-} tls_index;
-
/* Type used as the argument in a TLS descriptor for a symbol that
needs dynamic TLS offsets. */
struct tlsdesc_dynamic_arg
@@ -59,12 +54,12 @@ extern ptrdiff_t attribute_hidden
_dl_tlsdesc_resolve_rela(struct tlsdesc *on_rax),
_dl_tlsdesc_resolve_hold(struct tlsdesc *on_rax);
-# ifdef SHARED
+#ifdef SHARED
extern void *_dl_make_tlsdesc_dynamic (struct link_map *map,
size_t ti_offset)
internal_function attribute_hidden;
extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic(struct tlsdesc *);
-# endif
-
#endif
+
+#endif /* _X86_64_DL_TLSDESC_H */
new file mode 100644
@@ -0,0 +1,6 @@
+#define SHARED /* Presumably needed so that <ldsodefs.h> declares struct rtld_global (used below) -- TODO confirm.  */
+#include <ldsodefs.h>
+
+--
+
+GL_TLS_GENERATION_OFFSET offsetof (struct rtld_global, _dl_tls_generation)
new file mode 100644
@@ -0,0 +1,61 @@
+/* Stack-aligning implementation of __tls_get_addr.
+ Copyright (C) 2004-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef SHARED
+
+# include <sysdep.h>
+# include "tlsdesc.h"
+# include "rtld-offsets.h"
+
+/* Compat entry point for __tls_get_addr (see dl-tls.c); %rdi points to the
+ tls_index.  The fast path returns the address in %rax directly; both slow
+ paths realign the stack (GCC PR58066) and call the default implementation. */
+ .align 16
+__tls_get_addr_compat:
+ .type __tls_get_addr_compat,@function
+ .global __tls_get_addr_compat
+ strong_alias (__tls_get_addr_compat, __tls_get_addr) /* __tls_get_addr aliases this symbol */
+ cfi_startproc
+ movq %fs:DTV_OFFSET, %rdx /* %rdx = dtv pointer, read via %fs */
+ movq GL_TLS_GENERATION_OFFSET+_rtld_local(%rip), %rax /* %rax = GL(dl_tls_generation) */
+ cmpq %rax, (%rdx) /* GL(dl_tls_generation) == dtv[0].counter */
+ jne 1f /* generation mismatch: take the slow path */
+ movq TI_MODULE_OFFSET(%rdi), %rax /* %rax = ti->ti_module */
+ salq $4, %rax /* scale the index by 16; assumes 16-byte dtv entries -- confirm */
+ movq (%rdx,%rax), %rax /* dtv[ti->ti_module] */
+ cmpq $-1, %rax /* -1 is presumably the "not yet allocated" marker -- confirm */
+ je 1f /* no usable block pointer: take the slow path */
+ addq TI_OFFSET_OFFSET(%rdi), %rax /* result = block pointer + ti->ti_offset */
+ ret
+1:
+ /* On the slow path, align the stack. %rdi still holds the caller's argument. */
+ pushq %rbp /* save the caller's %rbp */
+ cfi_def_cfa_offset (16) /* return address + saved %rbp are now on the stack */
+ cfi_offset (%rbp, -16) /* caller's %rbp is stored at CFA-16 */
+ movq %rsp, %rbp /* remember the possibly unaligned %rsp */
+ cfi_def_cfa_register (%rbp) /* track the CFA via %rbp while %rsp is realigned */
+ andq $-16, %rsp /* round %rsp down to a 16-byte boundary */
+ call HIDDEN_JUMPTARGET (__tls_get_addr) /* presumably reaches __tls_get_addr_default via hidden_ver in dl-tls.c */
+ movq %rbp, %rsp /* undo the realignment */
+ popq %rbp /* restore the caller's %rbp */
+ cfi_def_cfa (%rsp, 8) /* CFA is %rsp+8 again, as at function entry */
+ ret
+ cfi_endproc
+ .size __tls_get_addr_compat, .-__tls_get_addr_compat
+
+#endif /* SHARED */
@@ -15,3 +15,6 @@ TLSDESC_ARG offsetof(struct tlsdesc, arg)
TLSDESC_GEN_COUNT offsetof(struct tlsdesc_dynamic_arg, gen_count)
TLSDESC_MODID offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
TLSDESC_MODOFF offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+
+TI_MODULE_OFFSET offsetof(tls_index, ti_module)
+TI_OFFSET_OFFSET offsetof(tls_index, ti_offset)