hurd: add TLS support

Message ID 20150207222951.GZ3023@type.youpi.perso.aquilenet.fr
State Superseded, archived
Headers

Commit Message

Samuel Thibault Feb. 7, 2015, 10:29 p.m. UTC
  2009-07-30  Samuel Thibault  <samuel.thibault@gnu.org>

	Add TLS support: __mach_setup_tls allocates and sets
	architecture state for the TLS area.  i386 fork needs to
	propagate the segment kernel definitions.

	* sysdeps/mach/hurd/bits/libc-lock.h [_LIBC - 0]: Include <tls.h>.
	* sysdeps/mach/hurd/tls.h: Include <stdint.h> and <sysdep.h>.
	* include/errno.h (__GNU__): Do not define TLS errno for now.

	* sysdeps/generic/thread_state.h (MACHINE_NEW_THREAD_STATE_FLAVOR): New
	macro.
	* sysdeps/mach/thread_state.h (MACHINE_THREAD_STATE_FIX_NEW): New macro.
	* sysdeps/mach/i386/thread_state.h (MACHINE_NEW_THREAD_STATE_FLAVOR):
	New macro, defined to i386_THREAD_STATE.
	(MACHINE_THREAD_STATE_FLAVOR): Define to i386_REGS_SEGS_STATE instead
	of i386_THREAD_STATE.
	(MACHINE_THREAD_STATE_FIX_NEW): New macro, reads segments.

	* sysdeps/mach/hurd/i386/trampoline.c (_hurd_setup_sighandler): Use
	i386_REGS_SEGS_STATE instead of i386_THREAD_STATE.

	* sysdeps/mach/hurd/i386/tls.h (_hurd_tls_init): Use kern_return_t
	error type. Use first GDT slot, 0x48.
	(_hurd_tls_fork): Use kern_return_t error type.  Duplicate existing LDT
	descriptor instead of creating a new one.
	(_hurd_tls_new): New function, creates a new descriptor and updates tcb.

	* mach/mach.h (__mach_setup_tls,mach_setup_tls): Add declarations.
	* mach/setup-thread.c: Include <ldsodefs.h>.
	(__mach_setup_thread): Use MACHINE_NEW_THREAD_STATE_FLAVOR instead of
	MACHINE_THREAD_STATE_FLAVOR.
	(__mach_setup_tls): New function.
	* hurd/hurdfault.c (_hurdsig_fault_init): Call
	MACHINE_THREAD_STATE_FIX_NEW.

	* sysdeps/mach/hurd/profil.c (update_waiter): Call __mach_setup_tls.
	* sysdeps/mach/hurd/setitimer.c (setitimer_locked): Call
	__mach_setup_tls.
	* hurd/hurdsig.c (_hurdsig_init): Call __mach_setup_tls.
	* sysdeps/mach/hurd/fork.c (__fork): Call _hurd_tls_fork for
	sigthread.  Pass kernel thread to _hurd_tls_fork.
	* sysdeps/mach/hurd/i386/init-first.c (init): Move ELF header parsing
	after getting up the environment pointer.  Call
	__pthread_initialize_minimal.
	* csu/libc-start.c (LIBC_START_MAIN) [__GNU__]: Do not call
	__pthread_initialize_minimal.

---
 csu/libc-start.c                    |  2 ++
 hurd/hurdfault.c                    |  2 ++
 hurd/hurdsig.c                      |  2 ++
 include/errno.h                     | 14 +++++-----
 mach/mach.h                         |  3 +++
 mach/setup-thread.c                 | 30 ++++++++++++++++++++-
 sysdeps/generic/thread_state.h      |  1 +
 sysdeps/mach/hurd/bits/libc-lock.h  |  3 +++
 sysdeps/mach/hurd/fork.c            |  7 ++++-
 sysdeps/mach/hurd/i386/init-first.c | 53 +++++++++++++++++++++++--------------
 sysdeps/mach/hurd/i386/tls.h        | 53 ++++++++++++++++++++++++++++++-------
 sysdeps/mach/hurd/i386/trampoline.c |  2 +-
 sysdeps/mach/hurd/profil.c          |  2 ++
 sysdeps/mach/hurd/setitimer.c       |  3 ++-
 sysdeps/mach/hurd/tls.h             |  2 ++
 sysdeps/mach/i386/thread_state.h    | 11 +++++++-
 sysdeps/mach/thread_state.h         |  3 +++
 17 files changed, 152 insertions(+), 41 deletions(-)
  

Comments

Samuel Thibault Feb. 7, 2015, 10:30 p.m. UTC | #1
Note: we also have support for TLS-based per-thread glibc variables;
I will submit that later.

Samuel
  
Roland McGrath Feb. 8, 2015, 3:22 a.m. UTC | #2
> --- a/csu/libc-start.c
> +++ b/csu/libc-start.c
> @@ -189,10 +189,12 @@ LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
>    /* Perform IREL{,A} relocations.  */
>    apply_irel ();
>  
> +#ifndef __GNU__
>    /* Initialize the thread library at least a bit since the libgcc
>       functions are using thread functions if these are available and
>       we need to setup errno.  */
>    __pthread_initialize_minimal ();
> +#endif

This is not the way we do things.  Leaving aside why you are avoiding this
call, if you really need to then the way to do that is to add a
sysdeps/mach/hurd/libc-start.c that defines a static stub function and then
does #include <csu/libc-start.c>.

In that new file, you need some comments explaining why you are avoiding
the normal path to __libc_setup_tls.

> --- a/include/errno.h
> +++ b/include/errno.h

Same issue here.  You can add a sysdeps/mach/hurd/include/errno.h,
which can use #include_next.

> --- a/mach/mach.h
> +++ b/mach/mach.h
> @@ -100,5 +100,8 @@ kern_return_t mach_setup_thread (task_t task, thread_t thread, void *pc,
>  				 vm_address_t *stack_base,
>  				 vm_size_t *stack_size);
>  
> +/* Give THREAD a TLS area.  */
> +kern_return_t __mach_setup_tls (thread_t thread);
> +kern_return_t mach_setup_tls (thread_t thread);

This doesn't really seem like a useful function to have in the public API.
It really does nothing but fiddle with libc internals that don't have a
public API, and the only thing outside libc itself that would call this
would be a threads library.  But since you're not exporting it in any
Versions file, it really seems that it's purely internal.  So put it in an
internal header and give it attribute_hidden.

Conversely, is there really any reason not to just roll this into
mach_setup_thread?  You're calling it in all the internal places that use
__mach_setup_thread.  mach_setup_thread has an existing interface contract
described pretty precisely by the comment in <mach.h>; still, if TLS
support is going to exist at all in a given process, having mach_setup_thread
also do TLS setup implicitly is probably more useful than not.  But if we are
actually worried that existing users of mach_setup_thread outside libc
itself would be perturbed by a new libc.so doing it, then we can add a new
symbol version for mach_setup_thread.

> +/* Give THREAD a TLS area.  */
> +kern_return_t
> +__mach_setup_tls (thread_t thread)
> +{
> +  kern_return_t error;
> +  struct machine_thread_state ts;
> +  mach_msg_type_number_t tssize = MACHINE_THREAD_STATE_COUNT;
> +  tcbhead_t *tcb;
> +
> +  if (error = __thread_get_state (thread, MACHINE_THREAD_STATE_FLAVOR,
> +			     (natural_t *) &ts, &tssize))
> +    return error;
> +  assert (tssize == MACHINE_THREAD_STATE_COUNT);
> +
> +  tcb = _dl_allocate_tls (NULL);
> +  if (!tcb)

No implicit Boolean coercion (use "tcb == NULL").

> +    return KERN_RESOURCE_SHORTAGE;

Why not do _dl_allocate_tls first at the very top of the function,
before any of the other variable declarations?  Then it's very clear,
and in the failure case you don't bother with __thread_get_state.

> +  _hurd_tls_new (thread, &ts, tcb);

Technically code in mach/ shouldn't be calling code in mach/hurd/ like
this.  What about the TLS details is actually Hurd-specific, anyway?
(But I won't really quibble about this separation, since it is useless
in practice.)

> --- a/sysdeps/generic/thread_state.h
> +++ b/sysdeps/generic/thread_state.h
> @@ -22,6 +22,7 @@
>  
>  /* Replace <machine> with "i386" or "mips" or whatever.  */
>  
> +#define MACHINE_NEW_THREAD_STATE_FLAVOR	<machine>_NEW_THREAD_STATE
>  #define MACHINE_THREAD_STATE_FLAVOR	<machine>_THREAD_STATE
>  #define MACHINE_THREAD_STATE_COUNT	<machine>_THREAD_STATE_COUNT

AFAIK there is no Mach convention of flavors named
<machine>_NEW_THREAD_STATE.  So you really need some comments here about
what MACHINE_NEW_THREAD_STATE_FLAVOR is for and how it ought to be used.

Having just looked at the kernel to see what the addition of
i386_REGS_SEGS_STATE was about, I am mystified.  It enforces a few
constraints on the segment registers, which, if not met, should already
have meant that the thread would trap as soon as it was run.
I don't get it.  If this is important to the uses in libc, then comments
in the libc code should make me get it.


> diff --git a/sysdeps/mach/hurd/bits/libc-lock.h b/sysdeps/mach/hurd/bits/libc-lock.h
> index 4ffb311..8bf5656 100644
> --- a/sysdeps/mach/hurd/bits/libc-lock.h
> +++ b/sysdeps/mach/hurd/bits/libc-lock.h
> @@ -20,6 +20,9 @@
>  #define _BITS_LIBC_LOCK_H 1
>  
>  #if (_LIBC - 0) || (_CTHREADS_ - 0)
> +#if (_LIBC - 0)
> +#include <tls.h>
> +#endif
>  #include <cthreads.h>
>  #include <hurd/threadvar.h>

Why does it need tls.h if you're not touching this file otherwise?
Also, write it as "# include" (indented), since it is nested inside "#if".

> diff --git a/sysdeps/mach/hurd/fork.c b/sysdeps/mach/hurd/fork.c
> index 321421f..f19cfc4 100644
> --- a/sysdeps/mach/hurd/fork.c
> +++ b/sysdeps/mach/hurd/fork.c
> @@ -528,6 +528,11 @@ __fork (void)
>  #endif
>        MACHINE_THREAD_STATE_SET_PC (&state,
>  				   (unsigned long int) _hurd_msgport_receive);
> +
> +      /* Do special thread setup for TLS if needed.  */
> +      if (err = _hurd_tls_fork (sigthread, _hurd_msgport_thread, &state))
> +	LOSE;

It's slightly confusing that this has exactly the same comment as the
call below.  Maybe it's only because I was reading the patch before I
went to re-read the whole context.  But it would be clearer if the
comment explicitly said this is doing TLS for the signal thread, while
the later one is for the main thread.

> diff --git a/sysdeps/mach/hurd/i386/init-first.c b/sysdeps/mach/hurd/i386/init-first.c
> index 8fb613b..74b3a56 100644
> --- a/sysdeps/mach/hurd/i386/init-first.c
> +++ b/sysdeps/mach/hurd/i386/init-first.c
> @@ -113,31 +113,11 @@ init1 (int argc, char *arg0, ...)
>       data block; the argument strings start there.  */
>    if ((void *) d == argv[0])
>      {
> -#ifndef SHARED
> -      /* With a new enough linker (binutils-2.23 or better),
> -         the magic __ehdr_start symbol will be available and
> -         __libc_start_main will have done this that way already.  */
> -      if (_dl_phdr == NULL)
> -        {
> -          /* We may need to see our own phdrs, e.g. for TLS setup.
> -             Try the usual kludge to find the headers without help from
> -             the exec server.  */
> -          extern const void __executable_start;
> -          const ElfW(Ehdr) *const ehdr = &__executable_start;
> -          _dl_phdr = (const void *) ehdr + ehdr->e_phoff;
> -          _dl_phnum = ehdr->e_phnum;
> -          assert (ehdr->e_phentsize == sizeof (ElfW(Phdr)));
> -        }
> -#endif
>        return;
>      }

Drop the braces around the single statement (return) in the if.

> @@ -193,6 +173,39 @@ init (int *data)
>      ++envp;
>    d = (void *) ++envp;
>  
> +#ifndef SHARED
> +  /* If we are the bootstrap task started by the kernel,
> +     then after the environment pointers there is no Hurd
> +     data block; the argument strings start there.  */
> +  if ((void *) d == argv[0])
> +    {
> +      /* With a new enough linker (binutils-2.23 or better),
> +	 the magic __ehdr_start symbol will be available and
> +	 __libc_start_main will have done this that way already.  */
> +      if (_dl_phdr == NULL)
> +        {
> +	  /* We may need to see our own phdrs, e.g. for TLS setup.
> +	     Try the usual kludge to find the headers without help from
> +	     the exec server.  */
> +	  extern const void __executable_start;
> +	  const ElfW(Ehdr) *const ehdr = &__executable_start;
> +	  _dl_phdr = (const void *) ehdr + ehdr->e_phoff;
> +	  _dl_phnum = ehdr->e_phnum;
> +	  assert (ehdr->e_phentsize == sizeof (ElfW(Phdr)));
> +        }
> +    }
> +  else
> +    {
> +      _dl_phdr = (ElfW(Phdr) *) d->phdr;
> +      _dl_phnum = d->phdrsz / sizeof (ElfW(Phdr));
> +      assert (d->phdrsz % sizeof (ElfW(Phdr)) == 0);
> +    }

I take it the reason for moving this from init1 to init is that the phdr
vars are needed by some TLS setup, which needs to happen earlier than
init1.  You should add a comment before this bit, saying why it's
important that it be done early.  It wouldn't hurt to put all the phdr
setup into a subroutine, and then you could just put that comment on the
call site.

Also, if you folks would be OK with requiring binutils-2.23 or better
for new Hurd builds of libc, then we could just drop all the nonsense
entirely and just have an assert, since _dl_phdr should already have
been set up in __libc_start_main.  (If that's not actually true, then
the comment here needs to be changed.)

> +  /* We need to setup TLS before starting sigthread */

Use proper punctuation (a final period) and two spaces after the sentence.
Say "the signal thread".

> +  extern void __pthread_initialize_minimal(void);

Space before paren.

> @@ -70,7 +70,7 @@ _hurd_tls_init (tcbhead_t *tcb, int secondcall)
>  
>        /* Get the first available selector.  */
>        int sel = -1;
> -      error_t err = __i386_set_gdt (tcb->self, &sel, desc);
> +      kern_return_t err = __i386_set_gdt (tcb->self, &sel, desc);

What have you got against error_t?  It's used for a superset of the
values kern_return_t can take.

> @@ -94,16 +94,16 @@ _hurd_tls_init (tcbhead_t *tcb, int secondcall)
>        /* Fetch the selector set by the first call.  */
>        int sel;
>        asm ("mov %%gs, %w0" : "=q" (sel) : "0" (0));
> -      if (__builtin_expect (sel, 0x50) & 4) /* LDT selector */
> +      if (__builtin_expect (sel, 0x48) & 4) /* LDT selector */

This change is quite meaningless in practice: (0x50&4) == (0x48&4).
But you should make it just __glibc_unlikely (sel & 4) anyway.
Do we not have some header file that defines macros for these bits?
The magic 4 is ugly.  At least make a local function/macro for the
multiple uses here: bool __i386_selector_is_ldt or whatever.

> --- a/sysdeps/mach/thread_state.h
> +++ b/sysdeps/mach/thread_state.h
> @@ -37,6 +37,9 @@
>    ((ts)->SP = (unsigned long int) (stack) + (size))
>  #endif
>  #endif
> +#ifndef MACHINE_THREAD_STATE_FIX_NEW
> +#define MACHINE_THREAD_STATE_FIX_NEW(ts)
> +#endif

Use "# define" (indented) inside the #ifndef.  And provide a comment
explaining what the macro is required to do.


Thanks,
Roland
  

Patch

diff --git a/csu/libc-start.c b/csu/libc-start.c
index c898d06..46e5612 100644
--- a/csu/libc-start.c
+++ b/csu/libc-start.c
@@ -189,10 +189,12 @@  LIBC_START_MAIN (int (*main) (int, char **, char ** MAIN_AUXVEC_DECL),
   /* Perform IREL{,A} relocations.  */
   apply_irel ();
 
+#ifndef __GNU__
   /* Initialize the thread library at least a bit since the libgcc
      functions are using thread functions if these are available and
      we need to setup errno.  */
   __pthread_initialize_minimal ();
+#endif
 
   /* Set up the stack checker's canary.  */
   uintptr_t stack_chk_guard = _dl_setup_stack_chk_guard (_dl_random);
diff --git a/hurd/hurdfault.c b/hurd/hurdfault.c
index 1adaeb1..e290b86 100644
--- a/hurd/hurdfault.c
+++ b/hurd/hurdfault.c
@@ -204,6 +204,8 @@  _hurdsig_fault_init (void)
   /* This state will be restored when we fault.
      It runs the function above.  */
   memset (&state, 0, sizeof state);
+
+  MACHINE_THREAD_STATE_FIX_NEW (&state);
   MACHINE_THREAD_STATE_SET_PC (&state, faulted);
   MACHINE_THREAD_STATE_SET_SP (&state, faultstack, sizeof faultstack);
 
diff --git a/hurd/hurdsig.c b/hurd/hurdsig.c
index 558aa07..8e912db 100644
--- a/hurd/hurdsig.c
+++ b/hurd/hurdsig.c
@@ -1266,6 +1266,8 @@  _hurdsig_init (const int *intarray, size_t intarraysize)
 				 (vm_address_t *) &__hurd_sigthread_stack_base,
 				 &stacksize);
       assert_perror (err);
+      err = __mach_setup_tls (_hurd_msgport_thread);
+      assert_perror (err);
 
       __hurd_sigthread_stack_end = __hurd_sigthread_stack_base + stacksize;
       __hurd_sigthread_variables =
diff --git a/include/errno.h b/include/errno.h
index f1b93a8..8beb1b8 100644
--- a/include/errno.h
+++ b/include/errno.h
@@ -21,13 +21,15 @@  extern int rtld_errno attribute_hidden;
 
 #  include <tls.h>
 
-#  undef  errno
-#  ifndef NOT_IN_libc
-#   define errno __libc_errno
-#  else
-#   define errno errno		/* For #ifndef errno tests.  */
-#  endif
+#  if !defined __GNU__
+#   undef  errno
+#   ifndef NOT_IN_libc
+#    define errno __libc_errno
+#   else
+#    define errno errno		/* For #ifndef errno tests.  */
+#   endif
 extern __thread int errno attribute_tls_model_ie;
+#  endif
 
 # endif	/* !NOT_IN_libc || IN_LIB */
 
diff --git a/mach/mach.h b/mach/mach.h
index cf92171..2acc3f3 100644
--- a/mach/mach.h
+++ b/mach/mach.h
@@ -100,5 +100,8 @@  kern_return_t mach_setup_thread (task_t task, thread_t thread, void *pc,
 				 vm_address_t *stack_base,
 				 vm_size_t *stack_size);
 
+/* Give THREAD a TLS area.  */
+kern_return_t __mach_setup_tls (thread_t thread);
+kern_return_t mach_setup_tls (thread_t thread);
 
 #endif	/* mach.h */
diff --git a/mach/setup-thread.c b/mach/setup-thread.c
index e1582d1..3a57125 100644
--- a/mach/setup-thread.c
+++ b/mach/setup-thread.c
@@ -19,6 +19,7 @@ 
 #include <thread_state.h>
 #include <string.h>
 #include <mach/machine/vm_param.h>
+#include <ldsodefs.h>
 #include "sysdep.h"		/* Defines stack direction.  */
 
 #define	STACK_SIZE	(16 * 1024 * 1024) /* 16MB, arbitrary.  */
@@ -72,8 +73,35 @@  __mach_setup_thread (task_t task, thread_t thread, void *pc,
   if (error = __vm_protect (task, stack, __vm_page_size, 0, VM_PROT_NONE))
     return error;
 
-  return __thread_set_state (thread, MACHINE_THREAD_STATE_FLAVOR,
+  return __thread_set_state (thread, MACHINE_NEW_THREAD_STATE_FLAVOR,
 			     (natural_t *) &ts, tssize);
 }
 
 weak_alias (__mach_setup_thread, mach_setup_thread)
+
+/* Allocate a TLS area for THREAD and write back its updated thread state.  */
+kern_return_t
+__mach_setup_tls (thread_t thread)
+{
+  kern_return_t error;
+  struct machine_thread_state ts;
+  mach_msg_type_number_t tssize = MACHINE_THREAD_STATE_COUNT;
+  tcbhead_t *tcb;
+
+  if (error = __thread_get_state (thread, MACHINE_THREAD_STATE_FLAVOR,
+			     (natural_t *) &ts, &tssize))
+    return error;
+  assert (tssize == MACHINE_THREAD_STATE_COUNT);
+
+  tcb = _dl_allocate_tls (NULL);
+  if (!tcb)
+    return KERN_RESOURCE_SHORTAGE;
+
+  _hurd_tls_new (thread, &ts, tcb); /* NOTE(review): result is ignored.  */
+
+  error = __thread_set_state (thread, MACHINE_THREAD_STATE_FLAVOR,
+			     (natural_t *) &ts, tssize);
+  return error;
+}
+
+weak_alias (__mach_setup_tls, mach_setup_tls)
diff --git a/sysdeps/generic/thread_state.h b/sysdeps/generic/thread_state.h
index 869658e..166e5f2 100644
--- a/sysdeps/generic/thread_state.h
+++ b/sysdeps/generic/thread_state.h
@@ -22,6 +22,7 @@ 
 
 /* Replace <machine> with "i386" or "mips" or whatever.  */
 
+#define MACHINE_NEW_THREAD_STATE_FLAVOR	<machine>_NEW_THREAD_STATE
 #define MACHINE_THREAD_STATE_FLAVOR	<machine>_THREAD_STATE
 #define MACHINE_THREAD_STATE_COUNT	<machine>_THREAD_STATE_COUNT
 
diff --git a/sysdeps/mach/hurd/bits/libc-lock.h b/sysdeps/mach/hurd/bits/libc-lock.h
index 4ffb311..8bf5656 100644
--- a/sysdeps/mach/hurd/bits/libc-lock.h
+++ b/sysdeps/mach/hurd/bits/libc-lock.h
@@ -20,6 +20,9 @@ 
 #define _BITS_LIBC_LOCK_H 1
 
 #if (_LIBC - 0) || (_CTHREADS_ - 0)
+#if (_LIBC - 0)
+#include <tls.h>
+#endif
 #include <cthreads.h>
 #include <hurd/threadvar.h>
 
diff --git a/sysdeps/mach/hurd/fork.c b/sysdeps/mach/hurd/fork.c
index 321421f..f19cfc4 100644
--- a/sysdeps/mach/hurd/fork.c
+++ b/sysdeps/mach/hurd/fork.c
@@ -528,6 +528,11 @@  __fork (void)
 #endif
       MACHINE_THREAD_STATE_SET_PC (&state,
 				   (unsigned long int) _hurd_msgport_receive);
+
+      /* Do special thread setup for TLS if needed.  */
+      if (err = _hurd_tls_fork (sigthread, _hurd_msgport_thread, &state))
+	LOSE;
+
       if (err = __thread_set_state (sigthread, MACHINE_THREAD_STATE_FLAVOR,
 				    (natural_t *) &state, statecount))
 	LOSE;
@@ -538,7 +543,7 @@  __fork (void)
       _hurd_longjmp_thread_state (&state, env, 1);
 
       /* Do special thread setup for TLS if needed.  */
-      if (err = _hurd_tls_fork (thread, &state))
+      if (err = _hurd_tls_fork (thread, ss->thread, &state))
 	LOSE;
 
       if (err = __thread_set_state (thread, MACHINE_THREAD_STATE_FLAVOR,
diff --git a/sysdeps/mach/hurd/i386/init-first.c b/sysdeps/mach/hurd/i386/init-first.c
index 8fb613b..74b3a56 100644
--- a/sysdeps/mach/hurd/i386/init-first.c
+++ b/sysdeps/mach/hurd/i386/init-first.c
@@ -113,31 +113,11 @@  init1 (int argc, char *arg0, ...)
      data block; the argument strings start there.  */
   if ((void *) d == argv[0])
     {
-#ifndef SHARED
-      /* With a new enough linker (binutils-2.23 or better),
-         the magic __ehdr_start symbol will be available and
-         __libc_start_main will have done this that way already.  */
-      if (_dl_phdr == NULL)
-        {
-          /* We may need to see our own phdrs, e.g. for TLS setup.
-             Try the usual kludge to find the headers without help from
-             the exec server.  */
-          extern const void __executable_start;
-          const ElfW(Ehdr) *const ehdr = &__executable_start;
-          _dl_phdr = (const void *) ehdr + ehdr->e_phoff;
-          _dl_phnum = ehdr->e_phnum;
-          assert (ehdr->e_phentsize == sizeof (ElfW(Phdr)));
-        }
-#endif
       return;
     }
 
 #ifndef SHARED
   __libc_enable_secure = d->flags & EXEC_SECURE;
-
-  _dl_phdr = (ElfW(Phdr) *) d->phdr;
-  _dl_phnum = d->phdrsz / sizeof (ElfW(Phdr));
-  assert (d->phdrsz % sizeof (ElfW(Phdr)) == 0);
 #endif
 
   _hurd_init_dtable = d->dtable;
@@ -193,6 +173,39 @@  init (int *data)
     ++envp;
   d = (void *) ++envp;
 
+#ifndef SHARED
+  /* If we are the bootstrap task started by the kernel,
+     then after the environment pointers there is no Hurd
+     data block; the argument strings start there.  */
+  if ((void *) d == argv[0])
+    {
+      /* With a new enough linker (binutils-2.23 or better),
+	 the magic __ehdr_start symbol will be available and
+	 __libc_start_main will have done this that way already.  */
+      if (_dl_phdr == NULL)
+        {
+	  /* We may need to see our own phdrs, e.g. for TLS setup.
+	     Try the usual kludge to find the headers without help from
+	     the exec server.  */
+	  extern const void __executable_start;
+	  const ElfW(Ehdr) *const ehdr = &__executable_start;
+	  _dl_phdr = (const void *) ehdr + ehdr->e_phoff;
+	  _dl_phnum = ehdr->e_phnum;
+	  assert (ehdr->e_phentsize == sizeof (ElfW(Phdr)));
+        }
+    }
+  else
+    {
+      _dl_phdr = (ElfW(Phdr) *) d->phdr;
+      _dl_phnum = d->phdrsz / sizeof (ElfW(Phdr));
+      assert (d->phdrsz % sizeof (ElfW(Phdr)) == 0);
+    }
+
+  /* We need to setup TLS before starting sigthread */
+  extern void __pthread_initialize_minimal(void);
+  __pthread_initialize_minimal ();
+#endif
+
   /* The user might have defined a value for this, to get more variables.
      Otherwise it will be zero on startup.  We must make sure it is set
      properly before before cthreads initialization, so cthreads can know
diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
index da8c16a..88baeeb 100644
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -70,7 +70,7 @@  _hurd_tls_init (tcbhead_t *tcb, int secondcall)
 
       /* Get the first available selector.  */
       int sel = -1;
-      error_t err = __i386_set_gdt (tcb->self, &sel, desc);
+      kern_return_t err = __i386_set_gdt (tcb->self, &sel, desc);
       if (err == MIG_BAD_ID)
 	{
 	  /* Old kernel, use a per-thread LDT.  */
@@ -94,16 +94,16 @@  _hurd_tls_init (tcbhead_t *tcb, int secondcall)
       /* Fetch the selector set by the first call.  */
       int sel;
       asm ("mov %%gs, %w0" : "=q" (sel) : "0" (0));
-      if (__builtin_expect (sel, 0x50) & 4) /* LDT selector */
+      if (__builtin_expect (sel, 0x48) & 4) /* LDT selector */
 	{
-	  error_t err = __i386_set_ldt (tcb->self, sel, &desc, 1);
+	  kern_return_t err = __i386_set_ldt (tcb->self, sel, &desc, 1);
 	  assert_perror (err);
 	  if (err)
 	    return "i386_set_ldt failed";
 	}
       else
 	{
-	  error_t err = __i386_set_gdt (tcb->self, &sel, desc);
+	  kern_return_t err = __i386_set_gdt (tcb->self, &sel, desc);
 	  assert_perror (err);
 	  if (err)
 	    return "i386_set_gdt failed";
@@ -139,9 +139,40 @@  _hurd_tls_init (tcbhead_t *tcb, int secondcall)
 
 # include <mach/machine/thread_status.h>
 
-/* Set up TLS in the new thread of a fork child, copying from our own.  */
-static inline error_t __attribute__ ((unused))
-_hurd_tls_fork (thread_t child, struct i386_thread_state *state)
+/* Set up TLS in fork child thread CHILD, copying the descriptor from ORIG.  */
+static inline kern_return_t __attribute__ ((unused))
+_hurd_tls_fork (thread_t child, thread_t orig, struct i386_thread_state *state)
+{
+  /* Fetch the selector set by _hurd_tls_init.  */
+  int sel;
+  asm ("mov %%gs, %w0" : "=q" (sel) : "0" (0)); /* Our own %gs.  */
+  if (sel == state->ds)		/* _hurd_tls_init was never called.  */
+    return 0;
+
+  struct descriptor desc, *_desc = &desc; /* _desc is passed by address.  */
+  kern_return_t err;
+  unsigned int count = 1;
+
+  if (__builtin_expect (sel, 0x48) & 4) /* LDT selector */
+    err = __i386_get_ldt (orig, sel, 1, &_desc, &count);
+  else
+    err = __i386_get_gdt (orig, sel, &desc);
+
+  assert_perror (err);
+  if (err)
+    return err;
+
+  if (__builtin_expect (sel, 0x48) & 4) /* LDT selector */
+    err = __i386_set_ldt (child, sel, &desc, 1);
+  else
+    err = __i386_set_gdt (child, &sel, desc);
+
+  state->gs = sel; /* Stored even when the set call above failed.  */
+  return err;
+}
+
+static inline kern_return_t __attribute__ ((unused))
+_hurd_tls_new (thread_t child, struct i386_thread_state *state, tcbhead_t *tcb)
 {
   /* Fetch the selector set by _hurd_tls_init.  */
   int sel;
@@ -149,11 +180,13 @@  _hurd_tls_fork (thread_t child, struct i386_thread_state *state)
   if (sel == state->ds)		/* _hurd_tls_init was never called.  */
     return 0;
 
-  tcbhead_t *const tcb = THREAD_SELF;
   HURD_TLS_DESC_DECL (desc, tcb);
-  error_t err;
+  kern_return_t err;
+
+  tcb->tcb = tcb;
+  tcb->self = child;
 
-  if (__builtin_expect (sel, 0x50) & 4) /* LDT selector */
+  if (__builtin_expect (sel, 0x48) & 4) /* LDT selector */
     err = __i386_set_ldt (child, sel, &desc, 1);
   else
     err = __i386_set_gdt (child, &sel, desc);
diff --git a/sysdeps/mach/hurd/i386/trampoline.c b/sysdeps/mach/hurd/i386/trampoline.c
index e06977a..e5b47b4 100644
--- a/sysdeps/mach/hurd/i386/trampoline.c
+++ b/sysdeps/mach/hurd/i386/trampoline.c
@@ -63,7 +63,7 @@  _hurd_setup_sighandler (struct hurd_sigstate *ss, __sighandler_t handler,
 		  sizeof (state->basic));
 	  memcpy (&state->fpu, &ss->context->sc_i386_float_state,
 		  sizeof (state->fpu));
-	  state->set |= (1 << i386_THREAD_STATE) | (1 << i386_FLOAT_STATE);
+	  state->set |= (1 << i386_REGS_SEGS_STATE) | (1 << i386_FLOAT_STATE);
 	}
     }
 
diff --git a/sysdeps/mach/hurd/profil.c b/sysdeps/mach/hurd/profil.c
index 2ed2499..d590ff0 100644
--- a/sysdeps/mach/hurd/profil.c
+++ b/sysdeps/mach/hurd/profil.c
@@ -68,6 +68,8 @@  update_waiter (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
       if (! err)
 	err = __mach_setup_thread (__mach_task_self (), profile_thread,
 				   &profile_waiter, NULL, NULL);
+      if (! err)
+	err = __mach_setup_tls (profile_thread);
     }
   else
     err = 0;
diff --git a/sysdeps/mach/hurd/setitimer.c b/sysdeps/mach/hurd/setitimer.c
index 1d3e01a..bad1fa3 100644
--- a/sysdeps/mach/hurd/setitimer.c
+++ b/sysdeps/mach/hurd/setitimer.c
@@ -221,11 +221,12 @@  setitimer_locked (const struct itimerval *new, struct itimerval *old,
 	    goto out;
 	  _hurd_itimer_thread_stack_base = 0; /* Anywhere.  */
 	  _hurd_itimer_thread_stack_size = __vm_page_size; /* Small stack.  */
-	  if (err = __mach_setup_thread (__mach_task_self (),
+	  if ((err = __mach_setup_thread (__mach_task_self (),
 					 _hurd_itimer_thread,
 					 &timer_thread,
 					 &_hurd_itimer_thread_stack_base,
 					 &_hurd_itimer_thread_stack_size))
+	      || (err = __mach_setup_tls (_hurd_itimer_thread)))
 	    {
 	      __thread_terminate (_hurd_itimer_thread);
 	      _hurd_itimer_thread = MACH_PORT_NULL;
diff --git a/sysdeps/mach/hurd/tls.h b/sysdeps/mach/hurd/tls.h
index afdcfee..230f907 100644
--- a/sysdeps/mach/hurd/tls.h
+++ b/sysdeps/mach/hurd/tls.h
@@ -22,7 +22,9 @@ 
 #ifndef __ASSEMBLER__
 
 # include <stddef.h>
+# include <stdint.h>
 # include <stdbool.h>
+# include <sysdep.h>
 # include <mach/mig_errors.h>
 # include <mach.h>
 
diff --git a/sysdeps/mach/i386/thread_state.h b/sysdeps/mach/i386/thread_state.h
index 49828f5..75f14af 100644
--- a/sysdeps/mach/i386/thread_state.h
+++ b/sysdeps/mach/i386/thread_state.h
@@ -21,7 +21,8 @@ 
 
 #include <mach/machine/thread_status.h>
 
-#define MACHINE_THREAD_STATE_FLAVOR	i386_THREAD_STATE
+#define MACHINE_NEW_THREAD_STATE_FLAVOR	i386_THREAD_STATE
+#define MACHINE_THREAD_STATE_FLAVOR	i386_REGS_SEGS_STATE
 #define MACHINE_THREAD_STATE_COUNT	i386_THREAD_STATE_COUNT
 
 #define machine_thread_state i386_thread_state
@@ -30,6 +31,14 @@ 
 #define SP uesp
 #define SYSRETURN eax
 
+#define MACHINE_THREAD_STATE_FIX_NEW(ts) do { \
+	asm ("mov %%cs, %w0" : "=q" ((ts)->cs)); \
+	asm ("mov %%ds, %w0" : "=q" ((ts)->ds)); \
+	asm ("mov %%es, %w0" : "=q" ((ts)->es)); \
+	asm ("mov %%fs, %w0" : "=q" ((ts)->fs)); \
+	asm ("mov %%gs, %w0" : "=q" ((ts)->gs)); \
+} while(0) /* TS now holds the executing thread's cs/ds/es/fs/gs.  */
+
 struct machine_thread_all_state
   {
     int set;			/* Mask of bits (1 << FLAVOR).  */
diff --git a/sysdeps/mach/thread_state.h b/sysdeps/mach/thread_state.h
index 64d440c..2e08af2 100644
--- a/sysdeps/mach/thread_state.h
+++ b/sysdeps/mach/thread_state.h
@@ -37,6 +37,9 @@ 
   ((ts)->SP = (unsigned long int) (stack) + (size))
 #endif
 #endif
+#ifndef MACHINE_THREAD_STATE_FIX_NEW
+#define MACHINE_THREAD_STATE_FIX_NEW(ts)
+#endif
 
 /* These functions are of use in machine-dependent signal trampoline
    implementations.  */