[RFC,24/34] hurd: Only check for TLS initialization inside rtld or in static builds

Message ID 20230319151017.531737-25-bugaevc@gmail.com
State Committed, archived
Headers
Series The rest of the x86_64-gnu port |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Sergey Bugaev March 19, 2023, 3:10 p.m. UTC
  When glibc is built as a shared library, TLS is always initialized by
the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
to running the main program (see dl-call_tls_init_tp.h). We can take
advantage of this: we know for sure that __LIBC_NO_TLS () will evaluate
to 0 in all other cases, so let the compiler know that explicitly too.

Also, only define _hurd_tls_init () and TLS_INIT_TP () under the same
conditions (either !SHARED or inside rtld), to statically assert that
this is the case.

Other than a microoptimization, this also helps with avoiding awkward
sharing of the __libc_tls_initialized variable between ld.so and libc.so
that we would have to do otherwise -- we know for sure that no sharing
is required, simply because __libc_tls_initialized would always be set
to true inside libc.so.

Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
---
 sysdeps/mach/hurd/Makefile                    |  4 ++
 sysdeps/mach/hurd/i386/dl-tls-initialized.c   | 21 +++++++++
 sysdeps/mach/hurd/i386/tls.h                  | 43 +++++++++++--------
 sysdeps/mach/hurd/x86/init-first.c            | 11 +----
 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c | 21 +++++++++
 sysdeps/mach/hurd/x86_64/tls.h                | 19 +++++---
 6 files changed, 85 insertions(+), 34 deletions(-)
 create mode 100644 sysdeps/mach/hurd/i386/dl-tls-initialized.c
 create mode 100644 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
  

Comments

Samuel Thibault April 10, 2023, 9:33 p.m. UTC | #1
Applied, thanks!

Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> When glibc is built as a shared library, TLS is always initialized by
> the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> to running the main program (see dl-call_tls_init_tp.h). We can take
> advantage of this: we know for sure that __LIBC_NO_TLS () will evaluate
> to 0 in all other cases, so let the compiler know that explicitly too.
> 
> Also, only define _hurd_tls_init () and TLS_INIT_TP () under the same
> conditions (either !SHARED or inside rtld), to statically assert that
> this is the case.
> 
> Other than a microoptimization, this also helps with avoiding awkward
> sharing of the __libc_tls_initialized variable between ld.so and libc.so
> that we would have to do otherwise -- we know for sure that no sharing
> is required, simply because __libc_tls_initialized would always be set
> to true inside libc.so.
> 
> Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
> ---
>  sysdeps/mach/hurd/Makefile                    |  4 ++
>  sysdeps/mach/hurd/i386/dl-tls-initialized.c   | 21 +++++++++
>  sysdeps/mach/hurd/i386/tls.h                  | 43 +++++++++++--------
>  sysdeps/mach/hurd/x86/init-first.c            | 11 +----
>  sysdeps/mach/hurd/x86_64/dl-tls-initialized.c | 21 +++++++++
>  sysdeps/mach/hurd/x86_64/tls.h                | 19 +++++---
>  6 files changed, 85 insertions(+), 34 deletions(-)
>  create mode 100644 sysdeps/mach/hurd/i386/dl-tls-initialized.c
>  create mode 100644 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> 
> diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
> index d5584930..f43e92ba 100644
> --- a/sysdeps/mach/hurd/Makefile
> +++ b/sysdeps/mach/hurd/Makefile
> @@ -197,6 +197,10 @@ ifeq (hurd, $(subdir))
>  sysdep_routines += cthreads
>  endif
>  
> +ifeq (elf, $(subdir))
> +sysdep-dl-routines += dl-tls-initialized
> +endif
> +
>  ifeq (io, $(subdir))
>  sysdep_routines += f_setlk close_nocancel close_nocancel_nostatus \
>  		   fcntl_nocancel open_nocancel openat_nocancel read_nocancel \
> diff --git a/sysdeps/mach/hurd/i386/dl-tls-initialized.c b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> new file mode 100644
> index 00000000..493ec239
> --- /dev/null
> +++ b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for i386/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef SHARED
> +unsigned short __init1_desc;
> +#endif
> diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
> index 0f8dd241..ee7b8004 100644
> --- a/sysdeps/mach/hurd/i386/tls.h
> +++ b/sysdeps/mach/hurd/i386/tls.h
> @@ -69,18 +69,6 @@ _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
>                    |  (desc->high_word & 0xff000000));			      \
>    })
>  
> -/* Return 1 if TLS is not initialized yet.  */
> -#ifndef SHARED
> -extern unsigned short __init1_desc;
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> -#else
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> -#endif
> -
> -#define __LIBC_NO_TLS()							      \
> -  ({ unsigned short ds, gs;						      \
> -     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
> -     __builtin_expect(__HURD_DESC_INITIAL(gs, ds), 0); })
>  #endif
>  
>  /* The TCB can have any size and the memory following the address the
> @@ -125,6 +113,28 @@ extern unsigned short __init1_desc;
>  
>  # define HURD_SEL_LDT(sel) (__builtin_expect ((sel) & 4, 0))
>  
> +#ifndef SHARED
> +extern unsigned short __init1_desc;
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> +#else
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> +#endif
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +/* Return 1 if TLS is not initialized yet.  */
> +extern inline bool __attribute__ ((unused))
> +__LIBC_NO_TLS (void)
> +{
> +  unsigned short ds, gs;
> +  asm ("movw %%ds, %w0\n"
> +       "movw %%gs, %w1"
> +       : "=q" (ds), "=q" (gs));
> +  return __glibc_unlikely (__HURD_DESC_INITIAL (gs, ds));
> +}
> +
> +/* Code to initially initialize the thread pointer.  This might need
> +   special attention since 'errno' is not yet available and if the
> +   operation can cause a failure 'errno' must not be touched.  */
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -168,11 +178,10 @@ out:
>    return success;
>  }
>  
> -/* Code to initially initialize the thread pointer.  This might need
> -   special attention since 'errno' is not yet available and if the
> -   operation can cause a failure 'errno' must not be touched.  */
> -# define TLS_INIT_TP(descr) \
> -    _hurd_tls_init ((tcbhead_t *) (descr))
> +# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +#else /* defined (SHARED) && !IS_IN (rtld) */
> +# define __LIBC_NO_TLS() 0
> +#endif
>  
>  # if __GNUC_PREREQ (6, 0)
>  
> diff --git a/sysdeps/mach/hurd/x86/init-first.c b/sysdeps/mach/hurd/x86/init-first.c
> index 48c330ec..89a5f44c 100644
> --- a/sysdeps/mach/hurd/x86/init-first.c
> +++ b/sysdeps/mach/hurd/x86/init-first.c
> @@ -40,13 +40,6 @@ extern char **_dl_argv;
>  
>  #ifndef SHARED
>  static tcbhead_t __init1_tcbhead;
> -# ifndef __x86_64__
> -unsigned short __init1_desc;
> -# endif
> -#endif
> -
> -#ifdef __x86_64__
> -unsigned char __libc_tls_initialized;
>  #endif
>  
>  /* Things that want to be run before _hurd_init or much anything else.
> @@ -166,9 +159,7 @@ first_init (void)
>    _hurd_tls_init (&__init1_tcbhead);
>  
>    /* Make sure __LIBC_NO_TLS () keeps evaluating to 1.  */
> -# ifdef __x86_64__
> -  __libc_tls_initialized = 0;
> -# else
> +# ifndef __x86_64__
>    asm ("movw %%gs,%w0" : "=m" (__init1_desc));
>  # endif
>  #endif
> diff --git a/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> new file mode 100644
> index 00000000..d0766f95
> --- /dev/null
> +++ b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for x86_64/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +unsigned short __libc_tls_initialized;
> +#endif
> diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
> index cf74e1f4..da504d9c 100644
> --- a/sysdeps/mach/hurd/x86_64/tls.h
> +++ b/sysdeps/mach/hurd/x86_64/tls.h
> @@ -68,10 +68,6 @@ _Static_assert (offsetof (tcbhead_t, stack_guard) == 0x28,
>  _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
>                  "split stack pointer offset");
>  
> -extern unsigned char __libc_tls_initialized;
> -
> -# define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> -
>  /* The TCB can have any size and the memory following the address the
>     thread pointer points to is unspecified.  Allocate the TCB there.  */
>  # define TLS_TCB_AT_TP	1
> @@ -87,8 +83,6 @@ extern unsigned char __libc_tls_initialized;
>  # define TCB_ALIGNMENT	64
>  
>  
> -# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> -
>  # define THREAD_SELF							\
>    (*(tcbhead_t * __seg_fs *) offsetof (tcbhead_t, tcb))
>  /* Read member of the thread descriptor directly.  */
> @@ -174,6 +168,10 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
>                               i386_FSGS_BASE_STATE_COUNT);
>  }
>  
> +# if !defined (SHARED) || IS_IN (rtld)
> +extern unsigned char __libc_tls_initialized;
> +#  define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> +
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -184,11 +182,18 @@ _hurd_tls_init (tcbhead_t *tcb)
>    tcb->multiple_threads = 1;
>  
>    err = _hurd_tls_new (self, tcb);
> +  if (err == 0)
> +    __libc_tls_initialized = 1;
>    __mach_port_deallocate (__mach_task_self (), self);
> -  __libc_tls_initialized = 1;
>    return err == 0;
>  }
>  
> +#  define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +# else /* defined (SHARED) && !IS_IN (rtld) */
> +#  define __LIBC_NO_TLS() 0
> +# endif
> +
> +
>  
>  /* Global scope switch support.  */
>  # define THREAD_GSCOPE_FLAG_UNUSED 0
> -- 
> 2.39.2
>
  
Samuel Thibault April 11, 2023, 6:57 p.m. UTC | #2
Hello,

Had you actually tested it on i386? It seems to be breaking the
testsuite completely. I would expect that a submitted patch series has
gone through the testsuite.

Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> When glibc is built as a shared library, TLS is always initialized by
> the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> to running the main program (see dl-call_tls_init_tp.h).

Yes, but apparently we load libc.so before calling TLS_INIT_TP? (and
thus start using its functions)

Samuel

> We can take
> advantage of this: we know for sure that __LIBC_NO_TLS () will evaluate
> to 0 in all other cases, so let the compiler know that explicitly too.
> 
> Also, only define _hurd_tls_init () and TLS_INIT_TP () under the same
> conditions (either !SHARED or inside rtld), to statically assert that
> this is the case.
> 
> Other than a microoptimization, this also helps with avoiding awkward
> sharing of the __libc_tls_initialized variable between ld.so and libc.so
> that we would have to do otherwise -- we know for sure that no sharing
> is required, simply because __libc_tls_initialized would always be set
> to true inside libc.so.
> 
> Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
> ---
>  sysdeps/mach/hurd/Makefile                    |  4 ++
>  sysdeps/mach/hurd/i386/dl-tls-initialized.c   | 21 +++++++++
>  sysdeps/mach/hurd/i386/tls.h                  | 43 +++++++++++--------
>  sysdeps/mach/hurd/x86/init-first.c            | 11 +----
>  sysdeps/mach/hurd/x86_64/dl-tls-initialized.c | 21 +++++++++
>  sysdeps/mach/hurd/x86_64/tls.h                | 19 +++++---
>  6 files changed, 85 insertions(+), 34 deletions(-)
>  create mode 100644 sysdeps/mach/hurd/i386/dl-tls-initialized.c
>  create mode 100644 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> 
> diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
> index d5584930..f43e92ba 100644
> --- a/sysdeps/mach/hurd/Makefile
> +++ b/sysdeps/mach/hurd/Makefile
> @@ -197,6 +197,10 @@ ifeq (hurd, $(subdir))
>  sysdep_routines += cthreads
>  endif
>  
> +ifeq (elf, $(subdir))
> +sysdep-dl-routines += dl-tls-initialized
> +endif
> +
>  ifeq (io, $(subdir))
>  sysdep_routines += f_setlk close_nocancel close_nocancel_nostatus \
>  		   fcntl_nocancel open_nocancel openat_nocancel read_nocancel \
> diff --git a/sysdeps/mach/hurd/i386/dl-tls-initialized.c b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> new file mode 100644
> index 00000000..493ec239
> --- /dev/null
> +++ b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for i386/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef SHARED
> +unsigned short __init1_desc;
> +#endif
> diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
> index 0f8dd241..ee7b8004 100644
> --- a/sysdeps/mach/hurd/i386/tls.h
> +++ b/sysdeps/mach/hurd/i386/tls.h
> @@ -69,18 +69,6 @@ _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
>                    |  (desc->high_word & 0xff000000));			      \
>    })
>  
> -/* Return 1 if TLS is not initialized yet.  */
> -#ifndef SHARED
> -extern unsigned short __init1_desc;
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> -#else
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> -#endif
> -
> -#define __LIBC_NO_TLS()							      \
> -  ({ unsigned short ds, gs;						      \
> -     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
> -     __builtin_expect(__HURD_DESC_INITIAL(gs, ds), 0); })
>  #endif
>  
>  /* The TCB can have any size and the memory following the address the
> @@ -125,6 +113,28 @@ extern unsigned short __init1_desc;
>  
>  # define HURD_SEL_LDT(sel) (__builtin_expect ((sel) & 4, 0))
>  
> +#ifndef SHARED
> +extern unsigned short __init1_desc;
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> +#else
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> +#endif
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +/* Return 1 if TLS is not initialized yet.  */
> +extern inline bool __attribute__ ((unused))
> +__LIBC_NO_TLS (void)
> +{
> +  unsigned short ds, gs;
> +  asm ("movw %%ds, %w0\n"
> +       "movw %%gs, %w1"
> +       : "=q" (ds), "=q" (gs));
> +  return __glibc_unlikely (__HURD_DESC_INITIAL (gs, ds));
> +}
> +
> +/* Code to initially initialize the thread pointer.  This might need
> +   special attention since 'errno' is not yet available and if the
> +   operation can cause a failure 'errno' must not be touched.  */
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -168,11 +178,10 @@ out:
>    return success;
>  }
>  
> -/* Code to initially initialize the thread pointer.  This might need
> -   special attention since 'errno' is not yet available and if the
> -   operation can cause a failure 'errno' must not be touched.  */
> -# define TLS_INIT_TP(descr) \
> -    _hurd_tls_init ((tcbhead_t *) (descr))
> +# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +#else /* defined (SHARED) && !IS_IN (rtld) */
> +# define __LIBC_NO_TLS() 0
> +#endif
>  
>  # if __GNUC_PREREQ (6, 0)
>  
> diff --git a/sysdeps/mach/hurd/x86/init-first.c b/sysdeps/mach/hurd/x86/init-first.c
> index 48c330ec..89a5f44c 100644
> --- a/sysdeps/mach/hurd/x86/init-first.c
> +++ b/sysdeps/mach/hurd/x86/init-first.c
> @@ -40,13 +40,6 @@ extern char **_dl_argv;
>  
>  #ifndef SHARED
>  static tcbhead_t __init1_tcbhead;
> -# ifndef __x86_64__
> -unsigned short __init1_desc;
> -# endif
> -#endif
> -
> -#ifdef __x86_64__
> -unsigned char __libc_tls_initialized;
>  #endif
>  
>  /* Things that want to be run before _hurd_init or much anything else.
> @@ -166,9 +159,7 @@ first_init (void)
>    _hurd_tls_init (&__init1_tcbhead);
>  
>    /* Make sure __LIBC_NO_TLS () keeps evaluating to 1.  */
> -# ifdef __x86_64__
> -  __libc_tls_initialized = 0;
> -# else
> +# ifndef __x86_64__
>    asm ("movw %%gs,%w0" : "=m" (__init1_desc));
>  # endif
>  #endif
> diff --git a/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> new file mode 100644
> index 00000000..d0766f95
> --- /dev/null
> +++ b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for x86_64/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +unsigned short __libc_tls_initialized;
> +#endif
> diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
> index cf74e1f4..da504d9c 100644
> --- a/sysdeps/mach/hurd/x86_64/tls.h
> +++ b/sysdeps/mach/hurd/x86_64/tls.h
> @@ -68,10 +68,6 @@ _Static_assert (offsetof (tcbhead_t, stack_guard) == 0x28,
>  _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
>                  "split stack pointer offset");
>  
> -extern unsigned char __libc_tls_initialized;
> -
> -# define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> -
>  /* The TCB can have any size and the memory following the address the
>     thread pointer points to is unspecified.  Allocate the TCB there.  */
>  # define TLS_TCB_AT_TP	1
> @@ -87,8 +83,6 @@ extern unsigned char __libc_tls_initialized;
>  # define TCB_ALIGNMENT	64
>  
>  
> -# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> -
>  # define THREAD_SELF							\
>    (*(tcbhead_t * __seg_fs *) offsetof (tcbhead_t, tcb))
>  /* Read member of the thread descriptor directly.  */
> @@ -174,6 +168,10 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
>                               i386_FSGS_BASE_STATE_COUNT);
>  }
>  
> +# if !defined (SHARED) || IS_IN (rtld)
> +extern unsigned char __libc_tls_initialized;
> +#  define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> +
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -184,11 +182,18 @@ _hurd_tls_init (tcbhead_t *tcb)
>    tcb->multiple_threads = 1;
>  
>    err = _hurd_tls_new (self, tcb);
> +  if (err == 0)
> +    __libc_tls_initialized = 1;
>    __mach_port_deallocate (__mach_task_self (), self);
> -  __libc_tls_initialized = 1;
>    return err == 0;
>  }
>  
> +#  define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +# else /* defined (SHARED) && !IS_IN (rtld) */
> +#  define __LIBC_NO_TLS() 0
> +# endif
> +
> +
>  
>  /* Global scope switch support.  */
>  # define THREAD_GSCOPE_FLAG_UNUSED 0
> -- 
> 2.39.2
>
  
Samuel Thibault April 11, 2023, 7:18 p.m. UTC | #3
Samuel Thibault, le mar. 11 avril 2023 20:57:05 +0200, a ecrit:
> Had you actually tested it on i386? It seems to be breaking the
> testsuite completely. I would expect that a submitted patch series has
> gone through the testsuite.
> 
> Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> > When glibc is built as a shared library, TLS is always initialized by
> > the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> > to running the main program (see dl-call_tls_init_tp.h).
> 
> Yes, but apparently we load libc.so before calling TLS_INIT_TP? (and
> thus start using its functions)

I'm thinking that we should as well just assume that the hardware is old
enough to support rdfsbase, that'll be *way* simpler to test for TLS.

Samuel
  
Samuel Thibault April 11, 2023, 8:03 p.m. UTC | #4
I have reverted that change for now, so we get back to a working glibc.

Samuel

Samuel Thibault, le mar. 11 avril 2023 20:57:05 +0200, a ecrit:
> Hello,
> 
> Had you actually tested it on i386? It seems to be breaking the
> testsuite completely. I would expect that a submitted patch series has
> gone through the testsuite.
> 
> Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> > When glibc is built as a shared library, TLS is always initialized by
> > the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> > to running the main program (see dl-call_tls_init_tp.h).
> 
> Yes, but apparently we load libc.so before calling TLS_INIT_TP? (and
> thus start using its functions)
> 
> Samuel
> 
> > We can take
> > advantage of this: we know for sure that __LIBC_NO_TLS () will evaluate
> > to 0 in all other cases, so let the compiler know that explicitly too.
> > 
> > Also, only define _hurd_tls_init () and TLS_INIT_TP () under the same
> > conditions (either !SHARED or inside rtld), to statically assert that
> > this is the case.
> > 
> > Other than a microoptimization, this also helps with avoiding awkward
> > sharing of the __libc_tls_initialized variable between ld.so and libc.so
> > that we would have to do otherwise -- we know for sure that no sharing
> > is required, simply because __libc_tls_initialized would always be set
> > to true inside libc.so.
> > 
> > Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
> > ---
> >  sysdeps/mach/hurd/Makefile                    |  4 ++
> >  sysdeps/mach/hurd/i386/dl-tls-initialized.c   | 21 +++++++++
> >  sysdeps/mach/hurd/i386/tls.h                  | 43 +++++++++++--------
> >  sysdeps/mach/hurd/x86/init-first.c            | 11 +----
> >  sysdeps/mach/hurd/x86_64/dl-tls-initialized.c | 21 +++++++++
> >  sysdeps/mach/hurd/x86_64/tls.h                | 19 +++++---
> >  6 files changed, 85 insertions(+), 34 deletions(-)
> >  create mode 100644 sysdeps/mach/hurd/i386/dl-tls-initialized.c
> >  create mode 100644 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> > 
> > diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
> > index d5584930..f43e92ba 100644
> > --- a/sysdeps/mach/hurd/Makefile
> > +++ b/sysdeps/mach/hurd/Makefile
> > @@ -197,6 +197,10 @@ ifeq (hurd, $(subdir))
> >  sysdep_routines += cthreads
> >  endif
> >  
> > +ifeq (elf, $(subdir))
> > +sysdep-dl-routines += dl-tls-initialized
> > +endif
> > +
> >  ifeq (io, $(subdir))
> >  sysdep_routines += f_setlk close_nocancel close_nocancel_nostatus \
> >  		   fcntl_nocancel open_nocancel openat_nocancel read_nocancel \
> > diff --git a/sysdeps/mach/hurd/i386/dl-tls-initialized.c b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> > new file mode 100644
> > index 00000000..493ec239
> > --- /dev/null
> > +++ b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> > @@ -0,0 +1,21 @@
> > +/* Determine whether TLS is initialized, for i386/Hurd.
> > +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#ifndef SHARED
> > +unsigned short __init1_desc;
> > +#endif
> > diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
> > index 0f8dd241..ee7b8004 100644
> > --- a/sysdeps/mach/hurd/i386/tls.h
> > +++ b/sysdeps/mach/hurd/i386/tls.h
> > @@ -69,18 +69,6 @@ _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
> >                    |  (desc->high_word & 0xff000000));			      \
> >    })
> >  
> > -/* Return 1 if TLS is not initialized yet.  */
> > -#ifndef SHARED
> > -extern unsigned short __init1_desc;
> > -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> > -#else
> > -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> > -#endif
> > -
> > -#define __LIBC_NO_TLS()							      \
> > -  ({ unsigned short ds, gs;						      \
> > -     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
> > -     __builtin_expect(__HURD_DESC_INITIAL(gs, ds), 0); })
> >  #endif
> >  
> >  /* The TCB can have any size and the memory following the address the
> > @@ -125,6 +113,28 @@ extern unsigned short __init1_desc;
> >  
> >  # define HURD_SEL_LDT(sel) (__builtin_expect ((sel) & 4, 0))
> >  
> > +#ifndef SHARED
> > +extern unsigned short __init1_desc;
> > +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> > +#else
> > +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> > +#endif
> > +
> > +#if !defined (SHARED) || IS_IN (rtld)
> > +/* Return 1 if TLS is not initialized yet.  */
> > +extern inline bool __attribute__ ((unused))
> > +__LIBC_NO_TLS (void)
> > +{
> > +  unsigned short ds, gs;
> > +  asm ("movw %%ds, %w0\n"
> > +       "movw %%gs, %w1"
> > +       : "=q" (ds), "=q" (gs));
> > +  return __glibc_unlikely (__HURD_DESC_INITIAL (gs, ds));
> > +}
> > +
> > +/* Code to initially initialize the thread pointer.  This might need
> > +   special attention since 'errno' is not yet available and if the
> > +   operation can cause a failure 'errno' must not be touched.  */
> >  static inline bool __attribute__ ((unused))
> >  _hurd_tls_init (tcbhead_t *tcb)
> >  {
> > @@ -168,11 +178,10 @@ out:
> >    return success;
> >  }
> >  
> > -/* Code to initially initialize the thread pointer.  This might need
> > -   special attention since 'errno' is not yet available and if the
> > -   operation can cause a failure 'errno' must not be touched.  */
> > -# define TLS_INIT_TP(descr) \
> > -    _hurd_tls_init ((tcbhead_t *) (descr))
> > +# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> > +#else /* defined (SHARED) && !IS_IN (rtld) */
> > +# define __LIBC_NO_TLS() 0
> > +#endif
> >  
> >  # if __GNUC_PREREQ (6, 0)
> >  
> > diff --git a/sysdeps/mach/hurd/x86/init-first.c b/sysdeps/mach/hurd/x86/init-first.c
> > index 48c330ec..89a5f44c 100644
> > --- a/sysdeps/mach/hurd/x86/init-first.c
> > +++ b/sysdeps/mach/hurd/x86/init-first.c
> > @@ -40,13 +40,6 @@ extern char **_dl_argv;
> >  
> >  #ifndef SHARED
> >  static tcbhead_t __init1_tcbhead;
> > -# ifndef __x86_64__
> > -unsigned short __init1_desc;
> > -# endif
> > -#endif
> > -
> > -#ifdef __x86_64__
> > -unsigned char __libc_tls_initialized;
> >  #endif
> >  
> >  /* Things that want to be run before _hurd_init or much anything else.
> > @@ -166,9 +159,7 @@ first_init (void)
> >    _hurd_tls_init (&__init1_tcbhead);
> >  
> >    /* Make sure __LIBC_NO_TLS () keeps evaluating to 1.  */
> > -# ifdef __x86_64__
> > -  __libc_tls_initialized = 0;
> > -# else
> > +# ifndef __x86_64__
> >    asm ("movw %%gs,%w0" : "=m" (__init1_desc));
> >  # endif
> >  #endif
> > diff --git a/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> > new file mode 100644
> > index 00000000..d0766f95
> > --- /dev/null
> > +++ b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> > @@ -0,0 +1,21 @@
> > +/* Determine whether TLS is initialized, for x86_64/Hurd.
> > +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> > +   This file is part of the GNU C Library.
> > +
> > +   The GNU C Library is free software; you can redistribute it and/or
> > +   modify it under the terms of the GNU Lesser General Public
> > +   License as published by the Free Software Foundation; either
> > +   version 2.1 of the License, or (at your option) any later version.
> > +
> > +   The GNU C Library is distributed in the hope that it will be useful,
> > +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > +   Lesser General Public License for more details.
> > +
> > +   You should have received a copy of the GNU Lesser General Public
> > +   License along with the GNU C Library; if not, see
> > +   <https://www.gnu.org/licenses/>.  */
> > +
> > +#if !defined (SHARED) || IS_IN (rtld)
> > +unsigned short __libc_tls_initialized;
> > +#endif
> > diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
> > index cf74e1f4..da504d9c 100644
> > --- a/sysdeps/mach/hurd/x86_64/tls.h
> > +++ b/sysdeps/mach/hurd/x86_64/tls.h
> > @@ -68,10 +68,6 @@ _Static_assert (offsetof (tcbhead_t, stack_guard) == 0x28,
> >  _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
> >                  "split stack pointer offset");
> >  
> > -extern unsigned char __libc_tls_initialized;
> > -
> > -# define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> > -
> >  /* The TCB can have any size and the memory following the address the
> >     thread pointer points to is unspecified.  Allocate the TCB there.  */
> >  # define TLS_TCB_AT_TP	1
> > @@ -87,8 +83,6 @@ extern unsigned char __libc_tls_initialized;
> >  # define TCB_ALIGNMENT	64
> >  
> >  
> > -# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> > -
> >  # define THREAD_SELF							\
> >    (*(tcbhead_t * __seg_fs *) offsetof (tcbhead_t, tcb))
> >  /* Read member of the thread descriptor directly.  */
> > @@ -174,6 +168,10 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
> >                               i386_FSGS_BASE_STATE_COUNT);
> >  }
> >  
> > +# if !defined (SHARED) || IS_IN (rtld)
> > +extern unsigned char __libc_tls_initialized;
> > +#  define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> > +
> >  static inline bool __attribute__ ((unused))
> >  _hurd_tls_init (tcbhead_t *tcb)
> >  {
> > @@ -184,11 +182,18 @@ _hurd_tls_init (tcbhead_t *tcb)
> >    tcb->multiple_threads = 1;
> >  
> >    err = _hurd_tls_new (self, tcb);
> > +  if (err == 0)
> > +    __libc_tls_initialized = 1;
> >    __mach_port_deallocate (__mach_task_self (), self);
> > -  __libc_tls_initialized = 1;
> >    return err == 0;
> >  }
> >  
> > +#  define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> > +# else /* defined (SHARED) && !IS_IN (rtld) */
> > +#  define __LIBC_NO_TLS() 0
> > +# endif
> > +
> > +
> >  
> >  /* Global scope switch support.  */
> >  # define THREAD_GSCOPE_FLAG_UNUSED 0
> > -- 
> > 2.39.2
> > 
> 
> -- 
> Samuel
> ---
> Pour une évaluation indépendante, transparente et rigoureuse !
> Je soutiens la Commission d'Évaluation de l'Inria.
  
Sergey Bugaev April 11, 2023, 8:27 p.m. UTC | #5
On Tue, Apr 11, 2023 at 9:57 PM Samuel Thibault <samuel.thibault@gnu.org> wrote:
>
> Hello,
>
> Had you actually tested it on i386? It seems to be breaking the
> testsuite completely. I would expect that a submitted patch series has
> gone through the testsuite.

Ouch!

I have tested that it works on i386, as in I was able to run bash and
apt with it. I'll re-check whether it (still) works, maybe I'm
misremembering and my testing wasn't with the latest iteration of the
change (although I'm fairly sure that it was, hm).

I have not run the testsuite, because:
* I'm cross-compiling
* I never managed to run it to completion on the Hurd in the first
place... but maybe you have already fixed this.

I'll try to run the full testsuite once I get to my other machine.

But also, I think I've made it clear enough that this is an RFC and
while I have done some testing, it wasn't comprehensive. I was happy
to see it pushed, but that's because I assumed that you have verified
that my assumptions & reasoning are sound and that this change doesn't
break anything. Please don't just blindly trust me with this patchset!

> Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> > When glibc is built as a shared library, TLS is always initialized by
> > the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> > to running the main program (see dl-call_tls_init_tp.h).
>
> Yes, but apparently we load libc.so before calling TLS_INIT_TP? (and
> thus start using its functions)

If that was the case, wouldn't we also explode on e.g. errno accesses?
Note that the Hurd port doesn't use RTLD_PRIVATE_ERRNO.

As I understand it, rtld starts using libc functions after the call to
_dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0) at
elf/rtld.c:2372. TLS has already been initialized by that point, and
in fact there's a comment there saying, "We must do this after TLS
initialization in case after this re-relocation, we might call a
user-supplied function (e.g. calloc from _dl_relocate_object) that
uses TLS data."

Here's some evidence to support that theory: I've uploaded a log with
LD_DEBUG + gdb here: [0]. As you can see, i386_set_gdt gets hit *way*
before ld.so gets reallocated / bound to libc.so symbols. The same can
be seen in a file that explicitly links against libdl (this doesn't
seem to make any difference).

[0]: https://paste.gg/p/anonymous/ae39453264334d3e92670a3cd964806d

But of course it's hard to argue with test failures :(

> I'm thinking that we should as well just assume that the hardware is old
> enough to support rdfsbase, that'll be *way* simpler to test for TLS.

Maybe that, yes.

> I have reverted that change for now, so we get back to a working glibc.

Good.

Sergey
  
Samuel Thibault April 11, 2023, 9:23 p.m. UTC | #6
Sergey Bugaev, le mar. 11 avril 2023 23:27:35 +0300, a ecrit:
> On Tue, Apr 11, 2023 at 9:57 PM Samuel Thibault <samuel.thibault@gnu.org> wrote:
> > Had you actually tested it on i386? It seems to be breaking the
> > testsuite completely. I would expect that a submitted patch series has
> > gone through the testsuite.
> 
> Ouch!
> 
> I have tested that it works on i386, as in I was able to run bash and
> apt with it. I'll re-check whether it (still) works,

Yes, some things work, but some others don't :)

> I have not run the testsuite, because:
> * I'm cross-compiling

You can probably push your tree to a box where you can compile & run?

> * I never managed to run it to completion on the Hurd in the first
> place... but maybe you have already fixed this.

I have marked various tests as unsupported, yet.

> But also, I think I've made it clear enough that this is an RFC and
> while I have done some testing, it wasn't comprehensive. I was happy
> to see it pushed, but that's because I assumed that you have verified
> that my assumptions & reasoning are sound and that this change doesn't
> break anything. Please don't just blindly trust me with this patchset!

Sure, I did verify, but like everybody, the testsuite is in the end what
is best to make sure we haven't screwed up :)

> > Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> > > When glibc is built as a shared library, TLS is always initialized by
> > > the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> > > to running the main program (see dl-call_tls_init_tp.h).
> >
> > Yes, but apparently we load libc.so before calling TLS_INIT_TP? (and
> > thus start using its functions)
> 
> If that was the case, wouldn't we also explode on e.g. errno accesses?
> Note that the Hurd port doesn't use RTLD_PRIVATE_ERRNO.

I don't know.

> As I understand it, rtld starts using libc functions after the call to
> _dl_relocate_object (&GL(dl_rtld_map), main_map->l_scope, 0, 0) at
> elf/rtld.c:2372. TLS has already been initialized by that point, and
> in fact there's a comment there saying, "We must do this after TLS
> initialization in case after this re-relocation, we might call a
> user-supplied function (e.g. calloc from _dl_relocate_object) that
> uses TLS data."

Ok, I don't know.

Samuel
  
Sergey Bugaev April 12, 2023, 8:36 a.m. UTC | #7
On Wed, Apr 12, 2023 at 12:23 AM Samuel Thibault
<samuel.thibault@gnu.org> wrote:>
> Sergey Bugaev, le mar. 11 avril 2023 23:27:35 +0300, a ecrit:
> > On Tue, Apr 11, 2023 at 9:57 PM Samuel Thibault <samuel.thibault@gnu.org> wrote:
> > > Had you actually tested it on i386? It seems to be breaking the
> > > testsuite completely. I would expect that a submitted patch series has
> > > gone through the testsuite.
> >
> > Ouch!
> >
> > I have tested that it works on i386, as in I was able to run bash and
> > apt with it. I'll re-check whether it (still) works,
>
> Yes, some things work, but some others don't :)
>
> > I have not run the testsuite, because:
> > * I'm cross-compiling
>
> You can probably push your tree to a box where you can compile & run?
>
> > * I never managed to run it to completion on the Hurd in the first
> > place... but maybe you have already fixed this.
>
> I have marked various tests as unsupported, yet.

Alright, this went much as expected. First it took an eternity to
compile (SMP support can't come soon enough), and second, the testuite
has brought the system into a bad state and didn't complete. In
particular: it managed to terminate my SSH session several times (I
don't know how), and eventually it ran into that networking stack
deadlock (the only way out of which seems to be reboot-hurd on the
console). So I still can not run the testsuite on my end.

But the tests that did run seem to mostly have worked. There were some
failures here and there (I don't know which ones are expected), but
nothing as bad as every single program crashing because it starts
using TLS before it's set up due to already having switched to libc.so
functions.

Could you please point me to a specific test case (and not just "run
the whole thing") that started failing because of this change, and
teach me how to run just that one? I have read [0], but that says, "To
test just one test you have to have already run the entire testsuite",
which is a non-starter.

[0]: https://sourceware.org/glibc/wiki/Testing/Testsuite#Testing_just_one_test

Side note, I don't know why it's glibc's test suite that brings the
system to its knees. glib's, which is also quite comprehensive,
completes just fine. Would it have been easy for me to run the full
test suite, I would surely do that before submitting any patches. But
it's not.

Sergey
  
Samuel Thibault April 12, 2023, 9 a.m. UTC | #8
Sergey Bugaev, le mer. 12 avril 2023 11:36:58 +0300, a ecrit:
> Alright, this went much as expected. First it took an eternity to
> compile (SMP support can't come soon enough), and second, the testuite
> has brought the system into a bad state and didn't complete. In
> particular: it managed to terminate my SSH session several times (I
> don't know how), and eventually it ran into that networking stack
> deadlock (the only way out of which seems to be reboot-hurd on the
> console). So I still can not run the testsuite on my end.

Was that tst-spawn6? We'd probably have to disable that as well, you can
just add 

tests-unsupported += tst-spawn6

to sysdeps/mach/hurd/Makefile

Possibly also test-lfs:

tests-unsupported += test-lfs

> But the tests that did run seem to mostly have worked. There were some
> failures here and there (I don't know which ones are expected)

You can run on master to get the list of current expected failures.

> Could you please point me to a specific test case

Actually all cases that actually execute something, fail. So for
instance the very first, csu/test-as-const-rtld-sizes

> (and not just "run the whole thing") that started failing because of
> this change, and teach me how to run just that one? I have read [0],
> but that says, "To test just one test you have to have already run the
> entire testsuite", which is a non-starter.

You can use

./testrun.sh csu/test-as-const-rtld-sizes

> Would it have been easy for me to run the full test suite, I would
> surely do that before submitting any patches. But it's not.

Then it's simple: we have to fix that first.

Samuel
  
Sergey Bugaev April 12, 2023, 10:42 a.m. UTC | #9
On Wed, Apr 12, 2023 at 12:00 PM Samuel Thibault
<samuel.thibault@gnu.org> wrote:
> Was that tst-spawn6?

I can't point out any specific one :( There is just this huge wall of
output, and then the whole thing breaks / hangs. FWIW, the last lines
before my SSH / network stack died were: (Gmail is surely going to
wrap this, but hopefully not too badly)

gcc /home/sergey/glibc/build/elf/dso-sort-tests-src/tst-dso-ordering9_42-bdeca-c.c
-c -std=gnu11 -fgnu89-inline  -g -O2 -Wall -Wwrite-strings -Wundef
-Werror -fmerge-all-constants -frounding-math -fno-stack-protector
-fno-common -Wno-parentheses -Wstrict-prototypes
-Wold-style-definition -fmath-errno    -fPIC              -I../include
-I/home/sergey/glibc/build/elf  -I/home/sergey/glibc/build
-I../sysdeps/mach/hurd/i386  -I../sysdeps/mach/hurd/x86
-I../sysdeps/mach/hurd/i386/htl  -I../sysdeps/mach/hurd/htl
-I../sysdeps/hurd/htl  -I../sysdeps/mach/htl  -I../sysdeps/htl/include
-I../sysdeps/htl  -I../sysdeps/pthread  -I../sysdeps/mach/hurd/x86/htl
 -I../sysdeps/i386/htl  -I../sysdeps/x86/htl  -I../sysdeps/mach/hurd
-I../sysdeps/gnu  -I../sysdeps/unix/bsd  -I../sysdeps/unix/inet
-I../sysdeps/mach/i386  -I../sysdeps/mach/x86
-I../sysdeps/mach/include -I../sysdeps/mach
-I../sysdeps/i386/i686/fpu/multiarch  -I../sysdeps/i386/i686/fpu
-I../sysdeps/i386/i686/multiarch  -I../sysdeps/i386/i686
-I../sysdeps/i386/fpu  -I../sysdeps/x86/fpu  -I../sysdeps/i386
-I../sysdeps/x86/include -I../sysdeps/x86  -I../sysdeps/wordsize-32
-I../sysdeps/ieee754/float128  -I../sysdeps/ieee754/ldbl-96/include
-I../sysdeps/ieee754/ldbl-96  -I../sysdeps/ieee754/dbl-64
-I../sysdeps/ieee754/flt-32  -I../sysdeps/hurd/include
-I../sysdeps/hurd  -I../sysdeps/unix  -I../sysdeps/posix
-I../sysdeps/ieee754  -I../sysdeps/generic -I../hurd
-I/home/sergey/glibc/build/hurd/ -I../mach
-I/home/sergey/glibc/build/mach/ -I.. -I../libio -I.
-D_LIBC_REENTRANT -include /home/sergey/glibc/build/libc-modules.h
-DMODULE_NAME=testsuite -include ../include/libc-symbols.h  -DPIC
-DSHARED     -DTOP_NAMESPACE=glibc -o
/home/sergey/glibc/build/elf/tst-dso-ordering9-dir/tst-dso-ordering9_42-bdeca-c.os
client_loop: send disconnect: Broken pipe

...but that doesn't seem useful.

> You can run on master to get the list of current expected failures.

But that's the thing, I can not :|

> > Could you please point me to a specific test case
>
> Actually all cases that actually execute something, fail. So for
> instance the very first, csu/test-as-const-rtld-sizes

That one seems to have passed:

sergey@hurdle:~/glibc/build$ cat csu/test-as-const-rtld-sizes.out
sergey@hurdle:~/glibc/build$ cat csu/test-as-const-rtld-sizes.test-result
PASS: csu/test-as-const-rtld-sizes
original exit status 0

this is with your revert reverted.

Here's some more evidence that compiling out the no-tls case should be
safe, about errno again:

(gdb) info symbol 0x00023670
__errno_location in section .text of /lib/ld.so
(gdb) disas 0x00023670
Dump of assembler code for function __errno_location:
   0x00023670 <+0>:       call   0x2919a <__x86.get_pc_thunk.ax>
   0x00023675 <+5>:       add    $0x1297f,%eax
   0x0002367a <+10>:      lea    0xa24(%eax),%eax
   0x00023680 <+16>:      ret
End of assembler dump.
(gdb) info symbol 0x01077f00
__errno_location in section .text of /lib/i386-gnu/libc.so.0.3
(gdb) disas 0x01077f00
Dump of assembler code for function __GI___errno_location:
   0x01077f00 <+0>:       call   0x1220bc1 <__x86.get_pc_thunk.ax>
   0x01077f05 <+5>:       add    $0x22e0ef,%eax
   0x01077f0a <+10>:      mov    -0x1b8(%eax),%eax
   0x01077f10 <+16>:      add    %gs:0x0,%eax
   0x01077f17 <+23>:      ret
End of assembler dump.

This is what's shipped in Debian, i.e. prior to my changes. As you can
see, the libc.so version accesses %gs:0x0 without any checks, which
makes sense, since __errno_location is just return &errno, 'errno'
itself being a thread-local. There is no __LIBC_NO_TLS check in the
source code. And yet it works just fine! It follows that all
__LIBC_NO_TLS checks in libc.so must be redundant.

> > Would it have been easy for me to run the full test suite, I would
> > surely do that before submitting any patches. But it's not.
>
> Then it's simple: we have to fix that first.

If that's simple to fix, great!

Can you reproduce my issues?

Sergey
  
Samuel Thibault April 12, 2023, 10:45 a.m. UTC | #10
Sergey Bugaev, le mer. 12 avril 2023 13:42:50 +0300, a ecrit:
> > You can run on master to get the list of current expected failures.
> 
> But that's the thing, I can not :|

I meant after having fixed the tests that break your testing, by
disabling them as I hinted.

Samuel
  
Sergey Bugaev April 12, 2023, 5:18 p.m. UTC | #11
On Wed, Apr 12, 2023 at 1:45 PM Samuel Thibault <samuel.thibault@gnu.org> wrote:
>
> Sergey Bugaev, le mer. 12 avril 2023 13:42:50 +0300, a ecrit:
> > > You can run on master to get the list of current expected failures.
> >
> > But that's the thing, I can not :|
>
> I meant after having fixed the tests that break your testing, by
> disabling them as I hinted.

Alright, after spending a day trying to make this work, I declare this
a lost cause.

I have disabled the two tests you suggested, and some more that seemed
to behave particularly bad. It still always kills / hard-locks my
system, at seemingly random places. This means fs corruption, each
time, so I'm not willing to try doing this again and again.

But anyway, the test you mentioned works here. All the tests in csu/ do:

$ for test in $(find csu/ -executable -name test-\*); do echo running
$test; ./testrun.sh $test && echo ok; done
running csu/test-as-const-rtld-sizes
ok
running csu/test-as-const-tcb-offsets
ok
running csu/test-as-const-link-defines
ok
running csu/test-as-const-tlsdesc
ok
running csu/test-as-const-cpu-features-offsets
ok
$

Is there any other way for me to reproduce the crashes? If you can
reproduce them, can you see what's going on, maybe enable LD_DEBUG and
see if rtld is getting relocated early for some reason? Or maybe you
could at least get a backtrace, and then we could try to stare at it
and figure out what's going on together?

Maybe you're building with some flags that affect this? I'm only doing
../configure.

Sergey
  
Samuel Thibault April 12, 2023, 11:46 p.m. UTC | #12
Sergey Bugaev, le mer. 12 avril 2023 13:42:50 +0300, a ecrit:
> before my SSH / network stack died were: (Gmail is surely going to
> wrap this, but hopefully not too badly)
> 
> gcc /home/sergey/glibc/build/elf/dso-sort-tests-src/tst-dso-ordering9_42-bdeca-c.c
> -c -std=gnu11 -fgnu89-inline  -g -O2 -Wall -Wwrite-strings -Wundef
> -Werror -fmerge-all-constants -frounding-math -fno-stack-protector
> -fno-common -Wno-parentheses -Wstrict-prototypes
> -Wold-style-definition -fmath-errno    -fPIC              -I../include
> -I/home/sergey/glibc/build/elf  -I/home/sergey/glibc/build
> -I../sysdeps/mach/hurd/i386  -I../sysdeps/mach/hurd/x86
> -I../sysdeps/mach/hurd/i386/htl  -I../sysdeps/mach/hurd/htl
> -I../sysdeps/hurd/htl  -I../sysdeps/mach/htl  -I../sysdeps/htl/include
> -I../sysdeps/htl  -I../sysdeps/pthread  -I../sysdeps/mach/hurd/x86/htl
>  -I../sysdeps/i386/htl  -I../sysdeps/x86/htl  -I../sysdeps/mach/hurd
> -I../sysdeps/gnu  -I../sysdeps/unix/bsd  -I../sysdeps/unix/inet
> -I../sysdeps/mach/i386  -I../sysdeps/mach/x86
> -I../sysdeps/mach/include -I../sysdeps/mach
> -I../sysdeps/i386/i686/fpu/multiarch  -I../sysdeps/i386/i686/fpu
> -I../sysdeps/i386/i686/multiarch  -I../sysdeps/i386/i686
> -I../sysdeps/i386/fpu  -I../sysdeps/x86/fpu  -I../sysdeps/i386
> -I../sysdeps/x86/include -I../sysdeps/x86  -I../sysdeps/wordsize-32
> -I../sysdeps/ieee754/float128  -I../sysdeps/ieee754/ldbl-96/include
> -I../sysdeps/ieee754/ldbl-96  -I../sysdeps/ieee754/dbl-64
> -I../sysdeps/ieee754/flt-32  -I../sysdeps/hurd/include
> -I../sysdeps/hurd  -I../sysdeps/unix  -I../sysdeps/posix
> -I../sysdeps/ieee754  -I../sysdeps/generic -I../hurd
> -I/home/sergey/glibc/build/hurd/ -I../mach
> -I/home/sergey/glibc/build/mach/ -I.. -I../libio -I.
> -D_LIBC_REENTRANT -include /home/sergey/glibc/build/libc-modules.h
> -DMODULE_NAME=testsuite -include ../include/libc-symbols.h  -DPIC
> -DSHARED     -DTOP_NAMESPACE=glibc -o
> /home/sergey/glibc/build/elf/tst-dso-ordering9-dir/tst-dso-ordering9_42-bdeca-c.os
> client_loop: send disconnect: Broken pipe
> 
> ...but that doesn't seem useful.

Perhaps you can pipe to tee -a build.log?

> This is what's shipped in Debian, i.e. prior to my changes. As you can
> see, the libc.so version accesses %gs:0x0 without any checks, which
> makes sense, since __errno_location is just return &errno, 'errno'
> itself being a thread-local. There is no __LIBC_NO_TLS check in the
> source code. And yet it works just fine!

But possibly that's just because the ld.so code before setting up tls
doesn't access errno.

> > > Would it have been easy for me to run the full test suite, I would
> > > surely do that before submitting any patches. But it's not.
> >
> > Then it's simple: we have to fix that first.
> 
> If that's simple to fix, great!
> 
> Can you reproduce my issues?

No :/

> > > > You can run on master to get the list of current expected failures.
> > >
> > > But that's the thing, I can not :|
> >
> > I meant after having fixed the tests that break your testing, by
> > disabling them as I hinted.
> 
> Alright, after spending a day trying to make this work, I declare this
> a lost cause.

Ergl, sorry it ate up that much time :/

> I have disabled the two tests you suggested, and some more that seemed
> to behave particularly bad.

Which one? I have been running glibc's testsuite safely for several
years now.

> It still always kills / hard-locks my system, at seemingly random
> places.

Mmm, what is your system setup in terms of disk drivers and alike? As
of now, I am using the the Debian 1.8+git20230410-486-dbg kernel, the
gnumach AHCI disk drivers. I have 3635000KB of memory showing up as
total ni `free` (kvm -m 3550M, the maximum one can have with <4G phys
addressing).

Perhaps one thing worth noting: my /tmp is on the real disk, not a
tmpfs.

> This means fs corruption, each time, so I'm not willing to try doing
> this again and again.

I understand, but since it's apparently random, this looks like a
problem on your system, not related to glibc specifically, that you'll
probably want to fix anyway.

> Is there any other way for me to reproduce the crashes? If you can
> reproduce them, can you see what's going on, maybe enable LD_DEBUG and
> see if rtld is getting relocated early for some reason? Or maybe you
> could at least get a backtrace, and then we could try to stare at it
> and figure out what's going on together?
> 
> Maybe you're building with some flags that affect this? I'm only doing
> ../configure.

I'm using

../configure --prefix= --enable-pt_chown

I have uploaded the build result of master +
b37899d34d2190ef4b454283188f22519f096048 restored on:

https://dept-info.labri.fr/~thibault/tmp/libc.so.0.3
https://dept-info.labri.fr/~thibault/tmp/ld.so
https://dept-info.labri.fr/~thibault/tmp/test-as-const-rtld-sizes

you can run it by hand with
./ld.so --library-path $PWD ./test-as-const-rtld-sizes

It hangs on my system. I have put the core dump on 

https://dept-info.labri.fr/~thibault/tmp/core.18601

which can be inspected with 

gdb ./ld.so core.18601


Running live gdb ./ld.so 18529, I get:

(gdb) thread apply all bt

Thread 2 (Thread 18529.2):
#0  0x0102aa3c in __GI___mach_msg_trap () at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
#1  0x0102b1d6 in __GI___mach_msg (msg=0x1315d10, option=3, send_size=64, rcv_size=32, rcv_name=0, timeout=0, notify=0) at msg.c:111
#2  0x012c9850 in __gsync_wait (task=<optimized out>, addr=<optimized out>, val1=<optimized out>, val2=<optimized out>, msec=<optimized out>, flags=<optimized out>) at ./build-tree/hurd-i386-libc/mach/RPC_gsync_wait.c:186
#3  0x0104631b in __GI___spin_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:60
#4  __GI___mutex_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:119
#5  __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:80
#6  0x0116abb8 in _hurd_critical_section_lock () at ../hurd/hurd/signal.h:230
#7  _hurd_fd_get (fd=2) at ../hurd/hurd/fd.h:74
#8  __GI___write_nocancel (fd=2, buf=0x1315e60, nbytes=<optimized out>) at ../sysdeps/mach/hurd/write_nocancel.c:26
#9  0x01149135 in __GI___libc_write (fd=2, buf=0x1315e60, nbytes=41) at ../sysdeps/mach/hurd/write.c:26
#10 0x0116ff07 in __GI___writev (fd=<optimized out>, vector=<optimized out>, count=<optimized out>) at ../sysdeps/posix/writev.c:87
#11 0x010b9df5 in writev_for_fatal (fd=<optimized out>, total=<optimized out>, niov=<optimized out>, iov=<optimized out>) at ../sysdeps/posix/libc_fatal.c:44
#12 __libc_message (fmt=<optimized out>) at ../sysdeps/posix/libc_fatal.c:124
#13 0x010b9ead in __GI___libc_fatal (message=0x12216b4 "hurd: Can't add reference on Mach thread\n") at ../sysdeps/posix/libc_fatal.c:159
#14 0x01046524 in __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:136
#15 0x0103fd33 in __GI__hurd_self_sigstate () at ../hurd/hurd/signal.h:173
#16 _hurd_msgport_receive (arg=<error reading variable: Cannot access memory at address 0x1316004>) at msgportdemux.c:47
Backtrace stopped: Cannot access memory at address 0x1316000

Thread 1 (Thread 18529.1):
#0  0x0102aa3c in __GI___mach_msg_trap () at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
#1  0x0102b1d6 in __GI___mach_msg (msg=0x10018dc, option=3, send_size=64, rcv_size=32, rcv_name=31, timeout=0, notify=0) at msg.c:111
#2  0x012c9850 in __gsync_wait (task=<optimized out>, addr=<optimized out>, val1=<optimized out>, val2=<optimized out>, msec=<optimized out>, flags=<optimized out>) at ./build-tree/hurd-i386-libc/mach/RPC_gsync_wait.c:186
#3  0x01110cb7 in __GI___spin_lock (__lock=<optimized out>) at ../mach/lock-intern.h:60
#4  __GI___mutex_lock (__lock=<optimized out>) at ../mach/lock-intern.h:119
#5  __GI__Fork () at ../sysdeps/mach/hurd/_Fork.c:116
#6  0x01110892 in __libc_fork () at fork.c:74
#7  0x010089af in ?? ()


(gdb) thread apply all bt full

Thread 2 (Thread 18529.2):
#0  0x0102aa3c in __GI___mach_msg_trap () at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
No locals.
#1  0x0102b1d6 in __GI___mach_msg (msg=0x1315d10, option=3, send_size=64, rcv_size=32, rcv_name=0, timeout=0, notify=0) at msg.c:111
        ret = <optimized out>
#2  0x012c9850 in __gsync_wait (task=<optimized out>, addr=<optimized out>, val1=<optimized out>, val2=<optimized out>, msec=<optimized out>, flags=<optimized out>) at ./build-tree/hurd-i386-libc/mach/RPC_gsync_wait.c:186
        Mess = {In = {Head = {msgh_bits = 5395, msgh_size = 64, {msgh_remote_port = 36, msgh_remote_port_do_not_use = 36}, {msgh_local_port = 0, msgh_protected_payload = 0}, msgh_seqno = 0, msgh_id = 4204}, addrType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, addr = 19642436, val1Type = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, val1 = 2, val2Type = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, val2 = 0, msecType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, msec = 0, flagsType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, flags = 0}, Out = {Head = {msgh_bits = 5395, msgh_size = 64, {msgh_remote_port = 36, msgh_remote_port_do_not_use = 36}, {msgh_local_port = 0, msgh_protected_payload = 0}, msgh_seqno = 0, msgh_id = 4204}, RetCodeType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, RetCode = 19642436}}
        InP = 0x1315d10
        OutP = 0x1315d10
        msg_result = <optimized out>
#3  0x0104631b in __GI___spin_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:60
        __iptr = 0x12bb844 <_hurd_siglock>
        __flags = 0
#4  __GI___mutex_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:119
No locals.
#5  __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:80
        ss = <optimized out>
#6  0x0116abb8 in _hurd_critical_section_lock () at ../hurd/hurd/signal.h:230
        self = 28
        ss = <optimized out>
        ss = <optimized out>
        self = <optimized out>
#7  _hurd_fd_get (fd=2) at ../hurd/hurd/fd.h:74
        __hurd_critical__ = <optimized out>
        descriptor = <optimized out>
        descriptor = <optimized out>
        __hurd_critical__ = <optimized out>
        cell = <optimized out>
#8  __GI___write_nocancel (fd=2, buf=0x1315e60, nbytes=<optimized out>) at ../sysdeps/mach/hurd/write_nocancel.c:26
        descriptor = <optimized out>
        err = <optimized out>
#9  0x01149135 in __GI___libc_write (fd=2, buf=0x1315e60, nbytes=41) at ../sysdeps/mach/hurd/write.c:26
        ret = <optimized out>
        cancel_oldtype = 0
#10 0x0116ff07 in __GI___writev (fd=<optimized out>, vector=<optimized out>, count=<optimized out>) at ../sysdeps/posix/writev.c:87
        bytes = <optimized out>
        buffer = <optimized out>
        malloced_buffer = 0x0
        to_copy = <optimized out>
        bp = <optimized out>
        bytes_written = <optimized out>
#11 0x010b9df5 in writev_for_fatal (fd=<optimized out>, total=<optimized out>, niov=<optimized out>, iov=<optimized out>) at ../sysdeps/posix/libc_fatal.c:44
        __result = <optimized out>
#12 __libc_message (fmt=<optimized out>) at ../sysdeps/posix/libc_fatal.c:124
        iov = <optimized out>
        total = 41
        buf = <optimized out>
        ap = 0x1315f6c "\001"
        fd = <optimized out>
        list = <optimized out>
        nlist = <optimized out>
        cp = <optimized out>
#13 0x010b9ead in __GI___libc_fatal (message=0x12216b4 "hurd: Can't add reference on Mach thread\n") at ../sysdeps/posix/libc_fatal.c:159
--Type <RET> for more, q to quit, c to continue without paging--
No locals.
#14 0x01046524 in __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:136
        err = <optimized out>
        s = <optimized out>
        ss = <optimized out>
#15 0x0103fd33 in __GI__hurd_self_sigstate () at ../hurd/hurd/signal.h:173
        self = 28
        ss = 0x0
        ss = <optimized out>
        self = <optimized out>
#16 _hurd_msgport_receive (arg=<error reading variable: Cannot access memory at address 0x1316004>) at msgportdemux.c:47
No locals.
Backtrace stopped: Cannot access memory at address 0x1316000

Thread 1 (Thread 18529.1):
#0  0x0102aa3c in __GI___mach_msg_trap () at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
No locals.
#1  0x0102b1d6 in __GI___mach_msg (msg=0x10018dc, option=3, send_size=64, rcv_size=32, rcv_name=31, timeout=0, notify=0) at msg.c:111
        ret = <optimized out>
#2  0x012c9850 in __gsync_wait (task=<optimized out>, addr=<optimized out>, val1=<optimized out>, val2=<optimized out>, msec=<optimized out>, flags=<optimized out>) at ./build-tree/hurd-i386-libc/mach/RPC_gsync_wait.c:186
        Mess = {In = {Head = {msgh_bits = 4608, msgh_size = 32, {msgh_remote_port = 0, msgh_remote_port_do_not_use = 0}, {msgh_local_port = 31, msgh_protected_payload = 31}, msgh_seqno = 29, msgh_id = 4304}, addrType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, addr = 28, val1Type = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, val1 = 2, val2Type = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, val2 = 0, msecType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, msec = 0, flagsType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, flags = 0}, Out = {Head = {msgh_bits = 4608, msgh_size = 32, {msgh_remote_port = 0, msgh_remote_port_do_not_use = 0}, {msgh_local_port = 31, msgh_protected_payload = 31}, msgh_seqno = 29, msgh_id = 4304}, RetCodeType = {msgt_name = 2, msgt_size = 32, msgt_number = 1, msgt_inline = 1, msgt_longform = 0, msgt_deallocate = 0, msgt_unused = 0}, RetCode = 28}}
        InP = 0x10018dc
        OutP = 0x10018dc
        msg_result = <optimized out>
#3  0x01110cb7 in __GI___spin_lock (__lock=<optimized out>) at ../mach/lock-intern.h:60
        __iptr = <optimized out>
        __flags = 0
#4  __GI___mutex_lock (__lock=<optimized out>) at ../mach/lock-intern.h:119
No locals.
#5  __GI__Fork () at ../sysdeps/mach/hurd/_Fork.c:116
        newtask = 19675056
        thread_refs = 16831952
        sigthread_refs = 19762984
        state = {gs = 16784272, fs = 16784276, es = 134259806, ds = 16806696, edi = 16784276, esi = 134441884, ebp = 1, esp = 16832864, ebx = 5, edx = 0, ecx = 1, eax = 134441520, eip = 16777324, cs = 0, efl = 0, uesp = 134438900, ss = 19640256}
        threads = 0x0
        stopped = 0
        newproc = 134438900
        nportnames = 0
        thread = 16927252
        sigthread = 19674880
        statecount = 19760428
        portnames = 0x0
        nporttypes = 0
        porttypes = 0x0
        nthreads = 0
        ports_locked = 0
        env = {{__jmpbuf = {19974152, 19974152, 0, 0, 16783680, 17894163}, __mask_was_saved = 0, __saved_mask = 134441992}}
        pid = 134438900
        i = 16807887
        err = <optimized out>
        ss = 0x130c808
        __PRETTY_FUNCTION__ = "_Fork"
        lose = <optimized out>
#6  0x01110892 in __libc_fork () at fork.c:74
        multiple_threads = true
        lastrun = 0
--Type <RET> for more, q to quit, c to continue without paging--
        nss_database_data = {nsswitch_conf = {size = 75406264232385142, ino = 84360991733424116, mtime = {tv_sec = 577421931782412918, tv_nsec = 16826440}, ctime = {tv_sec = 72187705911477250, tv_nsec = 134441520}}, services = {0x8036c08, 0x100d960, 0x1, 0x5, 0x0, 0x90, 0x8035ff4, 0x1007588, 0x16, 0x54, 0x107aaf6 <__GI_getenv+102>, 0x1018cf8, 0x1110850 <__libc_fork>, 0xe, 0x0, 0x801033d <_dl_fixup+13>, 0x100bff4}, reload_disabled = 16784548, initialized = false}
        pid = <optimized out>
#7  0x010089af in ?? ()
No symbol table info available.
Backtrace stopped: previous frame inner to this frame (corrupt stack?)



Interestingly, watching for the $gs update:

€ gdb --args ./ld.so --library-path=/tmp ./test-as-const-rtld-sizes
(gdb) b _start
Breakpoint 1 at 0x1a5a0
(gdb) r
Starting program: /tmp/ld.so --library-path /tmp ./test-as-const-rtld-sizes

Thread 5 hit Breakpoint 1, 0x0801a5a0 in _start ()
(gdb) watch $gs
Watchpoint 2: $gs
(gdb) c
Continuing.

Thread 5 hit Watchpoint 2: $gs

Old value = 31
New value = 75
_hurd_tls_init (tcb=0x100e6c0) at ../sysdeps/mach/hurd/i386/tls.h:179
179       __hurd_reply_port0 = MACH_PORT_NULL;
(gdb) bt
#0  _hurd_tls_init (tcb=0x100e6c0) at ../sysdeps/mach/hurd/i386/tls.h:179
#1  call_tls_init_tp (addr=0x100e6c0) at ../sysdeps/generic/dl-call_tls_init_tp.h:31
#2  init_tls (naudit=naudit@entry=0) at rtld.c:797
#3  0x0801db49 in dl_main (phdr=<optimized out>, phnum=<optimized out>, user_entry=<optimized out>, auxv=<optimized out>)
    at rtld.c:2046
#4  0x0801a07d in go (argdata=0x1001d50) at ../sysdeps/mach/hurd/dl-sysdep.c:172
#5  0x0801fa13 in _hurd_startup (argptr=<optimized out>, main=<optimized out>) at hurdstartup.c:184
#6  0x08019ee5 in _dl_sysdep_start (start_argptr=0x1002000, dl_main=0x801b810 <dl_main>)
    at ../sysdeps/mach/hurd/dl-sysdep.c:229
#7  0x0801b565 in _dl_start_final (arg=<optimized out>) at rtld.c:495
#8  _dl_start (arg=<optimized out>) at rtld.c:582
#9  0x0801a5ab in _start () from /tmp/ld.so

At that point the library loading has happened:

(gdb) info sharedlibrary 
From        To          Syms Read   Shared Object Library
0x08000db0  0x080256e1  Yes         /tmp/ld.so
0x0102a650  0x01200d35  No          /tmp/libc.so.0.3
0x012c49a0  0x012d0ad4  No          /tmp/libmachuser.so.1
0x012e0bc0  0x012fee50  No          /tmp/libhurduser.so.0.3

And the function symbols indeed seem to have been overloaded:

(gdb) l __write
384     __write (int fd, const void *buf, size_t nbytes)
385     {
386       error_t err;
387       vm_size_t nwrote;
388
389       assert (fd < _hurd_init_dtablesize);


That is why I'm thinking that apparently exposing the libc functions
happens before setting up TLS, and thus potential for mayhem if libc
assumes that TLS is set up. The loading itself is apparently done in the
_dl_map_object_deps call of dl_main.

Samuel
  
Sergey Bugaev April 13, 2023, 10:02 a.m. UTC | #13
Wow, this is great, thank you! You've really gone above and beyond
compared to what I expected you to do.

Some replies below; will reply to other points later.

On Thu, Apr 13, 2023 at 2:47 AM Samuel Thibault <samuel.thibault@gnu.org> wrote:
> > Maybe you're building with some flags that affect this? I'm only doing
> > ../configure.
>
> I'm using
>
> ../configure --prefix= --enable-pt_chown

Yeah, that shouldn't influence anything vs what I have.

> I have uploaded the build result of master +
> b37899d34d2190ef4b454283188f22519f096048 restored on:
>
> https://dept-info.labri.fr/~thibault/tmp/libc.so.0.3
> https://dept-info.labri.fr/~thibault/tmp/ld.so
> https://dept-info.labri.fr/~thibault/tmp/test-as-const-rtld-sizes
>
> you can run it by hand with
> ./ld.so --library-path $PWD ./test-as-const-rtld-sizes
>
> It hangs on my system. I have put the core dump on
>
> https://dept-info.labri.fr/~thibault/tmp/core.18601
>
> which can be inspected with
>
> gdb ./ld.so core.18601

Thank you, I'm going to take a look.

> Running live gdb ./ld.so 18529, I get:
>
> (gdb) thread apply all bt
>
> Thread 2 (Thread 18529.2):
> #0  0x0102aa3c in __GI___mach_msg_trap () at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
> #1  0x0102b1d6 in __GI___mach_msg (msg=0x1315d10, option=3, send_size=64, rcv_size=32, rcv_name=0, timeout=0, notify=0) at msg.c:111
> #2  0x012c9850 in __gsync_wait (task=<optimized out>, addr=<optimized out>, val1=<optimized out>, val2=<optimized out>, msec=<optimized out>, flags=<optimized out>) at ./build-tree/hurd-i386-libc/mach/RPC_gsync_wait.c:186
> #3  0x0104631b in __GI___spin_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:60
> #4  __GI___mutex_lock (__lock=0x12bb844 <_hurd_siglock>) at ../mach/lock-intern.h:119
> #5  __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:80
> #6  0x0116abb8 in _hurd_critical_section_lock () at ../hurd/hurd/signal.h:230
> #7  _hurd_fd_get (fd=2) at ../hurd/hurd/fd.h:74
> #8  __GI___write_nocancel (fd=2, buf=0x1315e60, nbytes=<optimized out>) at ../sysdeps/mach/hurd/write_nocancel.c:26
> #9  0x01149135 in __GI___libc_write (fd=2, buf=0x1315e60, nbytes=41) at ../sysdeps/mach/hurd/write.c:26
> #10 0x0116ff07 in __GI___writev (fd=<optimized out>, vector=<optimized out>, count=<optimized out>) at ../sysdeps/posix/writev.c:87
> #11 0x010b9df5 in writev_for_fatal (fd=<optimized out>, total=<optimized out>, niov=<optimized out>, iov=<optimized out>) at ../sysdeps/posix/libc_fatal.c:44
> #12 __libc_message (fmt=<optimized out>) at ../sysdeps/posix/libc_fatal.c:124
> #13 0x010b9ead in __GI___libc_fatal (message=0x12216b4 "hurd: Can't add reference on Mach thread\n") at ../sysdeps/posix/libc_fatal.c:159
> #14 0x01046524 in __GI__hurd_thread_sigstate (thread=<optimized out>) at hurdsig.c:136
> #15 0x0103fd33 in __GI__hurd_self_sigstate () at ../hurd/hurd/signal.h:173
> #16 _hurd_msgport_receive (arg=<error reading variable: Cannot access memory at address 0x1316004>) at msgportdemux.c:47
> Backtrace stopped: Cannot access memory at address 0x1316000

So the immediate cause of the hang is we deadlock trying to take the
_hurd_siglock while already holding it (inside #14 0x01046524 in
__GI__hurd_thread_sigstate), since it's just a non-recursive struct
mutex. We should probably write our own version of writev_for_fatal
that does no locking (other than maybe trylock) and tries to not touch
any TLS or sigstate. Like, just grab the port from _hurd_dtable (with
no critical sections, no nothin') and call io_write on it (with the
regular non-intr mach_msg).

That, and abort () should be more careful with locking the sigstate
too, in order not to fault and/or deadlock if sigstate / TLS is
broken. Well, in this case we didn't even get to abort (), we
deadlocked on the write, but abort would be next.

But the underlying issue is that the thread port is bogus, which --
what? How can this even happen? This is not even some other thread's
port (maybe a thread could die and then its port right would turn into
a dead name, and then mach_port_mod_refs would return
KERN_INVALID_RIGHT if you try to add a send right...), this is our
very own thread port, the result of mach_thread_self () which was
called just several moments ago in hurd_self_sigstate ()!

If we're trying to tie this to TLS somehow, maybe the port is fine,
but the mach_port_mod_refs RPS fails because something is off with the
reply port. But also note that at this point we're well into libc.so
(this is the msgport thread already, and the other thread below is
already running user's code!), so clearly the TLS must have been set
up (and not by TLS_INIT_TP, this is not the main thread). And since we
managed to create the thread (which is done in libc.so, not ld.so),
TLS must have been OK in the main thread too (thread_create is not a
direct syscall).

And if TLS wasn't set up, I'd expect a TLS access to segfault or
busfault or something, not to read bogus data -- or am I wrong? what
does $gs_base initially point to, before it's set up? I would assume
it's either just 0x0, or the %gs:something read does not work at all
(SIGBUS).

> Interestingly, watching for the $gs update:
>
> € gdb --args ./ld.so --library-path=/tmp ./test-as-const-rtld-sizes
> (gdb) b _start
> Breakpoint 1 at 0x1a5a0
> (gdb) r
> Starting program: /tmp/ld.so --library-path /tmp ./test-as-const-rtld-sizes
>
> Thread 5 hit Breakpoint 1, 0x0801a5a0 in _start ()
> (gdb) watch $gs
> Watchpoint 2: $gs
> (gdb) c
> Continuing.

Cool, I didn't know you could watch a register like that -- although
it appears to be super slow, so it must be not using hardware
watchpoints.

> At that point the library loading has happened:
>
> (gdb) info sharedlibrary
> From        To          Syms Read   Shared Object Library
> 0x08000db0  0x080256e1  Yes         /tmp/ld.so
> 0x0102a650  0x01200d35  No          /tmp/libc.so.0.3
> 0x012c49a0  0x012d0ad4  No          /tmp/libmachuser.so.1
> 0x012e0bc0  0x012fee50  No          /tmp/libhurduser.so.0.3
>
> And the function symbols indeed seem to have been overloaded:
>
> (gdb) l __write
> 384     __write (int fd, const void *buf, size_t nbytes)
> 385     {
> 386       error_t err;
> 387       vm_size_t nwrote;
> 388
> 389       assert (fd < _hurd_init_dtablesize);
>
>
> That is why I'm thinking that apparently exposing the libc functions
> happens before setting up TLS, and thus potential for mayhem if libc
> assumes that TLS is set up. The loading itself is apparently done in the
> _dl_map_object_deps call of dl_main.

Well, if that's why you're thinking libc.so functions are already in
use by ld.so, this will be easy to disprove :)

GDB is super cool, but it's not *that* smart. When you "l __write", it
likely just looks through the "loaded" DSOs and finds the symbol,
looks up its debuginfo, then source, and prints that. It may even say
that there are several places where a symbol with the same name is
defined. Here's what I get (on a different executable):

Thread 4 hit Temporary breakpoint 1, 0x080483f0 in main ()
(gdb) l __write
file: "../sysdeps/mach/hurd/dl-sysdep.c", line number: 389, symbol: "__write"
384     ../sysdeps/mach/hurd/dl-sysdep.c: No such file or directory.
file: "../sysdeps/mach/hurd/write.c", line number: 25, symbol:
"__GI___libc_write"
20      ../sysdeps/mach/hurd/write.c: No such file or directory.

Point is, GDB can look up symbols in DSOs, but it doesn't understand
symbol resolution rules: RTLD_LOCAL vs GLOBAL, linking namespaces,
whether a DSO has already been relocated or not, all of those things.

What you should really check is not what GDB prints on "l __write",
but rather what the GOT/PLT slots inside ld.so contain; that is where
ld.so will jump when it calls the functions. Here's an annotated GDB
session (with upstream Debian's glibc):

$ gdb -q ./hello
Reading symbols from ./hello...
# Start running so gdb can resolve addresses & symbols inside ld.so:
(gdb) starti
Starting program: /home/bugaevc/hello
Thread 4 stopped.
0x0001d550 in _start () from /lib/ld.so
# Let's look at the GOT/PLT entry for __write (actually __write_nocancel):
(gdb) p &'__write_nocancel@got.plt'
$1 = (<text from jump slot in .got.plt, no debug info> *) 0x36034
<__write_nocancel@got.plt>
(gdb) p '__write_nocancel@got.plt'
$2 = (<text from jump slot in .got.plt, no debug info>) 0xde6
# The entry itself is at 0x36034 (this will come in useful later), but
as of _start, it contains garbage (which is to be expected). Now let's
advance to TLS setup, and check again:
(gdb) advance __i386_set_gdt
__i386_set_gdt (target_thread=96, selector=0x1037b94, desc=...) at
./build-tree/hurd-i386-libc/mach/RPC_i386_set_gdt.c:79
79      ./build-tree/hurd-i386-libc/mach/RPC_i386_set_gdt.c: No such
file or directory.
(gdb) p '__write_nocancel@got.plt'
$3 = (<text from jump slot in .got.plt, no debug info>) 0x1c1f0 <__write>
# See, now the PLT entry points to __write. But whose __write this is?
(gdb) info symbol 0x1c1f0
__write_nocancel in section .text of /lib/ld.so
# It's ld.so's! That's because it has initially relocated itself so
that its symbols point back to itself.
# And now let's advance to where the signal thread is set up:
(gdb) tb _hurdsig_init
Function "_hurdsig_init" not defined.
Make breakpoint pending on future shared library load? (y or [n]) y
Temporary breakpoint 1 (_hurdsig_init) pending.
(gdb) c
Continuing.
Thread 4 hit Temporary breakpoint 1, _hurdsig_init
(intarray=0x103c000, intarraysize=5) at ./hurd/hurdsig.c:1453
1453    ./hurd/hurdsig.c: No such file or directory.
# Sanity check: are we inside libc.so now?
(gdb) info symbol $eip
_hurdsig_init in section .text of /lib/i386-gnu/libc.so.0.3
# Yes we are, good. Let's look at the PLT entry:
(gdb) p '__write_nocancel@got.plt'
$4 = (<text from jump slot in .got.plt, no debug info>) 0x11b5820
<__GI___write_nocancel>
# Huh, it clearly points to a different __write_nocancel now! But
whose PLT entry are we looking at now, ld.so's or libc.so's? Who
knows, let's put in that address from above explicitly:
(gdb) p *(void**) 0x36034
$5 = (void *) 0x11b5820 <__GI___write_nocancel>
# And just to be very sure, this is libc.so's __write_nocancel, right?
(gdb) info symbol 0x11b5820
__write_nocancel in section .text of /lib/i386-gnu/libc.so.0.3
# Right :)

Hope I'm making my point clear: "l __write" is no basis to suspect
that ld.so has already been bound to libc.so function. Something else
must be going on.

I'll be back with more results, and thank you again.

Sergey
  
Samuel Thibault April 13, 2023, 10:10 a.m. UTC | #14
Sergey Bugaev via Libc-alpha, le jeu. 13 avril 2023 13:02:58 +0300, a ecrit:
> this is our very own thread port, the result of mach_thread_self ()
> which was called just several moments ago in hurd_self_sigstate ()!

IIRC this is cached inside glibc... in a TLS variable probably. And thus
if that cache is not properly set up because TLS/non-TLS is not clear at
some point, mayhem happens :)

> > (gdb) watch $gs
> > Watchpoint 2: $gs
> > (gdb) c
> > Continuing.
> 
> Cool, I didn't know you could watch a register like that -- although
> it appears to be super slow, so it must be not using hardware
> watchpoints.

Yes, it's all software. Gdb can do a lot in software, it's just very slow :)

> What you should really check is not what GDB prints on "l __write",
> but rather what the GOT/PLT slots inside ld.so contain;

Yes, sure, I just don't have time to dive into that.

Samuel
  
Sergey Bugaev April 13, 2023, 12:17 p.m. UTC | #15
Alright, here's some more analysis.

I was unable to fetch your core dump (403), but the test case and
libc/ld all 200'ed, and the crash / hang reproduces -- awesome.

and guess what? Firstly, the error we get from mach_port_mod_refs is
EMACH_RCV_INVALID_NAME 268451842 (ipc/rcv) invalid name
so my hunch that this one reply port was broken turned out correct. So
now looking at how we get it...

(gdb) disas __mig_get_reply_port
Dump of assembler code for function __GI___mig_get_reply_port:
   0x0001c310 <+0>:       call   0x1cb9e2 <__x86.get_pc_thunk.cx>
   0x0001c315 <+5>:       add    $0x28ecdf,%ecx
   0x0001c31b <+11>:      mov    %gs:0x0,%eax
   0x0001c321 <+17>:      mov    0x38(%eax),%edx
   0x0001c324 <+20>:      test   %edx,%edx
   0x0001c326 <+22>:      je     0x1c340 <__GI___mig_get_reply_port+48>
   0x0001c328 <+24>:      lea    0x16d8(%ecx),%ecx
   0x0001c32e <+30>:      add    $0x38,%eax
   0x0001c331 <+33>:      cmp    %ecx,%eax
   0x0001c333 <+35>:      je     0x1c339 <__GI___mig_get_reply_port+41>
   0x0001c335 <+37>:      cmp    (%ecx),%edx
   0x0001c337 <+39>:      je     0x1c340 <__GI___mig_get_reply_port+48>
   0x0001c339 <+41>:      mov    %edx,%eax
   0x0001c33b <+43>:      ret
   0x0001c33c <+44>:      lea    0x0(%esi,%eiz,1),%esi
   0x0001c340 <+48>:      sub    $0xc,%esp
   0x0001c343 <+51>:      call   0x1ba40 <__GI___mach_reply_port>
   0x0001c348 <+56>:      mov    %gs:0x0,%eax
   0x0001c34e <+62>:      mov    0x38(%eax),%eax
   0x0001c351 <+65>:      add    $0xc,%esp
   0x0001c354 <+68>:      ret

That is surely very different from the one I cited in the cover
letter! Look at what it's doing to the result of mach_reply_port (in
%eax) -- it straight-up overwrites it with the tcb pointer.

That is, of course, exactly the __seg_gs miscompilation I reported,
and exactly what "hurd: Remove __hurd_local_reply_port" was supposed
to work around (by not accessing it as THREAD_SELF->reply_port, but
rather using THREAD_SETMEM). I have now sent the second version of
that patch, please try applying it and test if that fixes it.

And the commit that has broken things here was
748511f0bb61785f976e18843d707a8ba8fffe29 ("hurd: i386 TLS tweaks"),
where I made THREAD_SELF (and friends) work through __seg_gs,
triggering the miscompilation. I'm surprised your testing hasn't
caught it earlier, but maybe the extra branch/indirection for the
no-tls case was masking the miscompilation. Please also check if the
other reply port tweak you reverted today is also innocent.

I have uploaded my own builds of libc.so and ld.so at [0] & [1] (but
these are with v1 of "hurd: Remove __hurd_local_reply_port", and with
all of this patchset applied).
[0] https://darnassus.sceen.net/~bugaevc/libc.so
[1] https://darnassus.sceen.net/~bugaevc/ld.so

Please test whether they work on your system.

Sergey
  
Samuel Thibault April 13, 2023, 9:47 p.m. UTC | #16
Hello,

Sergey Bugaev, le jeu. 13 avril 2023 15:17:51 +0300, a ecrit:
> I have now sent the second version of
> that patch, please try applying it and test if that fixes it.

I'll give it a try.

> Please also check if the other reply port tweak you reverted today is
> also innocent.

The same tests fail, but differently :)

€ ./testrun.sh  signal/tst-signal --direct
Set handler.
Sending myself signal 15.
Received signal 15 (Terminated).
Fatal glibc error: ../sysdeps/mach/hurd/mig-reply.c:92 (__mig_dealloc_reply_port): assertion failed: port == arg
Abandon (core dumped)

#0  0x01027a5c in __GI___mach_msg_trap ()
    at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
2       kernel_trap (__mach_msg_trap,-25,7)
[Current thread is 1 (process 20128)]
(gdb) bt
#0  0x01027a5c in __GI___mach_msg_trap ()
    at /usr/src/glibc-upstream/build/mach/mach_msg_trap.S:2
#1  0x010281f6 in __GI___mach_msg (msg=0x10017d0, option=3, send_size=48,
    rcv_size=32, rcv_name=31, timeout=0, notify=0) at msg.c:111
#2  0x012eb1e8 in __msg_sig_post (process=27, signal=6, sigcode=0, refport=35)
    at /usr/src/glibc-upstream/build/hurd/RPC_msg_sig_post.c:158
#3  0x01074dbc in kill_port (refport=<optimized out>, msgport=<optimized out>)
    at ../sysdeps/mach/hurd/kill.c:67
#4  kill_pid (pid=pid@entry=20128) at ../sysdeps/mach/hurd/kill.c:104
#5  0x0107509c in __GI___kill (pid=<optimized out>, sig=<optimized out>)
    at ../sysdeps/mach/hurd/kill.c:138
#6  0x01074242 in __GI_raise (signo=6) at ../sysdeps/htl/raise.c:52
#7  __GI_raise (signo=6) at ../sysdeps/htl/raise.c:34
#8  0x010277b1 in __GI_abort () at abort.c:79
#9  0x0102791c in __libc_message (fmt=<optimized out>)
    at ../sysdeps/posix/libc_fatal.c:150
#10 0x0106bc20 in __libc_assert_fail (assertion=0x1222762 "port == arg",
    file=0x121c2a0 "../sysdeps/mach/hurd/mig-reply.c", line=92,
    function=0x121c2c4 <__PRETTY_FUNCTION__.0> "__mig_dealloc_reply_port")
    at __libc_assert_fail.c:31
#11 0x010283d2 in __GI___mig_dealloc_reply_port (arg=<optimized out>)
    at ../sysdeps/mach/hurd/mig-reply.c:92
#12 0x012eb23e in __msg_sig_post (process=27, signal=15, sigcode=0, refport=35)
    at /usr/src/glibc-upstream/build/hurd/RPC_msg_sig_post.c:160
#13 0x01074dbc in kill_port (refport=<optimized out>, msgport=<optimized out>)
    at ../sysdeps/mach/hurd/kill.c:67
#14 kill_pid (pid=pid@entry=20128) at ../sysdeps/mach/hurd/kill.c:104
#15 0x0107509c in __GI___kill (pid=<optimized out>, sig=<optimized out>)
    at ../sysdeps/mach/hurd/kill.c:138
#16 0x01074242 in __GI_raise (signo=15) at ../sysdeps/htl/raise.c:52
#17 __GI_raise (signo=15) at ../sysdeps/htl/raise.c:34
#18 0x010075ac in ?? ()
#19 0x0105d88f in __libc_start_call_main (argv=0x0, argc=16807472, main=0x2)
    at ../sysdeps/generic/libc_start_call_main.h:23
#20 __libc_start_main_impl (main=0x2, argc=16807472, argv=0x0, init=0x1007657,
    fini=0x1007540, rtld_fini=0x2, stack_end=0x1001d54)
    at ../csu/libc-start.c:360

I have put these on
https://dept-info.labri.fr/~thibault/tmp/libc.so
https://dept-info.labri.fr/~thibault/tmp/ld.so
https://dept-info.labri.fr/~thibault/tmp/core


Samuel
  
Samuel Thibault April 13, 2023, 10:21 p.m. UTC | #17
Samuel Thibault, le jeu. 13 avril 2023 23:47:38 +0200, a ecrit:
> Sergey Bugaev, le jeu. 13 avril 2023 15:17:51 +0300, a ecrit:
> > I have now sent the second version of
> > that patch, please try applying it and test if that fixes it.
> 
> I'll give it a try.

The first tests seem to be passing, I'll wait for the whole testsuite to
pass before pushing again.

Samuel
  
Sergey Bugaev April 14, 2023, 8:29 a.m. UTC | #18
On Fri, Apr 14, 2023 at 12:47 AM Samuel Thibault
<samuel.thibault@gnu.org> wrote:
> #10 0x0106bc20 in __libc_assert_fail (assertion=0x1222762 "port == arg",
>     file=0x121c2a0 "../sysdeps/mach/hurd/mig-reply.c", line=92,
>     function=0x121c2c4 <__PRETTY_FUNCTION__.0> "__mig_dealloc_reply_port")
>     at __libc_assert_fail.c:31
> #11 0x010283d2 in __GI___mig_dealloc_reply_port (arg=<optimized out>)
>     at ../sysdeps/mach/hurd/mig-reply.c:92
> #12 0x012eb23e in __msg_sig_post (process=27, signal=15, sigcode=0, refport=35)
>     at /usr/src/glibc-upstream/build/hurd/RPC_msg_sig_post.c:160
> #13 0x01074dbc in kill_port (refport=<optimized out>, msgport=<optimized out>)
>     at ../sysdeps/mach/hurd/kill.c:67

Interesting...

So first of all we're wrong, and the signal code does not destroy the
reply port. In fact it takes care to restore it:

  /* Destroy the MiG reply port used by the signal handler, and restore the
     reply port in use by the thread when interrupted.  */
  reply_port = THREAD_GETMEM (THREAD_SELF, reply_port);
  THREAD_SETMEM (THREAD_SELF, reply_port, scp->sc_reply_port);
  if (MACH_PORT_VALID (reply_port))
    __mach_port_destroy (__mach_task_self (), reply_port);

(Yes, that's the very same code we've altered recently, but it also
restored the reply port before my changes.)

See, it destroys the current reply port, and restores
scp->sc_reply_port. So then when we jump back to the user's code
(which should be in intr-msg), it gets MACH_RCV_TIMED_OUT (if that's
what happened), and proceeds to destroy the port itself.

One notable thing here: the sigreturn code (above) uses sc_reply_port
to destroy the current reply port. So:
* if the rcv timed out, we're going to destroy the reply port, but we
  haven't yet, so the server could fake a reply to mach_port_destroy
* if the mach_port_destroy RPC itself fails, we'll dealloc the user's port
  right here, and then it'll mismatch what the user expects it to be

The latter is in fact what's tripping the assertion in your test case.
mach_port_destroy (well, your test case contains my
mach_port_mod_refs) expects to receive a matching reply, but instead
we get a msg_sig_post_reply. We'd have caught this earlier if we
didn't neglect the result of mach_port_destroy.

Q: But why do we get msg_sig_post_reply here, why not in
   rpc_wait_trampoline, *before* running the signal handler?
A: Because msg_sig_post is uninterruptible. (and this is stupid! and so
   frustrating! we should just make it interruptible)

So: we should not use the user's reply port for this RPC. This is my
fault (747812349d42427c835aeac987aa67641d84f1ad). In my defense, there
was no comment explaining why this would be a bad idea; we should
write one. I'll prepare a patch.

But secondly, if we are not destroying the user's reply port, but
restoring it, then I don't think the "port = MACH_PORT_NULL, arg =
stale name" case can happen? So we don't need to handle it?

We should really just assert (arg == port), and this will help us find
more broken code.

Thoughts?

Sergey
  
Samuel Thibault April 14, 2023, 8:36 a.m. UTC | #19
Sergey Bugaev, le ven. 14 avril 2023 11:29:37 +0300, a ecrit:
> But secondly, if we are not destroying the user's reply port, but
> restoring it, then I don't think the "port = MACH_PORT_NULL, arg =
> stale name" case can happen? So we don't need to handle it?
> 
> We should really just assert (arg == port), and this will help us find
> more broken code.

I'd rather aim for that, yes.

Samuel
  
Sergey Bugaev April 14, 2023, 8:53 a.m. UTC | #20
On Fri, Apr 14, 2023 at 11:39 AM Samuel Thibault
<samuel.thibault@gnu.org> wrote:
> Sergey Bugaev, le ven. 14 avril 2023 11:29:37 +0300, a ecrit:
> > But secondly, if we are not destroying the user's reply port, but
> > restoring it, then I don't think the "port = MACH_PORT_NULL, arg =
> > stale name" case can happen? So we don't need to handle it?

And just as I sent this, I discovered that there is in fact a place
where we do destroy the port! Ugh!

It's in _hurdsig_abort_rpcs, if the interrupt_operation call fails.

Let's maybe not do that either? We're already making mach_msg seem to
have returned EINTR, which intr-msg knows how to handle.

Currently it's doing this:

    case EINTR:
      /* Either the process was stopped and continued,
         or the server doesn't support interrupt_operation.  */
      if (ss->intr_port != MACH_PORT_NULL)
        /* If this signal was for us and it should interrupt calls, the
           signal thread will have cleared SS->intr_port.
           Since it's not cleared, the signal was for another thread,
           or SA_RESTART is set.  Restart the interrupted call.  */
        {
          /* Make sure we have a valid reply port.  The one we were using
             may have been destroyed by interruption.  */
          m->header.msgh_local_port = rcv_name = __mig_get_reply_port ();
          m->header.msgh_bits = msgh_bits;
          option = user_option;
          timeout = user_timeout;
          goto message;
        }
      err = EINTR;

      /* The EINTR return indicates cancellation, so clear the flag.  */
      ss->cancel = 0;
      break;

but I'm thinking it instead should be doing this:

    case EINTR:
      /* Either the process was stopped and continued,
         or the server doesn't support interrupt_operation.  */
      if (ss->intr_port != MACH_PORT_NULL)
        /* If this signal was for us and it should interrupt calls, the
           signal thread will have cleared SS->intr_port.
           Since it's not cleared, the signal was for another thread,
           or SA_RESTART is set.  Restart the interrupted call.  */
        {
          /* Our RPC was interrupted, and the server may have kept the reply
             right.  Get a fresh reply port from MIG.  */
          __mig_dealloc_reply_port (rcv_name);
          m->header.msgh_local_port = rcv_name = __mig_get_reply_port ();

          m->header.msgh_bits = msgh_bits;
          option = user_option;
          timeout = user_timeout;
          goto message;
        }
      err = EINTR;

      /* The EINTR return indicates cancellation, so clear the flag.  */
      ss->cancel = 0;
      break;

...coupled with _hurdsig_abort_rpcs not deallocating our port. wdyt?

Sergey
  
Samuel Thibault April 14, 2023, 9:09 a.m. UTC | #21
Sergey Bugaev via Libc-alpha, le ven. 14 avril 2023 11:53:43 +0300, a ecrit:
> It's in _hurdsig_abort_rpcs, if the interrupt_operation call fails.
> 
> Let's maybe not do that either? We're already making mach_msg seem to
> have returned EINTR, which intr-msg knows how to handle.

Mmm, yes, but we need to make *sure* we don't re-use the port and
that it gets dropped at some point not too far, otherwise the
server will stay stuck trying to reply. Managing to make the port
a dead name seems a good way to me. Something like adding a send
right to the port, then dropping the receive right. And making the
__mig_dealloc_reply_port detect that case (if dropping the receive right
returns KERN_INVALID_VALUE, it means it has indeed become a dead
name for a send right, which one can drop instead).

Again, in such code I don't think I can see the whole picture so it's
testsuites that will find out the various cases.

Samuel
  
Sergey Bugaev April 14, 2023, 9:23 a.m. UTC | #22
On Fri, Apr 14, 2023 at 12:12 PM Samuel Thibault
<samuel.thibault@gnu.org> wrote:
> Sergey Bugaev via Libc-alpha, le ven. 14 avril 2023 11:53:43 +0300, a ecrit:
> > It's in _hurdsig_abort_rpcs, if the interrupt_operation call fails.
> >
> > Let's maybe not do that either? We're already making mach_msg seem to
> > have returned EINTR, which intr-msg knows how to handle.
>
> Mmm, yes, but we need to make *sure* we don't re-use the port and
> that it gets dropped at some point not too far, otherwise the
> server will stay stuck trying to reply.

We should drop the reply port before waiting for the signal handler to
run, since the handler may block -- is that what you're saying? If it
is, good point!

> Managing to make the port
> a dead name seems a good way to me. Something like adding a send
> right to the port, then dropping the receive right. And making the
> __mig_dealloc_reply_port detect that case (if dropping the receive right
> returns KERN_INVALID_VALUE, it means it has indeed become a dead
> name for a send right, which one can drop instead).

Right, sounds good.

> Again, in such code I don't think I can see the whole picture so it's
> testsuites that will find out the various cases.
>
> Samuel

Sergey
  
Samuel Thibault April 14, 2023, 9:31 a.m. UTC | #23
Sergey Bugaev via Libc-alpha, le ven. 14 avril 2023 12:23:00 +0300, a ecrit:
> On Fri, Apr 14, 2023 at 12:12 PM Samuel Thibault
> <samuel.thibault@gnu.org> wrote:
> > Sergey Bugaev via Libc-alpha, le ven. 14 avril 2023 11:53:43 +0300, a ecrit:
> > > It's in _hurdsig_abort_rpcs, if the interrupt_operation call fails.
> > >
> > > Let's maybe not do that either? We're already making mach_msg seem to
> > > have returned EINTR, which intr-msg knows how to handle.
> >
> > Mmm, yes, but we need to make *sure* we don't re-use the port and
> > that it gets dropped at some point not too far, otherwise the
> > server will stay stuck trying to reply.
> 
> We should drop the reply port before waiting for the signal handler to
> run, since the handler may block -- is that what you're saying?

That's a kind of scenario indeed. I don't know if it's actually
exercised in the wild, but I wouldn't be surprised.

Samuel
  
Samuel Thibault April 14, 2023, 5:34 p.m. UTC | #24
Reapplied on top of "Remove __hurd_local_reply_port", thanks!

Sergey Bugaev, le dim. 19 mars 2023 18:10:07 +0300, a ecrit:
> When glibc is built as a shared library, TLS is always initialized by
> the call of TLS_INIT_TP () macro made inside the dynamic loader, prior
> to running the main program (see dl-call_tls_init_tp.h). We can take
> advantage of this: we know for sure that __LIBC_NO_TLS () will evaluate
> to 0 in all other cases, so let the compiler know that explicitly too.
> 
> Also, only define _hurd_tls_init () and TLS_INIT_TP () under the same
> conditions (either !SHARED or inside rtld), to statically assert that
> this is the case.
> 
> Other than a microoptimization, this also helps with avoiding awkward
> sharing of the __libc_tls_initialized variable between ld.so and libc.so
> that we would have to do otherwise -- we know for sure that no sharing
> is required, simply because __libc_tls_initialized would always be set
> to true inside libc.so.
> 
> Signed-off-by: Sergey Bugaev <bugaevc@gmail.com>
> ---
>  sysdeps/mach/hurd/Makefile                    |  4 ++
>  sysdeps/mach/hurd/i386/dl-tls-initialized.c   | 21 +++++++++
>  sysdeps/mach/hurd/i386/tls.h                  | 43 +++++++++++--------
>  sysdeps/mach/hurd/x86/init-first.c            | 11 +----
>  sysdeps/mach/hurd/x86_64/dl-tls-initialized.c | 21 +++++++++
>  sysdeps/mach/hurd/x86_64/tls.h                | 19 +++++---
>  6 files changed, 85 insertions(+), 34 deletions(-)
>  create mode 100644 sysdeps/mach/hurd/i386/dl-tls-initialized.c
>  create mode 100644 sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> 
> diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
> index d5584930..f43e92ba 100644
> --- a/sysdeps/mach/hurd/Makefile
> +++ b/sysdeps/mach/hurd/Makefile
> @@ -197,6 +197,10 @@ ifeq (hurd, $(subdir))
>  sysdep_routines += cthreads
>  endif
>  
> +ifeq (elf, $(subdir))
> +sysdep-dl-routines += dl-tls-initialized
> +endif
> +
>  ifeq (io, $(subdir))
>  sysdep_routines += f_setlk close_nocancel close_nocancel_nostatus \
>  		   fcntl_nocancel open_nocancel openat_nocancel read_nocancel \
> diff --git a/sysdeps/mach/hurd/i386/dl-tls-initialized.c b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> new file mode 100644
> index 00000000..493ec239
> --- /dev/null
> +++ b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for i386/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#ifndef SHARED
> +unsigned short __init1_desc;
> +#endif
> diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
> index 0f8dd241..ee7b8004 100644
> --- a/sysdeps/mach/hurd/i386/tls.h
> +++ b/sysdeps/mach/hurd/i386/tls.h
> @@ -69,18 +69,6 @@ _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
>                    |  (desc->high_word & 0xff000000));			      \
>    })
>  
> -/* Return 1 if TLS is not initialized yet.  */
> -#ifndef SHARED
> -extern unsigned short __init1_desc;
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> -#else
> -#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> -#endif
> -
> -#define __LIBC_NO_TLS()							      \
> -  ({ unsigned short ds, gs;						      \
> -     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
> -     __builtin_expect(__HURD_DESC_INITIAL(gs, ds), 0); })
>  #endif
>  
>  /* The TCB can have any size and the memory following the address the
> @@ -125,6 +113,28 @@ extern unsigned short __init1_desc;
>  
>  # define HURD_SEL_LDT(sel) (__builtin_expect ((sel) & 4, 0))
>  
> +#ifndef SHARED
> +extern unsigned short __init1_desc;
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
> +#else
> +# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
> +#endif
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +/* Return 1 if TLS is not initialized yet.  */
> +extern inline bool __attribute__ ((unused))
> +__LIBC_NO_TLS (void)
> +{
> +  unsigned short ds, gs;
> +  asm ("movw %%ds, %w0\n"
> +       "movw %%gs, %w1"
> +       : "=q" (ds), "=q" (gs));
> +  return __glibc_unlikely (__HURD_DESC_INITIAL (gs, ds));
> +}
> +
> +/* Code to initially initialize the thread pointer.  This might need
> +   special attention since 'errno' is not yet available and if the
> +   operation can cause a failure 'errno' must not be touched.  */
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -168,11 +178,10 @@ out:
>    return success;
>  }
>  
> -/* Code to initially initialize the thread pointer.  This might need
> -   special attention since 'errno' is not yet available and if the
> -   operation can cause a failure 'errno' must not be touched.  */
> -# define TLS_INIT_TP(descr) \
> -    _hurd_tls_init ((tcbhead_t *) (descr))
> +# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +#else /* defined (SHARED) && !IS_IN (rtld) */
> +# define __LIBC_NO_TLS() 0
> +#endif
>  
>  # if __GNUC_PREREQ (6, 0)
>  
> diff --git a/sysdeps/mach/hurd/x86/init-first.c b/sysdeps/mach/hurd/x86/init-first.c
> index 48c330ec..89a5f44c 100644
> --- a/sysdeps/mach/hurd/x86/init-first.c
> +++ b/sysdeps/mach/hurd/x86/init-first.c
> @@ -40,13 +40,6 @@ extern char **_dl_argv;
>  
>  #ifndef SHARED
>  static tcbhead_t __init1_tcbhead;
> -# ifndef __x86_64__
> -unsigned short __init1_desc;
> -# endif
> -#endif
> -
> -#ifdef __x86_64__
> -unsigned char __libc_tls_initialized;
>  #endif
>  
>  /* Things that want to be run before _hurd_init or much anything else.
> @@ -166,9 +159,7 @@ first_init (void)
>    _hurd_tls_init (&__init1_tcbhead);
>  
>    /* Make sure __LIBC_NO_TLS () keeps evaluating to 1.  */
> -# ifdef __x86_64__
> -  __libc_tls_initialized = 0;
> -# else
> +# ifndef __x86_64__
>    asm ("movw %%gs,%w0" : "=m" (__init1_desc));
>  # endif
>  #endif
> diff --git a/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> new file mode 100644
> index 00000000..d0766f95
> --- /dev/null
> +++ b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
> @@ -0,0 +1,21 @@
> +/* Determine whether TLS is initialized, for x86_64/Hurd.
> +   Copyright (C) 1995-2023 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +#if !defined (SHARED) || IS_IN (rtld)
> +unsigned short __libc_tls_initialized;
> +#endif
> diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
> index cf74e1f4..da504d9c 100644
> --- a/sysdeps/mach/hurd/x86_64/tls.h
> +++ b/sysdeps/mach/hurd/x86_64/tls.h
> @@ -68,10 +68,6 @@ _Static_assert (offsetof (tcbhead_t, stack_guard) == 0x28,
>  _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
>                  "split stack pointer offset");
>  
> -extern unsigned char __libc_tls_initialized;
> -
> -# define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> -
>  /* The TCB can have any size and the memory following the address the
>     thread pointer points to is unspecified.  Allocate the TCB there.  */
>  # define TLS_TCB_AT_TP	1
> @@ -87,8 +83,6 @@ extern unsigned char __libc_tls_initialized;
>  # define TCB_ALIGNMENT	64
>  
>  
> -# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> -
>  # define THREAD_SELF							\
>    (*(tcbhead_t * __seg_fs *) offsetof (tcbhead_t, tcb))
>  /* Read member of the thread descriptor directly.  */
> @@ -174,6 +168,10 @@ _hurd_tls_new (thread_t child, tcbhead_t *tcb)
>                               i386_FSGS_BASE_STATE_COUNT);
>  }
>  
> +# if !defined (SHARED) || IS_IN (rtld)
> +extern unsigned char __libc_tls_initialized;
> +#  define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
> +
>  static inline bool __attribute__ ((unused))
>  _hurd_tls_init (tcbhead_t *tcb)
>  {
> @@ -184,11 +182,18 @@ _hurd_tls_init (tcbhead_t *tcb)
>    tcb->multiple_threads = 1;
>  
>    err = _hurd_tls_new (self, tcb);
> +  if (err == 0)
> +    __libc_tls_initialized = 1;
>    __mach_port_deallocate (__mach_task_self (), self);
> -  __libc_tls_initialized = 1;
>    return err == 0;
>  }
>  
> +#  define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
> +# else /* defined (SHARED) && !IS_IN (rtld) */
> +#  define __LIBC_NO_TLS() 0
> +# endif
> +
> +
>  
>  /* Global scope switch support.  */
>  # define THREAD_GSCOPE_FLAG_UNUSED 0
> -- 
> 2.39.2
>
  
Sergey Bugaev April 14, 2023, 7:52 p.m. UTC | #25
On Fri, Apr 14, 2023 at 8:34 PM Samuel Thibault <samuel.thibault@gnu.org> wrote:
> Reapplied on top of "Remove __hurd_local_reply_port", thanks!

Awesome, thank you!

I have implemented the changes we talked about (replacing the receive
right with a dead name...), and they even seem to pass the few tests
from the testsuite I can run manually, but now I'm thinking maybe we
better avoid touching that code, or we'll break something else. So I
sent the other changes (including restoring the usage of
MACH_PORT_DEAD inside sigreturn), but not that patch.

Please test.

Sergey
  
Samuel Thibault April 17, 2023, 7:16 a.m. UTC | #26
Hello,

Sergey Bugaev via Libc-alpha, le ven. 14 avril 2023 11:53:43 +0300, a ecrit:
> -         /* Make sure we have a valid reply port.  The one we were using
> -            may have been destroyed by interruption.  */
> -         m->header.msgh_local_port = rcv_name = __mig_get_reply_port ();

> +         /* Our RPC was interrupted, and the server may have kept the reply
> +            right.  Get a fresh reply port from MIG.  */
> +         __mig_dealloc_reply_port (rcv_name);
> +         m->header.msgh_local_port = rcv_name = __mig_get_reply_port ();

That would be in line with cleaning the __mig_*_reply_port interface,
indeed.

Samuel
  

Patch

diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
index d5584930..f43e92ba 100644
--- a/sysdeps/mach/hurd/Makefile
+++ b/sysdeps/mach/hurd/Makefile
@@ -197,6 +197,10 @@  ifeq (hurd, $(subdir))
 sysdep_routines += cthreads
 endif
 
+ifeq (elf, $(subdir))
+sysdep-dl-routines += dl-tls-initialized
+endif
+
 ifeq (io, $(subdir))
 sysdep_routines += f_setlk close_nocancel close_nocancel_nostatus \
 		   fcntl_nocancel open_nocancel openat_nocancel read_nocancel \
diff --git a/sysdeps/mach/hurd/i386/dl-tls-initialized.c b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
new file mode 100644
index 00000000..493ec239
--- /dev/null
+++ b/sysdeps/mach/hurd/i386/dl-tls-initialized.c
@@ -0,0 +1,21 @@ 
+/* Determine whether TLS is initialized, for i386/Hurd.
+   Copyright (C) 1995-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef SHARED
+unsigned short __init1_desc;
+#endif
diff --git a/sysdeps/mach/hurd/i386/tls.h b/sysdeps/mach/hurd/i386/tls.h
index 0f8dd241..ee7b8004 100644
--- a/sysdeps/mach/hurd/i386/tls.h
+++ b/sysdeps/mach/hurd/i386/tls.h
@@ -69,18 +69,6 @@  _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x30,
                   |  (desc->high_word & 0xff000000));			      \
   })
 
-/* Return 1 if TLS is not initialized yet.  */
-#ifndef SHARED
-extern unsigned short __init1_desc;
-#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
-#else
-#define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
-#endif
-
-#define __LIBC_NO_TLS()							      \
-  ({ unsigned short ds, gs;						      \
-     asm ("movw %%ds,%w0; movw %%gs,%w1" : "=q" (ds), "=q" (gs));	      \
-     __builtin_expect(__HURD_DESC_INITIAL(gs, ds), 0); })
 #endif
 
 /* The TCB can have any size and the memory following the address the
@@ -125,6 +113,28 @@  extern unsigned short __init1_desc;
 
 # define HURD_SEL_LDT(sel) (__builtin_expect ((sel) & 4, 0))
 
+#ifndef SHARED
+extern unsigned short __init1_desc;
+# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds) || (gs) == __init1_desc)
+#else
+# define __HURD_DESC_INITIAL(gs, ds) ((gs) == (ds))
+#endif
+
+#if !defined (SHARED) || IS_IN (rtld)
+/* Return 1 if TLS is not initialized yet.  */
+extern inline bool __attribute__ ((unused))
+__LIBC_NO_TLS (void)
+{
+  unsigned short ds, gs;
+  asm ("movw %%ds, %w0\n"
+       "movw %%gs, %w1"
+       : "=q" (ds), "=q" (gs));
+  return __glibc_unlikely (__HURD_DESC_INITIAL (gs, ds));
+}
+
+/* Code to initially initialize the thread pointer.  This might need
+   special attention since 'errno' is not yet available and if the
+   operation can cause a failure 'errno' must not be touched.  */
 static inline bool __attribute__ ((unused))
 _hurd_tls_init (tcbhead_t *tcb)
 {
@@ -168,11 +178,10 @@  out:
   return success;
 }
 
-/* Code to initially initialize the thread pointer.  This might need
-   special attention since 'errno' is not yet available and if the
-   operation can cause a failure 'errno' must not be touched.  */
-# define TLS_INIT_TP(descr) \
-    _hurd_tls_init ((tcbhead_t *) (descr))
+# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
+#else /* defined (SHARED) && !IS_IN (rtld) */
+# define __LIBC_NO_TLS() 0
+#endif
 
 # if __GNUC_PREREQ (6, 0)
 
diff --git a/sysdeps/mach/hurd/x86/init-first.c b/sysdeps/mach/hurd/x86/init-first.c
index 48c330ec..89a5f44c 100644
--- a/sysdeps/mach/hurd/x86/init-first.c
+++ b/sysdeps/mach/hurd/x86/init-first.c
@@ -40,13 +40,6 @@  extern char **_dl_argv;
 
 #ifndef SHARED
 static tcbhead_t __init1_tcbhead;
-# ifndef __x86_64__
-unsigned short __init1_desc;
-# endif
-#endif
-
-#ifdef __x86_64__
-unsigned char __libc_tls_initialized;
 #endif
 
 /* Things that want to be run before _hurd_init or much anything else.
@@ -166,9 +159,7 @@  first_init (void)
   _hurd_tls_init (&__init1_tcbhead);
 
   /* Make sure __LIBC_NO_TLS () keeps evaluating to 1.  */
-# ifdef __x86_64__
-  __libc_tls_initialized = 0;
-# else
+# ifndef __x86_64__
   asm ("movw %%gs,%w0" : "=m" (__init1_desc));
 # endif
 #endif
diff --git a/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
new file mode 100644
index 00000000..d0766f95
--- /dev/null
+++ b/sysdeps/mach/hurd/x86_64/dl-tls-initialized.c
@@ -0,0 +1,21 @@ 
+/* Determine whether TLS is initialized, for x86_64/Hurd.
+   Copyright (C) 1995-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#if !defined (SHARED) || IS_IN (rtld)
+unsigned short __libc_tls_initialized;
+#endif
diff --git a/sysdeps/mach/hurd/x86_64/tls.h b/sysdeps/mach/hurd/x86_64/tls.h
index cf74e1f4..da504d9c 100644
--- a/sysdeps/mach/hurd/x86_64/tls.h
+++ b/sysdeps/mach/hurd/x86_64/tls.h
@@ -68,10 +68,6 @@  _Static_assert (offsetof (tcbhead_t, stack_guard) == 0x28,
 _Static_assert (offsetof (tcbhead_t, __private_ss) == 0x70,
                 "split stack pointer offset");
 
-extern unsigned char __libc_tls_initialized;
-
-# define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
-
 /* The TCB can have any size and the memory following the address the
    thread pointer points to is unspecified.  Allocate the TCB there.  */
 # define TLS_TCB_AT_TP	1
@@ -87,8 +83,6 @@  extern unsigned char __libc_tls_initialized;
 # define TCB_ALIGNMENT	64
 
 
-# define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
-
 # define THREAD_SELF							\
   (*(tcbhead_t * __seg_fs *) offsetof (tcbhead_t, tcb))
 /* Read member of the thread descriptor directly.  */
@@ -174,6 +168,10 @@  _hurd_tls_new (thread_t child, tcbhead_t *tcb)
                              i386_FSGS_BASE_STATE_COUNT);
 }
 
+# if !defined (SHARED) || IS_IN (rtld)
+extern unsigned char __libc_tls_initialized;
+#  define __LIBC_NO_TLS() __builtin_expect (!__libc_tls_initialized, 0)
+
 static inline bool __attribute__ ((unused))
 _hurd_tls_init (tcbhead_t *tcb)
 {
@@ -184,11 +182,18 @@  _hurd_tls_init (tcbhead_t *tcb)
   tcb->multiple_threads = 1;
 
   err = _hurd_tls_new (self, tcb);
+  if (err == 0)
+    __libc_tls_initialized = 1;
   __mach_port_deallocate (__mach_task_self (), self);
-  __libc_tls_initialized = 1;
   return err == 0;
 }
 
+#  define TLS_INIT_TP(descr) _hurd_tls_init ((tcbhead_t *) (descr))
+# else /* defined (SHARED) && !IS_IN (rtld) */
+#  define __LIBC_NO_TLS() 0
+# endif
+
+
 
 /* Global scope switch support.  */
 # define THREAD_GSCOPE_FLAG_UNUSED 0