[01/10] nptl: Perform signal initialization upon pthread_create

Message ID b1124dd9617db8dbc14ded760af5b361222bbaa8.1621347402.git.fweimer@redhat.com
State Superseded
Headers
Series nptl: Complete libpthread removal |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent

Commit Message

Florian Weimer May 18, 2021, 2:24 p.m. UTC
  Install signal handlers and unblock signals before pthread_create
creates the first thread.
---
 nptl/Versions         |  5 ++-
 nptl/nptl-init.c      | 75 ------------------------------------
 nptl/pthreadP.h       |  6 +++
 nptl/pthread_cancel.c | 88 ++++++++++++++++++++++++++++++++++++++-----
 nptl/pthread_create.c | 45 +++++++++++++++++++++-
 5 files changed, 131 insertions(+), 88 deletions(-)
  

Comments

Adhemerval Zanella May 20, 2021, 7:15 p.m. UTC | #1
On 18/05/2021 11:24, Florian Weimer via Libc-alpha wrote:
> Install signal handlers and unblock signals before pthread_create
> creates the first thread.
> ---
>  nptl/Versions         |  5 ++-
>  nptl/nptl-init.c      | 75 ------------------------------------
>  nptl/pthreadP.h       |  6 +++
>  nptl/pthread_cancel.c | 88 ++++++++++++++++++++++++++++++++++++++-----
>  nptl/pthread_create.c | 45 +++++++++++++++++++++-
>  5 files changed, 131 insertions(+), 88 deletions(-)
> 
> diff --git a/nptl/Versions b/nptl/Versions
> index e7883cbb49..d96b830d05 100644
> --- a/nptl/Versions
> +++ b/nptl/Versions
> @@ -367,8 +367,6 @@ libc {
>      tss_set;
>    }
>    GLIBC_PRIVATE {
> -     __nptl_create_event;
> -     __nptl_death_event;
>      __default_pthread_attr;
>      __default_pthread_attr_lock;
>      __futex_abstimed_wait64;
> @@ -386,11 +384,14 @@ libc {
>      __lll_trylock_elision;
>      __lll_unlock_elision;
>      __mutex_aconf;
> +    __nptl_create_event;
>      __nptl_deallocate_stack;
>      __nptl_deallocate_tsd;
> +    __nptl_death_event;
>      __nptl_free_tcb;
>      __nptl_nthreads;
>      __nptl_setxid_sighandler;
> +    __nptl_sigcancel_handler;
>      __nptl_stack_list_add;
>      __nptl_stack_list_del;
>      __pthread_attr_copy;
> diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
> index f4b86fbfaf..bc4831ac89 100644
> --- a/nptl/nptl-init.c
> +++ b/nptl/nptl-init.c
> @@ -44,84 +44,9 @@ size_t __static_tls_align_m1;
>  /* Version of the library, used in libthread_db to detect mismatches.  */
>  static const char nptl_version[] __attribute_used__ = VERSION;
>  
> -/* For asynchronous cancellation we use a signal.  This is the handler.  */
> -static void
> -sigcancel_handler (int sig, siginfo_t *si, void *ctx)
> -{
> -  /* Safety check.  It would be possible to call this function for
> -     other signals and send a signal from another process.  This is not
> -     correct and might even be a security problem.  Try to catch as
> -     many incorrect invocations as possible.  */
> -  if (sig != SIGCANCEL
> -      || si->si_pid != __getpid()
> -      || si->si_code != SI_TKILL)
> -    return;
> -
> -  struct pthread *self = THREAD_SELF;
> -
> -  int oldval = THREAD_GETMEM (self, cancelhandling);
> -  while (1)
> -    {
> -      /* We are canceled now.  When canceled by another thread this flag
> -	 is already set but if the signal is directly send (internally or
> -	 from another process) is has to be done here.  */
> -      int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
> -
> -      if (oldval == newval || (oldval & EXITING_BITMASK) != 0)
> -	/* Already canceled or exiting.  */
> -	break;
> -
> -      int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> -					      oldval);
> -      if (curval == oldval)
> -	{
> -	  /* Set the return value.  */
> -	  THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> -
> -	  /* Make sure asynchronous cancellation is still enabled.  */
> -	  if ((newval & CANCELTYPE_BITMASK) != 0)
> -	    /* Run the registered destructors and terminate the thread.  */
> -	    __do_cancel ();
> -
> -	  break;
> -	}
> -
> -      oldval = curval;
> -    }
> -}
> -
> -
> -/* When using __thread for this, we do it in libc so as not
> -   to give libpthread its own TLS segment just for this.  */
> -extern void **__libc_dl_error_tsd (void) __attribute__ ((const));
> -
> -
>  void
>  __pthread_initialize_minimal_internal (void)
>  {
> -  struct sigaction sa;
> -  __sigemptyset (&sa.sa_mask);
> -
> -  /* Install the cancellation signal handler.  If for some reason we
> -     cannot install the handler we do not abort.  Maybe we should, but
> -     it is only asynchronous cancellation which is affected.  */
> -  sa.sa_sigaction = sigcancel_handler;
> -  sa.sa_flags = SA_SIGINFO;
> -  (void) __libc_sigaction (SIGCANCEL, &sa, NULL);
> -
> -  /* Install the handle to change the threads' uid/gid.  */
> -  sa.sa_sigaction = __nptl_setxid_sighandler;
> -  sa.sa_flags = SA_SIGINFO | SA_RESTART;
> -  (void) __libc_sigaction (SIGSETXID, &sa, NULL);
> -
> -  /* The parent process might have left the signals blocked.  Just in
> -     case, unblock it.  We reuse the signal mask in the sigaction
> -     structure.  It is already cleared.  */
> -  __sigaddset (&sa.sa_mask, SIGCANCEL);
> -  __sigaddset (&sa.sa_mask, SIGSETXID);
> -  INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &sa.sa_mask,
> -			 NULL, __NSIG_BYTES);
> -
>    /* Get the size of the static and alignment requirements for the TLS
>       block.  */
>    size_t static_tls_align;
> diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
> index f93806e540..497c2ad3d9 100644
> --- a/nptl/pthreadP.h
> +++ b/nptl/pthreadP.h
> @@ -571,6 +571,12 @@ libc_hidden_proto (__pthread_attr_setsigmask_internal)
>  extern __typeof (pthread_attr_getsigmask_np) __pthread_attr_getsigmask_np;
>  libc_hidden_proto (__pthread_attr_getsigmask_np)
>  
> +/* The cancellation signal handler defined alongside with
> +   pthread_cancel.  This is included in statically linked binaries
> +   only if pthread_cancel is linked in.  */
> +void __nptl_sigcancel_handler (int sig, siginfo_t *si, void *ctx);
> +libc_hidden_proto (__nptl_sigcancel_handler)
> +
>  /* Special versions which use non-exported functions.  */
>  extern void __pthread_cleanup_push (struct _pthread_cleanup_buffer *buffer,
>  				    void (*routine) (void *), void *arg);
> diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c
> index e4ad602900..802c691874 100644
> --- a/nptl/pthread_cancel.c
> +++ b/nptl/pthread_cancel.c
> @@ -26,6 +26,63 @@
>  #include <unwind-link.h>
>  #include <stdio.h>
>  #include <gnu/lib-names.h>
> +#include <sys/single_threaded.h>
> +
> +/* For asynchronous cancellation we use a signal.  This is the core
> +   logic of the signal handler.  */
> +static void
> +sigcancel_handler (void)
> +{
> +  struct pthread *self = THREAD_SELF;
> +
> +  int oldval = THREAD_GETMEM (self, cancelhandling);
> +  while (1)
> +    {
> +      /* We are canceled now.  When canceled by another thread this flag
> +	 is already set but if the signal is directly send (internally or
> +	 from another process) is has to be done here.  */
> +      int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
> +
> +      if (oldval == newval || (oldval & EXITING_BITMASK) != 0)
> +	/* Already canceled or exiting.  */
> +	break;
> +
> +      int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
> +					      oldval);
> +      if (curval == oldval)
> +	{
> +	  /* Set the return value.  */
> +	  THREAD_SETMEM (self, result, PTHREAD_CANCELED);
> +
> +	  /* Make sure asynchronous cancellation is still enabled.  */
> +	  if ((newval & CANCELTYPE_BITMASK) != 0)
> +	    /* Run the registered destructors and terminate the thread.  */
> +	    __do_cancel ();
> +
> +	  break;
> +	}
> +
> +      oldval = curval;
> +    }
> +}
> +
> +/* This is the actually installed SIGCANCEL handler.  It adds some
> +   safety checks before performing the cancellation.  */
> +void
> +__nptl_sigcancel_handler (int sig, siginfo_t *si, void *ctx)
> +{
> +  /* Safety check.  It would be possible to call this function for
> +     other signals and send a signal from another process.  This is not
> +     correct and might even be a security problem.  Try to catch as
> +     many incorrect invocations as possible.  */
> +  if (sig != SIGCANCEL
> +      || si->si_pid != __getpid()
> +      || si->si_code != SI_TKILL)
> +    return;
> +
> +  sigcancel_handler ();
> +}
> +libc_hidden_def (__nptl_sigcancel_handler)
>  
>  int
>  __pthread_cancel (pthread_t th)
> @@ -72,14 +129,23 @@ __pthread_cancel (pthread_t th)
>  						    oldval))
>  	    goto again;
>  
> -	  /* The cancellation handler will take care of marking the
> -	     thread as canceled.  */
> -	  pid_t pid = __getpid ();
> -
> -	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
> -					   SIGCANCEL);
> -	  if (INTERNAL_SYSCALL_ERROR_P (val))
> -	    result = INTERNAL_SYSCALL_ERRNO (val);
> +	  if (pd == THREAD_SELF)
> +	    /* This is not merely an optimization: An application may
> +	       call pthread_cancel (pthread_self ()) without calling
> +	       pthread_create, so the signal handler may not have been
> +	       set up for a self-cancel.  */
> +	    sigcancel_handler ();

I think it would be simple to just call __pthread_exit (PTHREAD_CANCELED)
here, it won't require to split the cancellation handler, it already
unwind if cancel state is enabled and asynchronous, and it does not
require add another PTHREAD_STATIC_FN_REQUIRE hack. 

It would require an extra __libc_unwind_link_get call, but I think we
can optimize it later (I am working on a patch to simplify it).

> +	  else
> +	    {
> +	      /* The cancellation handler will take care of marking the
> +		 thread as canceled.  */
> +	      pid_t pid = __getpid ();
> +
> +	      int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
> +					       SIGCANCEL);
> +	      if (INTERNAL_SYSCALL_ERROR_P (val))
> +		result = INTERNAL_SYSCALL_ERRNO (val);
> +	    }
>  
>  	  break;
>  	}
> @@ -106,4 +172,8 @@ versioned_symbol (libc, __pthread_cancel, pthread_cancel, GLIBC_2_34);
>  compat_symbol (libpthread, __pthread_cancel, pthread_cancel, GLIBC_2_0);
>  #endif
>  
> -PTHREAD_STATIC_FN_REQUIRE (__pthread_create)
> +/* Ensure that the unwinder is always linked in (the __pthread_unwind
> +   reference from __do_cancel is weak).  Use ___pthread_unwind_next
> +   (three underscores) to produce a strong reference to the same
> +   file.  */
> +PTHREAD_STATIC_FN_REQUIRE (___pthread_unwind_next)
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index 770656453d..772b5efcc6 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -56,6 +56,43 @@ static struct rtld_global *__nptl_rtld_global __attribute_used__
>    = &_rtld_global;
>  #endif
>  
> +/* This performs the initialization necessary when going from
> +   single-threaded to multi-threaded mode for the first time.  */
> +static void
> +late_init (void)
> +{
> +  struct sigaction sa;
> +  __sigemptyset (&sa.sa_mask);
> +
> +  /* Install the cancellation signal handler (in static builds only if
> +     pthread_cancel has been linked in).  If for some reason we cannot
> +     install the handler we do not abort.  Maybe we should, but it is
> +     only asynchronous cancellation which is affected.  */
> +#ifndef SHARED
> +  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
> +    __attribute__ ((weak));
> +  if (__nptl_sigcancel_handler != NULL)
> +#endif

This weak symbol can be avoided if we move the cancellation setup
on pthread_cancel instead.  I still think this is best approach,
it disentangle the cancellation handling.

> +    {
> +      sa.sa_sigaction = __nptl_sigcancel_handler;
> +      sa.sa_flags = SA_SIGINFO;
> +      (void) __libc_sigaction (SIGCANCEL, &sa, NULL);
> +    }
> +
> +  /* Install the handle to change the threads' uid/gid.  */
> +  sa.sa_sigaction = __nptl_setxid_sighandler;
> +  sa.sa_flags = SA_SIGINFO | SA_RESTART;
> +  (void) __libc_sigaction (SIGSETXID, &sa, NULL);
> +
> +  /* The parent process might have left the signals blocked.  Just in
> +     case, unblock it.  We reuse the signal mask in the sigaction
> +     structure.  It is already cleared.  */
> +  __sigaddset (&sa.sa_mask, SIGCANCEL);
> +  __sigaddset (&sa.sa_mask, SIGSETXID);
> +  INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &sa.sa_mask,
> +			 NULL, __NSIG_BYTES);
> +}
> +
>  /* Code to allocate and deallocate a stack.  */
>  #include "allocatestack.c"
>  
> @@ -459,9 +496,13 @@ __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
>  {
>    STACK_VARIABLES;
>  
> -  /* Avoid a data race in the multi-threaded case.  */
> +  /* Avoid a data race in the multi-threaded case, and call the
> +     deferred initialization only once.  */
>    if (__libc_single_threaded)
> -    __libc_single_threaded = 0;
> +    {
> +      late_init ();
> +      __libc_single_threaded = 0;
> +    }
>  
>    const struct pthread_attr *iattr = (struct pthread_attr *) attr;
>    union pthread_attr_transparent default_attr;
>
  
Florian Weimer May 20, 2021, 7:41 p.m. UTC | #2
* Adhemerval Zanella:

>>  int
>>  __pthread_cancel (pthread_t th)
>> @@ -72,14 +129,23 @@ __pthread_cancel (pthread_t th)
>>  						    oldval))
>>  	    goto again;
>>  
>> -	  /* The cancellation handler will take care of marking the
>> -	     thread as canceled.  */
>> -	  pid_t pid = __getpid ();
>> -
>> -	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
>> -					   SIGCANCEL);
>> -	  if (INTERNAL_SYSCALL_ERROR_P (val))
>> -	    result = INTERNAL_SYSCALL_ERRNO (val);
>> +	  if (pd == THREAD_SELF)
>> +	    /* This is not merely an optimization: An application may
>> +	       call pthread_cancel (pthread_self ()) without calling
>> +	       pthread_create, so the signal handler may not have been
>> +	       set up for a self-cancel.  */
>> +	    sigcancel_handler ();
>
> I think it would be simple to just call __pthread_exit (PTHREAD_CANCELED)
> here, it won't require to split the cancellation handler, it already
> unwind if cancel state is enabled and asynchronous, and it does not
> require add another PTHREAD_STATIC_FN_REQUIRE hack. 
>
> It would require an extra __libc_unwind_link_get call, but I think we
> can optimize it later (I am working on a patch to simplify it).

It would be correct, I think.  pthread_cancel is not a cancellation
point.

#include <stdio.h>
#include <pthread.h>

int
main (void)
{
  pthread_cancel (pthread_self ());
  puts ("about to exit");
}

This should print “about to exit”.

>> +/* This performs the initialization necessary when going from
>> +   single-threaded to multi-threaded mode for the first time.  */
>> +static void
>> +late_init (void)
>> +{
>> +  struct sigaction sa;
>> +  __sigemptyset (&sa.sa_mask);
>> +
>> +  /* Install the cancellation signal handler (in static builds only if
>> +     pthread_cancel has been linked in).  If for some reason we cannot
>> +     install the handler we do not abort.  Maybe we should, but it is
>> +     only asynchronous cancellation which is affected.  */
>> +#ifndef SHARED
>> +  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
>> +    __attribute__ ((weak));
>> +  if (__nptl_sigcancel_handler != NULL)
>> +#endif
>
> This weak symbol can be avoided if we move the cancellation setup
> on pthread_cancel instead.  I still think this is best approach,
> it disentangle the cancellation handling.

But then we either have to introduce yet another global flag or install
the signal handler unconditionally before every cancel operation.  I do
not think this results in a simplification.

Thanks,
Florian
  
Adhemerval Zanella May 20, 2021, 7:57 p.m. UTC | #3
On 20/05/2021 16:41, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>>  int
>>>  __pthread_cancel (pthread_t th)
>>> @@ -72,14 +129,23 @@ __pthread_cancel (pthread_t th)
>>>  						    oldval))
>>>  	    goto again;
>>>  
>>> -	  /* The cancellation handler will take care of marking the
>>> -	     thread as canceled.  */
>>> -	  pid_t pid = __getpid ();
>>> -
>>> -	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
>>> -					   SIGCANCEL);
>>> -	  if (INTERNAL_SYSCALL_ERROR_P (val))
>>> -	    result = INTERNAL_SYSCALL_ERRNO (val);
>>> +	  if (pd == THREAD_SELF)
>>> +	    /* This is not merely an optimization: An application may
>>> +	       call pthread_cancel (pthread_self ()) without calling
>>> +	       pthread_create, so the signal handler may not have been
>>> +	       set up for a self-cancel.  */
>>> +	    sigcancel_handler ();
>>
>> I think it would be simple to just call __pthread_exit (PTHREAD_CANCELED)
>> here, it won't require to split the cancellation handler, it already
>> unwind if cancel state is enabled and asynchronous, and it does not
>> require add another PTHREAD_STATIC_FN_REQUIRE hack. 
>>
>> It would require an extra __libc_unwind_link_get call, but I think we
>> can optimize it later (I am working on a patch to simplify it).
> 
> It would be correct, I think.  pthread_cancel is not a cancellation
> point.
> 
> #include <stdio.h>
> #include <pthread.h>
> 
> int
> main (void)
> {
>   pthread_cancel (pthread_self ());
>   puts ("about to exit");
> }
> 
> This should print “about to exit”.

Yes, this is essentially sysdeps/pthread/tst-cancel-self.c.

> 
>>> +/* This performs the initialization necessary when going from
>>> +   single-threaded to multi-threaded mode for the first time.  */
>>> +static void
>>> +late_init (void)
>>> +{
>>> +  struct sigaction sa;
>>> +  __sigemptyset (&sa.sa_mask);
>>> +
>>> +  /* Install the cancellation signal handler (in static builds only if
>>> +     pthread_cancel has been linked in).  If for some reason we cannot
>>> +     install the handler we do not abort.  Maybe we should, but it is
>>> +     only asynchronous cancellation which is affected.  */
>>> +#ifndef SHARED
>>> +  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
>>> +    __attribute__ ((weak));
>>> +  if (__nptl_sigcancel_handler != NULL)
>>> +#endif
>>
>> This weak symbol can be avoided if we move the cancellation setup
>> on pthread_cancel instead.  I still think this is best approach,
>> it disentangle the cancellation handling.
> 
> But then we either have to introduce yet another global flag or install
> the signal handler unconditionally before every cancel operation.  I do
> not think this results in a simplification.

The flag will be just a static bool or int only define on pthread_cancel,
something like:

  int
  __pthread_cancel (pthread_t th) 
  {
    [...]
    static int init = 0;
    if (atomic_load_relaxed (&init) == 0)
      {
        install_sighandler ();
        init = 1;
      }
    [...]
  }

Bu the main advantage is to move the cancellation code logically when
it is actually used, and it is small improvement on both static
linking (since the static code will be used solely is cancellation is
used) and on runtime (since sigaction will be set only if pthread_cancel
is called).
  
Florian Weimer May 20, 2021, 8:05 p.m. UTC | #4
* Adhemerval Zanella:

> On 20/05/2021 16:41, Florian Weimer wrote:
>> * Adhemerval Zanella:
>> 
>>>>  int
>>>>  __pthread_cancel (pthread_t th)
>>>> @@ -72,14 +129,23 @@ __pthread_cancel (pthread_t th)
>>>>  						    oldval))
>>>>  	    goto again;
>>>>  
>>>> -	  /* The cancellation handler will take care of marking the
>>>> -	     thread as canceled.  */
>>>> -	  pid_t pid = __getpid ();
>>>> -
>>>> -	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
>>>> -					   SIGCANCEL);
>>>> -	  if (INTERNAL_SYSCALL_ERROR_P (val))
>>>> -	    result = INTERNAL_SYSCALL_ERRNO (val);
>>>> +	  if (pd == THREAD_SELF)
>>>> +	    /* This is not merely an optimization: An application may
>>>> +	       call pthread_cancel (pthread_self ()) without calling
>>>> +	       pthread_create, so the signal handler may not have been
>>>> +	       set up for a self-cancel.  */
>>>> +	    sigcancel_handler ();
>>>
>>> I think it would be simple to just call __pthread_exit (PTHREAD_CANCELED)
>>> here, it won't require to split the cancellation handler, it already
>>> unwind if cancel state is enabled and asynchronous, and it does not
>>> require add another PTHREAD_STATIC_FN_REQUIRE hack. 
>>>
>>> It would require an extra __libc_unwind_link_get call, but I think we
>>> can optimize it later (I am working on a patch to simplify it).
>> 
>> It would be correct, I think.  pthread_cancel is not a cancellation
>> point.
>> 
>> #include <stdio.h>
>> #include <pthread.h>
>> 
>> int
>> main (void)
>> {
>>   pthread_cancel (pthread_self ());
>>   puts ("about to exit");
>> }
>> 
>> This should print “about to exit”.
>
> Yes, this is essentially sysdeps/pthread/tst-cancel-self.c.

But surely this won't work if we call pthread_exit (PTHREAD_CANCELED)
because that exits the main thread at that point, and not at the next
cancellation point.

>>>> +/* This performs the initialization necessary when going from
>>>> +   single-threaded to multi-threaded mode for the first time.  */
>>>> +static void
>>>> +late_init (void)
>>>> +{
>>>> +  struct sigaction sa;
>>>> +  __sigemptyset (&sa.sa_mask);
>>>> +
>>>> +  /* Install the cancellation signal handler (in static builds only if
>>>> +     pthread_cancel has been linked in).  If for some reason we cannot
>>>> +     install the handler we do not abort.  Maybe we should, but it is
>>>> +     only asynchronous cancellation which is affected.  */
>>>> +#ifndef SHARED
>>>> +  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
>>>> +    __attribute__ ((weak));
>>>> +  if (__nptl_sigcancel_handler != NULL)
>>>> +#endif
>>>
>>> This weak symbol can be avoided if we move the cancellation setup
>>> on pthread_cancel instead.  I still think this is best approach,
>>> it disentangle the cancellation handling.
>> 
>> But then we either have to introduce yet another global flag or install
>> the signal handler unconditionally before every cancel operation.  I do
>> not think this results in a simplification.
>
> The flag will be just a static bool or int only define on pthread_cancel,
> something like:
>
>   int
>   __pthread_cancel (pthread_t th) 
>   {
>     [...]
>     static int init = 0;
>     if (atomic_load_relaxed (&init) == 0)
>       {
>         install_sighandler ();
>         init = 1;
>       }
>     [...]
>   }
>
> Bu the main advantage is to move the cancellation code logically when
> it is actually used, and it is small improvement on both static
> linking (since the static code will be used solely is cancellation is
> used) and on runtime (since sigaction will be set only if pthread_cancel
> is called).

Okay, I can prepare a version along these lines.  But in general, I
consider less data and fewer conditionals an improvement.

Thanks,
Florian
  
Adhemerval Zanella May 20, 2021, 8:32 p.m. UTC | #5
On 20/05/2021 17:05, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> On 20/05/2021 16:41, Florian Weimer wrote:
>>> * Adhemerval Zanella:
>>>
>>>>>  int
>>>>>  __pthread_cancel (pthread_t th)
>>>>> @@ -72,14 +129,23 @@ __pthread_cancel (pthread_t th)
>>>>>  						    oldval))
>>>>>  	    goto again;
>>>>>  
>>>>> -	  /* The cancellation handler will take care of marking the
>>>>> -	     thread as canceled.  */
>>>>> -	  pid_t pid = __getpid ();
>>>>> -
>>>>> -	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
>>>>> -					   SIGCANCEL);
>>>>> -	  if (INTERNAL_SYSCALL_ERROR_P (val))
>>>>> -	    result = INTERNAL_SYSCALL_ERRNO (val);
>>>>> +	  if (pd == THREAD_SELF)
>>>>> +	    /* This is not merely an optimization: An application may
>>>>> +	       call pthread_cancel (pthread_self ()) without calling
>>>>> +	       pthread_create, so the signal handler may not have been
>>>>> +	       set up for a self-cancel.  */
>>>>> +	    sigcancel_handler ();
>>>>
>>>> I think it would be simple to just call __pthread_exit (PTHREAD_CANCELED)
>>>> here, it won't require to split the cancellation handler, it already
>>>> unwind if cancel state is enabled and asynchronous, and it does not
>>>> require add another PTHREAD_STATIC_FN_REQUIRE hack. 
>>>>
>>>> It would require an extra __libc_unwind_link_get call, but I think we
>>>> can optimize it later (I am working on a patch to simplify it).
>>>
>>> It would be correct, I think.  pthread_cancel is not a cancellation
>>> point.
>>>
>>> #include <stdio.h>
>>> #include <pthread.h>
>>>
>>> int
>>> main (void)
>>> {
>>>   pthread_cancel (pthread_self ());
>>>   puts ("about to exit");
>>> }
>>>
>>> This should print “about to exit”.
>>
>> Yes, this is essentially sysdeps/pthread/tst-cancel-self.c.
> 
> But surely this won't work if we call pthread_exit (PTHREAD_CANCELED)
> because that exits the main thread at that point, and not at the next
> cancellation point.

But you need to use the same condition as the other places:

  if (CANCEL_ENABLED_AND_CANCELED_AND_ASYNCHRONOUS (...)
    __pthread_exit (PTHREAD_CANCELLED);

I really hope to once we get the libpthread move done to simplify
the whole cancellation code.

> 
>>>>> +/* This performs the initialization necessary when going from
>>>>> +   single-threaded to multi-threaded mode for the first time.  */
>>>>> +static void
>>>>> +late_init (void)
>>>>> +{
>>>>> +  struct sigaction sa;
>>>>> +  __sigemptyset (&sa.sa_mask);
>>>>> +
>>>>> +  /* Install the cancellation signal handler (in static builds only if
>>>>> +     pthread_cancel has been linked in).  If for some reason we cannot
>>>>> +     install the handler we do not abort.  Maybe we should, but it is
>>>>> +     only asynchronous cancellation which is affected.  */
>>>>> +#ifndef SHARED
>>>>> +  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
>>>>> +    __attribute__ ((weak));
>>>>> +  if (__nptl_sigcancel_handler != NULL)
>>>>> +#endif
>>>>
>>>> This weak symbol can be avoided if we move the cancellation setup
>>>> on pthread_cancel instead.  I still think this is best approach,
>>>> it disentangle the cancellation handling.
>>>
>>> But then we either have to introduce yet another global flag or install
>>> the signal handler unconditionally before every cancel operation.  I do
>>> not think this results in a simplification.
>>
>> The flag will be just a static bool or int only define on pthread_cancel,
>> something like:
>>
>>   int
>>   __pthread_cancel (pthread_t th) 
>>   {
>>     [...]
>>     static int init = 0;
>>     if (atomic_load_relaxed (&init) == 0)
>>       {
>>         install_sighandler ();
>>         init = 1;
>>       }
>>     [...]
>>   }
>>
>> Bu the main advantage is to move the cancellation code logically when
>> it is actually used, and it is small improvement on both static
>> linking (since the static code will be used solely is cancellation is
>> used) and on runtime (since sigaction will be set only if pthread_cancel
>> is called).
> 
> Okay, I can prepare a version along these lines.  But in general, I
> consider less data and fewer conditionals an improvement.

Thanks.
  
Florian Weimer May 21, 2021, 9:58 a.m. UTC | #6
* Adhemerval Zanella:

>>> Bu the main advantage is to move the cancellation code logically when
>>> it is actually used, and it is small improvement on both static
>>> linking (since the static code will be used solely is cancellation is
>>> used) and on runtime (since sigaction will be set only if pthread_cancel
>>> is called).
>> 
>> Okay, I can prepare a version along these lines.  But in general, I
>> consider less data and fewer conditionals an improvement.
>
> Thanks.

Unfortunately, it does not work with the present implementation.

sysdeps/unix/sysv/linux/createthread.c contains this:

	      /* The operation failed.  We have to kill the thread.
		 We let the normal cancellation mechanism do the work.  */

	      pid_t pid = __getpid ();
	      INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid, SIGCANCEL);

This obviously needs a working SIGCANCEL handler, so pthread_create and
pthread_cancel are not as separate as we thought.

So I think we should go with my original version for the time being.  We
can switch to lazy initialization if we implement a different way for
handling late thread creation failure.

SIG_IGN handler disposition should not affect timer_create (which uses
SIGCANCEL internally) because sigwaitinfo still wakes up on such
signals.

Thanks,
Florian
  
Adhemerval Zanella May 21, 2021, 11:31 a.m. UTC | #7
On 21/05/2021 06:58, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>>>> Bu the main advantage is to move the cancellation code logically when
>>>> it is actually used, and it is small improvement on both static
>>>> linking (since the static code will be used solely is cancellation is
>>>> used) and on runtime (since sigaction will be set only if pthread_cancel
>>>> is called).
>>>
>>> Okay, I can prepare a version along these lines.  But in general, I
>>> consider less data and fewer conditionals an improvement.
>>
>> Thanks.
> 
> Unfortunately, it does not work with the present implementation.
> 
> sysdeps/unix/sysv/linux/createthread.c contains this:
> 
> 	      /* The operation failed.  We have to kill the thread.
> 		 We let the normal cancellation mechanism do the work.  */
> 
> 	      pid_t pid = __getpid ();
> 	      INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid, SIGCANCEL);
> 
> This obviously needs a working SIGCANCEL handler, so pthread_create and
> pthread_cancel are not as separate as we thought.

We can use __libc_signal_block_app on pthread_create instead of
__libc_signal_block_all and call __pthread_cancel on createthread.  The
SIGCANCEL is internal, so the race condition should not happen (assuming
well behave programs).  The pthread_cancel call on createthread can then
enable the signal.

> 
> So I think we should go with my original version for the time being.  We
> can switch to lazy initialization if we implement a different way for
> handling late thread creation failure.

Alright, I think we can change it later.

> 
> SIG_IGN handler disposition should not affect timer_create (which uses
> SIGCANCEL internally) because sigwaitinfo still wakes up on such
> signals.
> 
> Thanks,
> Florian
>
  
Adhemerval Zanella May 21, 2021, 12:40 p.m. UTC | #8
On 21/05/2021 08:31, Adhemerval Zanella wrote:
> 
> 
> On 21/05/2021 06:58, Florian Weimer wrote:
>> * Adhemerval Zanella:
>>
>>>>> Bu the main advantage is to move the cancellation code logically when
>>>>> it is actually used, and it is small improvement on both static
>>>>> linking (since the static code will be used solely is cancellation is
>>>>> used) and on runtime (since sigaction will be set only if pthread_cancel
>>>>> is called).
>>>>
>>>> Okay, I can prepare a version along these lines.  But in general, I
>>>> consider less data and fewer conditionals an improvement.
>>>
>>> Thanks.
>>
>> Unfortunately, it does not work with the present implementation.
>>
>> sysdeps/unix/sysv/linux/createthread.c contains this:
>>
>> 	      /* The operation failed.  We have to kill the thread.
>> 		 We let the normal cancellation mechanism do the work.  */
>>
>> 	      pid_t pid = __getpid ();
>> 	      INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid, SIGCANCEL);
>>
>> This obviously needs a working SIGCANCEL handler, so pthread_create and
>> pthread_cancel are not as separate as we thought.
> 
> We can use __libc_signal_block_app on pthread_create instead of
> __libc_signal_block_all and call __pthread_cancel on createthread.  The
> SIGCANCEL is internal, so the race condition should not happen (assuming
> well behave programs).  The pthread_cancel call on createthread can then
> enable the signal.

Ok, I didn't realize the current approach does require the SIGCANCEL
handler to be active so and invalid affinity mask or scheduler can
cancel the thread execution.
  

Patch

diff --git a/nptl/Versions b/nptl/Versions
index e7883cbb49..d96b830d05 100644
--- a/nptl/Versions
+++ b/nptl/Versions
@@ -367,8 +367,6 @@  libc {
     tss_set;
   }
   GLIBC_PRIVATE {
-     __nptl_create_event;
-     __nptl_death_event;
     __default_pthread_attr;
     __default_pthread_attr_lock;
     __futex_abstimed_wait64;
@@ -386,11 +384,14 @@  libc {
     __lll_trylock_elision;
     __lll_unlock_elision;
     __mutex_aconf;
+    __nptl_create_event;
     __nptl_deallocate_stack;
     __nptl_deallocate_tsd;
+    __nptl_death_event;
     __nptl_free_tcb;
     __nptl_nthreads;
     __nptl_setxid_sighandler;
+    __nptl_sigcancel_handler;
     __nptl_stack_list_add;
     __nptl_stack_list_del;
     __pthread_attr_copy;
diff --git a/nptl/nptl-init.c b/nptl/nptl-init.c
index f4b86fbfaf..bc4831ac89 100644
--- a/nptl/nptl-init.c
+++ b/nptl/nptl-init.c
@@ -44,84 +44,9 @@  size_t __static_tls_align_m1;
 /* Version of the library, used in libthread_db to detect mismatches.  */
 static const char nptl_version[] __attribute_used__ = VERSION;
 
-/* For asynchronous cancellation we use a signal.  This is the handler.  */
-static void
-sigcancel_handler (int sig, siginfo_t *si, void *ctx)
-{
-  /* Safety check.  It would be possible to call this function for
-     other signals and send a signal from another process.  This is not
-     correct and might even be a security problem.  Try to catch as
-     many incorrect invocations as possible.  */
-  if (sig != SIGCANCEL
-      || si->si_pid != __getpid()
-      || si->si_code != SI_TKILL)
-    return;
-
-  struct pthread *self = THREAD_SELF;
-
-  int oldval = THREAD_GETMEM (self, cancelhandling);
-  while (1)
-    {
-      /* We are canceled now.  When canceled by another thread this flag
-	 is already set but if the signal is directly send (internally or
-	 from another process) is has to be done here.  */
-      int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
-
-      if (oldval == newval || (oldval & EXITING_BITMASK) != 0)
-	/* Already canceled or exiting.  */
-	break;
-
-      int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
-					      oldval);
-      if (curval == oldval)
-	{
-	  /* Set the return value.  */
-	  THREAD_SETMEM (self, result, PTHREAD_CANCELED);
-
-	  /* Make sure asynchronous cancellation is still enabled.  */
-	  if ((newval & CANCELTYPE_BITMASK) != 0)
-	    /* Run the registered destructors and terminate the thread.  */
-	    __do_cancel ();
-
-	  break;
-	}
-
-      oldval = curval;
-    }
-}
-
-
-/* When using __thread for this, we do it in libc so as not
-   to give libpthread its own TLS segment just for this.  */
-extern void **__libc_dl_error_tsd (void) __attribute__ ((const));
-
-
 void
 __pthread_initialize_minimal_internal (void)
 {
-  struct sigaction sa;
-  __sigemptyset (&sa.sa_mask);
-
-  /* Install the cancellation signal handler.  If for some reason we
-     cannot install the handler we do not abort.  Maybe we should, but
-     it is only asynchronous cancellation which is affected.  */
-  sa.sa_sigaction = sigcancel_handler;
-  sa.sa_flags = SA_SIGINFO;
-  (void) __libc_sigaction (SIGCANCEL, &sa, NULL);
-
-  /* Install the handle to change the threads' uid/gid.  */
-  sa.sa_sigaction = __nptl_setxid_sighandler;
-  sa.sa_flags = SA_SIGINFO | SA_RESTART;
-  (void) __libc_sigaction (SIGSETXID, &sa, NULL);
-
-  /* The parent process might have left the signals blocked.  Just in
-     case, unblock it.  We reuse the signal mask in the sigaction
-     structure.  It is already cleared.  */
-  __sigaddset (&sa.sa_mask, SIGCANCEL);
-  __sigaddset (&sa.sa_mask, SIGSETXID);
-  INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &sa.sa_mask,
-			 NULL, __NSIG_BYTES);
-
   /* Get the size of the static and alignment requirements for the TLS
      block.  */
   size_t static_tls_align;
diff --git a/nptl/pthreadP.h b/nptl/pthreadP.h
index f93806e540..497c2ad3d9 100644
--- a/nptl/pthreadP.h
+++ b/nptl/pthreadP.h
@@ -571,6 +571,12 @@  libc_hidden_proto (__pthread_attr_setsigmask_internal)
 extern __typeof (pthread_attr_getsigmask_np) __pthread_attr_getsigmask_np;
 libc_hidden_proto (__pthread_attr_getsigmask_np)
 
+/* The cancellation signal handler defined alongside with
+   pthread_cancel.  This is included in statically linked binaries
+   only if pthread_cancel is linked in.  */
+void __nptl_sigcancel_handler (int sig, siginfo_t *si, void *ctx);
+libc_hidden_proto (__nptl_sigcancel_handler)
+
 /* Special versions which use non-exported functions.  */
 extern void __pthread_cleanup_push (struct _pthread_cleanup_buffer *buffer,
 				    void (*routine) (void *), void *arg);
diff --git a/nptl/pthread_cancel.c b/nptl/pthread_cancel.c
index e4ad602900..802c691874 100644
--- a/nptl/pthread_cancel.c
+++ b/nptl/pthread_cancel.c
@@ -26,6 +26,63 @@ 
 #include <unwind-link.h>
 #include <stdio.h>
 #include <gnu/lib-names.h>
+#include <sys/single_threaded.h>
+
+/* For asynchronous cancellation we use a signal.  This is the core
+   logic of the signal handler.  */
+static void
+sigcancel_handler (void)
+{
+  struct pthread *self = THREAD_SELF;
+
+  int oldval = THREAD_GETMEM (self, cancelhandling);
+  while (1)
+    {
+      /* We are canceled now.  When canceled by another thread this flag
+	 is already set but if the signal is directly send (internally or
+	 from another process) is has to be done here.  */
+      int newval = oldval | CANCELING_BITMASK | CANCELED_BITMASK;
+
+      if (oldval == newval || (oldval & EXITING_BITMASK) != 0)
+	/* Already canceled or exiting.  */
+	break;
+
+      int curval = THREAD_ATOMIC_CMPXCHG_VAL (self, cancelhandling, newval,
+					      oldval);
+      if (curval == oldval)
+	{
+	  /* Set the return value.  */
+	  THREAD_SETMEM (self, result, PTHREAD_CANCELED);
+
+	  /* Make sure asynchronous cancellation is still enabled.  */
+	  if ((newval & CANCELTYPE_BITMASK) != 0)
+	    /* Run the registered destructors and terminate the thread.  */
+	    __do_cancel ();
+
+	  break;
+	}
+
+      oldval = curval;
+    }
+}
+
+/* This is the actually installed SIGCANCEL handler.  It adds some
+   safety checks before performing the cancellation.  */
+void
+__nptl_sigcancel_handler (int sig, siginfo_t *si, void *ctx)
+{
+  /* Safety check.  It would be possible to call this function for
+     other signals and send a signal from another process.  This is not
+     correct and might even be a security problem.  Try to catch as
+     many incorrect invocations as possible.  */
+  if (sig != SIGCANCEL
+      || si->si_pid != __getpid()
+      || si->si_code != SI_TKILL)
+    return;
+
+  sigcancel_handler ();
+}
+libc_hidden_def (__nptl_sigcancel_handler)
 
 int
 __pthread_cancel (pthread_t th)
@@ -72,14 +129,23 @@  __pthread_cancel (pthread_t th)
 						    oldval))
 	    goto again;
 
-	  /* The cancellation handler will take care of marking the
-	     thread as canceled.  */
-	  pid_t pid = __getpid ();
-
-	  int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
-					   SIGCANCEL);
-	  if (INTERNAL_SYSCALL_ERROR_P (val))
-	    result = INTERNAL_SYSCALL_ERRNO (val);
+	  if (pd == THREAD_SELF)
+	    /* This is not merely an optimization: An application may
+	       call pthread_cancel (pthread_self ()) without calling
+	       pthread_create, so the signal handler may not have been
+	       set up for a self-cancel.  */
+	    sigcancel_handler ();
+	  else
+	    {
+	      /* The cancellation handler will take care of marking the
+		 thread as canceled.  */
+	      pid_t pid = __getpid ();
+
+	      int val = INTERNAL_SYSCALL_CALL (tgkill, pid, pd->tid,
+					       SIGCANCEL);
+	      if (INTERNAL_SYSCALL_ERROR_P (val))
+		result = INTERNAL_SYSCALL_ERRNO (val);
+	    }
 
 	  break;
 	}
@@ -106,4 +172,8 @@  versioned_symbol (libc, __pthread_cancel, pthread_cancel, GLIBC_2_34);
 compat_symbol (libpthread, __pthread_cancel, pthread_cancel, GLIBC_2_0);
 #endif
 
-PTHREAD_STATIC_FN_REQUIRE (__pthread_create)
+/* Ensure that the unwinder is always linked in (the __pthread_unwind
+   reference from __do_cancel is weak).  Use ___pthread_unwind_next
+   (three underscores) to produce a strong reference to the same
+   file.  */
+PTHREAD_STATIC_FN_REQUIRE (___pthread_unwind_next)
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index 770656453d..772b5efcc6 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -56,6 +56,43 @@  static struct rtld_global *__nptl_rtld_global __attribute_used__
   = &_rtld_global;
 #endif
 
+/* This performs the initialization necessary when going from
+   single-threaded to multi-threaded mode for the first time.  */
+static void
+late_init (void)
+{
+  struct sigaction sa;
+  __sigemptyset (&sa.sa_mask);
+
+  /* Install the cancellation signal handler (in static builds only if
+     pthread_cancel has been linked in).  If for some reason we cannot
+     install the handler we do not abort.  Maybe we should, but it is
+     only asynchronous cancellation which is affected.  */
+#ifndef SHARED
+  extern __typeof (__nptl_sigcancel_handler) __nptl_sigcancel_handler
+    __attribute__ ((weak));
+  if (__nptl_sigcancel_handler != NULL)
+#endif
+    {
+      sa.sa_sigaction = __nptl_sigcancel_handler;
+      sa.sa_flags = SA_SIGINFO;
+      (void) __libc_sigaction (SIGCANCEL, &sa, NULL);
+    }
+
+  /* Install the handle to change the threads' uid/gid.  */
+  sa.sa_sigaction = __nptl_setxid_sighandler;
+  sa.sa_flags = SA_SIGINFO | SA_RESTART;
+  (void) __libc_sigaction (SIGSETXID, &sa, NULL);
+
+  /* The parent process might have left the signals blocked.  Just in
+     case, unblock it.  We reuse the signal mask in the sigaction
+     structure.  It is already cleared.  */
+  __sigaddset (&sa.sa_mask, SIGCANCEL);
+  __sigaddset (&sa.sa_mask, SIGSETXID);
+  INTERNAL_SYSCALL_CALL (rt_sigprocmask, SIG_UNBLOCK, &sa.sa_mask,
+			 NULL, __NSIG_BYTES);
+}
+
 /* Code to allocate and deallocate a stack.  */
 #include "allocatestack.c"
 
@@ -459,9 +496,13 @@  __pthread_create_2_1 (pthread_t *newthread, const pthread_attr_t *attr,
 {
   STACK_VARIABLES;
 
-  /* Avoid a data race in the multi-threaded case.  */
+  /* Avoid a data race in the multi-threaded case, and call the
+     deferred initialization only once.  */
   if (__libc_single_threaded)
-    __libc_single_threaded = 0;
+    {
+      late_init ();
+      __libc_single_threaded = 0;
+    }
 
   const struct pthread_attr *iattr = (struct pthread_attr *) attr;
   union pthread_attr_transparent default_attr;