[3/3] nptl: Fix race between pthread_kill and thread exit (bug 12889)

Message ID 811aff39780f8d997421dcda8c6afaf01ea1ee6a.1629208174.git.fweimer@redhat.com
State Superseded
Headers
Series Fix ESRCH issues in pthread_cancel, pthread_kill |

Checks

Context Check Description
dj/TryBot-apply_patch success Patch applied to master at the time it was sent
dj/TryBot-32bit fail Patch caused testsuite regressions

Commit Message

Florian Weimer Aug. 17, 2021, 1:51 p.m. UTC
  Thread exit is delayed until all pending pthread_kill operations
on the thread have completed.  This avoids sending signals to the
wrong thread or a spurious ESRCH error.

The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived
from a downstream test originally written by Marek Polacek.
---
 nptl/allocatestack.c                          |   1 +
 nptl/descr.h                                  |  12 ++
 nptl/pthread_create.c                         |  20 ++++
 nptl/pthread_kill.c                           |  23 +++-
 support/Makefile                              |   3 +-
 sysdeps/pthread/Makefile                      |   2 +
 sysdeps/pthread/tst-kill4.c                   |  90 --------------
 .../pthread/tst-pthread_cancel-select-loop.c  |  87 ++++++++++++++
 sysdeps/pthread/tst-pthread_kill-exiting.c    | 110 ++++++++++++++++++
 9 files changed, 254 insertions(+), 94 deletions(-)
 delete mode 100644 sysdeps/pthread/tst-kill4.c
 create mode 100644 sysdeps/pthread/tst-pthread_cancel-select-loop.c
 create mode 100644 sysdeps/pthread/tst-pthread_kill-exiting.c
  

Comments

Adhemerval Zanella Aug. 19, 2021, 4:12 p.m. UTC | #1
On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote:
> Thread exit is delayed until all pending pthread_kill operations
> on the thread have completed.  This avoids sending signals to the
> wrong thread or a spurious ESRCH error.
> 
> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived
> from a downstream test originally written by Marek Polacek.

And I really hope we could first sort out the BZ#19951 and move away
of using pthread::tid as the synchronization member to check thread
termination.  I sent a possible fix [1], but it does not fully handle
the pthread::tid access, it would require to a proper lock to 
synchronize between thread exit and pthread_kill (as Rich has suggested
on BZ#12889).

I will need to send an updated version of my pthread fixes patchset,
since clone3 changes broke it and INVALID_TD_P needs fixing as well.

[1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/

> ---
>  nptl/allocatestack.c                          |   1 +
>  nptl/descr.h                                  |  12 ++
>  nptl/pthread_create.c                         |  20 ++++
>  nptl/pthread_kill.c                           |  23 +++-
>  support/Makefile                              |   3 +-
>  sysdeps/pthread/Makefile                      |   2 +
>  sysdeps/pthread/tst-kill4.c                   |  90 --------------
>  .../pthread/tst-pthread_cancel-select-loop.c  |  87 ++++++++++++++
>  sysdeps/pthread/tst-pthread_kill-exiting.c    | 110 ++++++++++++++++++
>  9 files changed, 254 insertions(+), 94 deletions(-)
>  delete mode 100644 sysdeps/pthread/tst-kill4.c
>  create mode 100644 sysdeps/pthread/tst-pthread_cancel-select-loop.c
>  create mode 100644 sysdeps/pthread/tst-pthread_kill-exiting.c
> 
> diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
> index cfe37a3443..344f7f8ec5 100644
> --- a/nptl/allocatestack.c
> +++ b/nptl/allocatestack.c
> @@ -127,6 +127,7 @@ get_cached_stack (size_t *sizep, void **memp)
>    /* No pending event.  */
>    result->nextevent = NULL;
>  
> +  result->exit_signal_futex = 0;
>    result->tls_state = (struct tls_internal_t) { 0 };
>  
>    /* Clear the DTV.  */
> diff --git a/nptl/descr.h b/nptl/descr.h
> index c85778d449..7255d1dc77 100644
> --- a/nptl/descr.h
> +++ b/nptl/descr.h
> @@ -396,6 +396,18 @@ struct pthread
>       PTHREAD_CANCEL_ASYNCHRONOUS).  */
>    unsigned char canceltype;
>  
> +  /* Futex to prevent thread exit during concurrent pthread_kill.  The
> +     least-significant bit is an exit flag.  The remaining bits count
> +     the number of pthread_kill operations in progress.
> +
> +     Upon exit, the thread bit sets the LSB, and waits until the futex
> +     variable is 1.
> +
> +     pthread_kill atomically adds 2 to the futex variable before the
> +     signal-sending operation, subtracts 2 after it, and skips sending
> +     the signal if the least-significant bit is set.  */
> +  unsigned int exit_signal_futex;
> +
>    /* Used on strsignal.  */
>    struct tls_internal_t tls_state;
>  
> diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
> index d8ec299cb1..7e1a3cfcdd 100644
> --- a/nptl/pthread_create.c
> +++ b/nptl/pthread_create.c
> @@ -37,6 +37,7 @@
>  #include <sys/single_threaded.h>
>  #include <version.h>
>  #include <clone_internal.h>
> +#include <futex-internal.h>
>  
>  #include <shlib-compat.h>
>  
> @@ -485,6 +486,25 @@ start_thread (void *arg)
>      /* This was the last thread.  */
>      exit (0);
>  
> +  /* Blocking signals is required to make pthread_kill
> +     async-signal-safe.  It prevents sending a signal from this thread
> +     to itself during its final stages.  This must come after the exit
> +     call above because atexit handlers must not run with signals
> +     blocked.  */
> +  __libc_signal_block_all (NULL);
> +
> +  /* Delay exiting this thread until there are no concurrent
> +     pthread_kill operations in progress.  */
> +  {
> +    /* Indicate that no further signals should be sent.  */
> +    atomic_fetch_or_release (&pd->exit_signal_futex, 1);
> +
> +    /* Wait until there are no pthread_kill operations left.  */
> +    unsigned int val;
> +    while ((val = atomic_load_acquire (&pd->exit_signal_futex)) != 1)
> +      futex_wait (&pd->exit_signal_futex, val, FUTEX_PRIVATE);
> +  }
> +
>  #ifndef __ASSUME_SET_ROBUST_LIST
>    /* If this thread has any robust mutexes locked, handle them now.  */
>  # if __PTHREAD_MUTEX_HAVE_PREV
> diff --git a/nptl/pthread_kill.c b/nptl/pthread_kill.c
> index 5d4c86f920..b67bd4b769 100644
> --- a/nptl/pthread_kill.c
> +++ b/nptl/pthread_kill.c
> @@ -18,6 +18,7 @@
>  
>  #include <unistd.h>
>  #include <pthreadP.h>
> +#include <futex-internal.h>
>  #include <shlib-compat.h>
>  
>  int
> @@ -41,9 +42,25 @@ __pthread_kill_internal (pthread_t threadid, int signo)
>      {
>        pid_t pid = __getpid ();
>  
> -      val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo);
> -      val = (INTERNAL_SYSCALL_ERROR_P (val)
> -	    ? INTERNAL_SYSCALL_ERRNO (val) : 0);
> +      if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, 2) & 1)
> +	/* If the thread is exiting, it has already blocked all
> +	   signals, so sending the signal does not have any effect.
> +	   Not sending the signal avoids a spurious ESRCH from tgkill
> +	   if the thread manages to complete its exit before the
> +	   signal can be sent.  */
> +	val = 0;
> +      else
> +	{
> +	  val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo);
> +	  val = (INTERNAL_SYSCALL_ERROR_P (val)
> +		 ? INTERNAL_SYSCALL_ERRNO (val) : 0);
> +	}
> +
> +      /* A value 3 before the update indicates that the thread is
> +	 exiting, and this is the last remaining concurrent
> +	 pthread_kill operation.  */
> +      if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, -2) == 3)
> +	futex_wake (&pd->exit_signal_futex, 1, FUTEX_PRIVATE);
>      }
>    else
>      /* The kernel reports that the thread has exited.  POSIX specifies
> diff --git a/support/Makefile b/support/Makefile
> index a462781718..ef2b1a980a 100644
> --- a/support/Makefile
> +++ b/support/Makefile
> @@ -82,9 +82,10 @@ libsupport-routines = \
>    support_test_compare_blob \
>    support_test_compare_failure \
>    support_test_compare_string \
> -  support_write_file_string \
>    support_test_main \
>    support_test_verify_impl \
> +  support_wait_for_thread_exit \
> +  support_write_file_string \
>    temp_file \
>    timespec \
>    timespec-time64 \
> diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
> index dedfa0d290..48dba717a1 100644
> --- a/sysdeps/pthread/Makefile
> +++ b/sysdeps/pthread/Makefile
> @@ -119,7 +119,9 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
>  	 tst-unwind-thread \
>  	 tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
>  	 tst-pthread_cancel-exited \
> +	 tst-pthread_cancel-select-loop \
>  	 tst-pthread_kill-exited \
> +	 tst-pthread_kill-exiting \
>  	 # tests
>  
>  tests-time64 := \
> diff --git a/sysdeps/pthread/tst-kill4.c b/sysdeps/pthread/tst-kill4.c
> deleted file mode 100644
> index 9563939792..0000000000
> --- a/sysdeps/pthread/tst-kill4.c
> +++ /dev/null
> @@ -1,90 +0,0 @@
> -/* Copyright (C) 2003-2021 Free Software Foundation, Inc.
> -   This file is part of the GNU C Library.
> -   Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <https://www.gnu.org/licenses/>.  */
> -
> -#include <errno.h>
> -#include <pthread.h>
> -#include <signal.h>
> -#include <stdio.h>
> -#include <stdlib.h>
> -#include <unistd.h>
> -
> -
> -static void *
> -tf (void *a)
> -{
> -  return NULL;
> -}
> -
> -
> -int
> -do_test (void)
> -{
> -  pthread_attr_t at;
> -  if (pthread_attr_init (&at) != 0)
> -    {
> -      puts ("attr_create failed");
> -      exit (1);
> -    }
> -
> -  /* Limit thread stack size, because if it is too large, pthread_join
> -     will free it immediately rather than put it into stack cache.  */
> -  if (pthread_attr_setstacksize (&at, 2 * 1024 * 1024) != 0)
> -    {
> -      puts ("setstacksize failed");
> -      exit (1);
> -    }
> -
> -  pthread_t th;
> -  if (pthread_create (&th, &at, tf, NULL) != 0)
> -    {
> -      puts ("create failed");
> -      exit (1);
> -    }
> -
> -  pthread_attr_destroy (&at);
> -
> -  if (pthread_join (th, NULL) != 0)
> -    {
> -      puts ("join failed");
> -      exit (1);
> -    }
> -
> -  /* The following only works because we assume here something about
> -     the implementation.  Namely, that the memory allocated for the
> -     thread descriptor is not going away, that the TID field is
> -     cleared and therefore the signal is sent to process 0, and that
> -     we can savely assume there is no other process with this ID at
> -     that time.  */
> -  int e = pthread_kill (th, 0);
> -  if (e == 0)
> -    {
> -      puts ("pthread_kill succeeded");
> -      exit (1);
> -    }
> -  if (e != ESRCH)
> -    {
> -      puts ("pthread_kill didn't return ESRCH");
> -      exit (1);
> -    }
> -
> -  return 0;
> -}
> -
> -
> -#define TEST_FUNCTION do_test ()
> -#include "../test-skeleton.c"
> diff --git a/sysdeps/pthread/tst-pthread_cancel-select-loop.c b/sysdeps/pthread/tst-pthread_cancel-select-loop.c
> new file mode 100644
> index 0000000000..a62087589c
> --- /dev/null
> +++ b/sysdeps/pthread/tst-pthread_cancel-select-loop.c
> @@ -0,0 +1,87 @@
> +/* Test that pthread_cancel succeeds during thread exit.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* This test tries to trigger an internal race condition in
> +   pthread_cancel, where the cancellation signal is sent after the
> +   thread has begun the cancellation process.  This can result in a
> +   spurious ESRCH error.  For the original bug 12889, the window is
> +   quite small, so the bug was not reproduced in every run.  */
> +
> +#include <stdbool.h>
> +#include <stddef.h>
> +#include <support/check.h>
> +#include <support/xthread.h>
> +#include <support/xunistd.h>
> +#include <sys/select.h>
> +#include <unistd.h>
> +
> +/* Set to true by timeout_thread_function when the test should
> +   terminate.  */
> +static bool timeout;
> +
> +static void *
> +timeout_thread_function (void *unused)
> +{
> +  usleep (5 * 1000 * 1000);
> +  __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
> +  return NULL;
> +}
> +
> +/* Used for blocking the select function below.  */
> +static int pipe_fds[2];
> +
> +static void *
> +canceled_thread_function (void *unused)
> +{
> +  while (true)
> +    {
> +      fd_set rfs;
> +      fd_set wfs;
> +      fd_set efs;
> +      FD_ZERO (&rfs);
> +      FD_ZERO (&wfs);
> +      FD_ZERO (&efs);
> +      FD_SET (pipe_fds[0], &rfs);
> +
> +      /* If the cancellation request is recognized early, the thread
> +         begins exiting while the cancellation signal arrives.  */
> +      select (FD_SETSIZE, &rfs, &wfs, &efs, NULL);
> +    }
> +  return NULL;
> +}
> +
> +static int
> +do_test (void)
> +{
> +  xpipe (pipe_fds);
> +  pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
> +
> +  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
> +    {
> +      pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL);
> +      xpthread_cancel (thr);
> +      TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED);
> +    }
> +
> +  xpthread_join (thr_timeout);
> +  xclose (pipe_fds[0]);
> +  xclose (pipe_fds[1]);
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
> diff --git a/sysdeps/pthread/tst-pthread_kill-exiting.c b/sysdeps/pthread/tst-pthread_kill-exiting.c
> new file mode 100644
> index 0000000000..15550d2310
> --- /dev/null
> +++ b/sysdeps/pthread/tst-pthread_kill-exiting.c
> @@ -0,0 +1,110 @@
> +/* Test that pthread_kill succeeds during thread exit.
> +   Copyright (C) 2021 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <https://www.gnu.org/licenses/>.  */
> +
> +/* This test verifies that pthread_kill for a thread that is exiting
> +   succeeds (with or without actually delivering the signal).  */
> +
> +#include <array_length.h>
> +#include <stdbool.h>
> +#include <stddef.h>
> +#include <support/xsignal.h>
> +#include <support/xthread.h>
> +#include <unistd.h>
> +
> +/* Set to true by timeout_thread_function when the test should
> +   terminate.  */
> +static bool timeout;
> +
> +static void *
> +timeout_thread_function (void *unused)
> +{
> +  usleep (1000 * 1000);
> +  __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
> +  return NULL;
> +}
> +
> +/* Used to synchronize the sending threads with the target thread and
> +   main thread.  */
> +static pthread_barrier_t barrier_1;
> +static pthread_barrier_t barrier_2;
> +
> +/* The target thread to which signals are to be sent.  */
> +static pthread_t target_thread;
> +
> +static void *
> +sender_thread_function (void *unused)
> +{
> +  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
> +    {
> +      /* Wait until target_thread has been initialized.  The target
> +         thread and main thread participate in this barrier.  */
> +      xpthread_barrier_wait (&barrier_1);
> +
> +      xpthread_kill (target_thread, SIGUSR1);
> +
> +      /* Communicate that the signal has been sent.  The main thread
> +         participates in this barrier.  */
> +      xpthread_barrier_wait (&barrier_2);
> +    }
> +  return NULL;
> +}
> +
> +static void *
> +target_thread_function (void *unused)
> +{
> +  target_thread = pthread_self ();
> +  xpthread_barrier_wait (&barrier_1);
> +  return NULL;
> +}
> +
> +static int
> +do_test (void)
> +{
> +  xsignal (SIGUSR1, SIG_IGN);
> +
> +  pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
> +
> +  pthread_t threads[4];
> +  xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2);
> +  xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1);
> +
> +  for (int i = 0; i < array_length (threads); ++i)
> +    threads[i] = xpthread_create (NULL, sender_thread_function, NULL);
> +
> +  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
> +    {
> +      xpthread_create (NULL, target_thread_function, NULL);
> +
> +      /* Wait for the target thread to be set up and signal sending to
> +         start.  */
> +      xpthread_barrier_wait (&barrier_1);
> +
> +      /* Wait for signal sending to complete.  */
> +      xpthread_barrier_wait (&barrier_2);
> +
> +      xpthread_join (target_thread);
> +    }
> +
> +  for (int i = 0; i < array_length (threads); ++i)
> +    xpthread_join (threads[i]);
> +  xpthread_join (thr_timeout);
> +
> +  return 0;
> +}
> +
> +#include <support/test-driver.c>
>
  
Florian Weimer Aug. 19, 2021, 4:37 p.m. UTC | #2
* Adhemerval Zanella via Libc-alpha:

> On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote:
>> Thread exit is delayed until all pending pthread_kill operations
>> on the thread have completed.  This avoids sending signals to the
>> wrong thread or a spurious ESRCH error.
>> 
>> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived
>> from a downstream test originally written by Marek Polacek.
>
> And I really hope we could first sort out the BZ#19951 and move away
> of using pthread::tid as the synchronization member to check thread
> termination.  I sent a possible fix [1], but it does not fully handle
> the pthread::tid access, it would require to a proper lock to 
> synchronize between thread exit and pthread_kill (as Rich has suggested
> on BZ#12889).
>
> I will need to send an updated version of my pthread fixes patchset,
> since clone3 changes broke it and INVALID_TD_P needs fixing as well.
>
> [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/

I think my patch is independent of the other fixes.  Its advantage over
other proposals is that it does not add waiting to pthread_kill, so
there is no signal-safety issue.

(But I probably should test if cancellation from a signal handler that
is invoked synchronously via pthread_kill still works.)

Thanks,
Florian
  
Adhemerval Zanella Aug. 19, 2021, 4:49 p.m. UTC | #3
On 19/08/2021 13:37, Florian Weimer wrote:
> * Adhemerval Zanella via Libc-alpha:
> 
>> On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote:
>>> Thread exit is delayed until all pending pthread_kill operations
>>> on the thread have completed.  This avoids sending signals to the
>>> wrong thread or a spurious ESRCH error.
>>>
>>> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived
>>> from a downstream test originally written by Marek Polacek.
>>
>> And I really hope we could first sort out the BZ#19951 and move away
>> of using pthread::tid as the synchronization member to check thread
>> termination.  I sent a possible fix [1], but it does not fully handle
>> the pthread::tid access, it would require to a proper lock to 
>> synchronize between thread exit and pthread_kill (as Rich has suggested
>> on BZ#12889).
>>
>> I will need to send an updated version of my pthread fixes patchset,
>> since clone3 changes broke it and INVALID_TD_P needs fixing as well.
>>
>> [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/
> 
> I think my patch is independent of the other fixes.  Its advantage over
> other proposals is that it does not add waiting to pthread_kill, so
> there is no signal-safety issue.

I am not sure, the way I think we should fix to also fix BZ#19951
would to add a lock to access 'tid' and uses not only on pthread_kill,
but also on pthread_getcpuclockid, pthread_getschedparam, 
pthread_setschedparam, and pthread_setschedprio.  The lock also
simplifies the code, there is no need to special handling of futex
internal state. I also think pthread_kill would also be simplified.

The lock is just to simplify the code, if we really need to add
some scalability we might use a more clever code as you are proposing.

> 
> (But I probably should test if cancellation from a signal handler that
> is invoked synchronously via pthread_kill still works.)
> 
> Thanks,
> Florian
>
  
Florian Weimer Aug. 20, 2021, 4:24 p.m. UTC | #4
* Adhemerval Zanella:

> On 19/08/2021 13:37, Florian Weimer wrote:
>> * Adhemerval Zanella via Libc-alpha:
>> 
>>> On 17/08/2021 10:51, Florian Weimer via Libc-alpha wrote:
>>>> Thread exit is delayed until all pending pthread_kill operations
>>>> on the thread have completed.  This avoids sending signals to the
>>>> wrong thread or a spurious ESRCH error.
>>>>
>>>> The test sysdeps/pthread/tst-pthread_cancel-select-loop.c is derived
>>>> from a downstream test originally written by Marek Polacek.
>>>
>>> And I really hope we could first sort out the BZ#19951 and move away
>>> of using pthread::tid as the synchronization member to check thread
>>> termination.  I sent a possible fix [1], but it does not fully handle
>>> the pthread::tid access, it would require to a proper lock to 
>>> synchronize between thread exit and pthread_kill (as Rich has suggested
>>> on BZ#12889).
>>>
>>> I will need to send an updated version of my pthread fixes patchset,
>>> since clone3 changes broke it and INVALID_TD_P needs fixing as well.
>>>
>>> [1] https://patchwork.sourceware.org/project/glibc/patch/20210610193639.3650754-5-adhemerval.zanella@linaro.org/
>> 
>> I think my patch is independent of the other fixes.  Its advantage over
>> other proposals is that it does not add waiting to pthread_kill, so
>> there is no signal-safety issue.
>
> I am not sure, the way I think we should fix to also fix BZ#19951
> would to add a lock to access 'tid' and uses not only on pthread_kill,
> but also on pthread_getcpuclockid, pthread_getschedparam, 
> pthread_setschedparam, and pthread_setschedprio.  The lock also
> simplifies the code, there is no need to special handling of futex
> internal state. I also think pthread_kill would also be simplified.
>
> The lock is just to simplify the code, if we really need to add
> some scalability we might use a more clever code as you are proposing.

I don't think we can wait for the fix for bug 19951 because glibc 2.34
has a real regression in pthread_cancel: it can fail spuriously with
ESRCH.  And it can happen with a single pthread_cancel call in the
program, you don't even need concurrent pthread_cancel to trigger this
bug now.

I have moved the magic no-TID-reuse functionality into a separate
function in the new version, so we can replace it once we have something
better.

Thanks,
Florian
  

Patch

diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index cfe37a3443..344f7f8ec5 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -127,6 +127,7 @@  get_cached_stack (size_t *sizep, void **memp)
   /* No pending event.  */
   result->nextevent = NULL;
 
+  result->exit_signal_futex = 0;
   result->tls_state = (struct tls_internal_t) { 0 };
 
   /* Clear the DTV.  */
diff --git a/nptl/descr.h b/nptl/descr.h
index c85778d449..7255d1dc77 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -396,6 +396,18 @@  struct pthread
      PTHREAD_CANCEL_ASYNCHRONOUS).  */
   unsigned char canceltype;
 
+  /* Futex to prevent thread exit during concurrent pthread_kill.  The
+     least-significant bit is an exit flag.  The remaining bits count
+     the number of pthread_kill operations in progress.
+
+     Upon exit, the thread bit sets the LSB, and waits until the futex
+     variable is 1.
+
+     pthread_kill atomically adds 2 to the futex variable before the
+     signal-sending operation, subtracts 2 after it, and skips sending
+     the signal if the least-significant bit is set.  */
+  unsigned int exit_signal_futex;
+
   /* Used on strsignal.  */
   struct tls_internal_t tls_state;
 
diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
index d8ec299cb1..7e1a3cfcdd 100644
--- a/nptl/pthread_create.c
+++ b/nptl/pthread_create.c
@@ -37,6 +37,7 @@ 
 #include <sys/single_threaded.h>
 #include <version.h>
 #include <clone_internal.h>
+#include <futex-internal.h>
 
 #include <shlib-compat.h>
 
@@ -485,6 +486,25 @@  start_thread (void *arg)
     /* This was the last thread.  */
     exit (0);
 
+  /* Blocking signals is required to make pthread_kill
+     async-signal-safe.  It prevents sending a signal from this thread
+     to itself during its final stages.  This must come after the exit
+     call above because atexit handlers must not run with signals
+     blocked.  */
+  __libc_signal_block_all (NULL);
+
+  /* Delay exiting this thread until there are no concurrent
+     pthread_kill operations in progress.  */
+  {
+    /* Indicate that no further signals should be sent.  */
+    atomic_fetch_or_release (&pd->exit_signal_futex, 1);
+
+    /* Wait until there are no pthread_kill operations left.  */
+    unsigned int val;
+    while ((val = atomic_load_acquire (&pd->exit_signal_futex)) != 1)
+      futex_wait (&pd->exit_signal_futex, val, FUTEX_PRIVATE);
+  }
+
 #ifndef __ASSUME_SET_ROBUST_LIST
   /* If this thread has any robust mutexes locked, handle them now.  */
 # if __PTHREAD_MUTEX_HAVE_PREV
diff --git a/nptl/pthread_kill.c b/nptl/pthread_kill.c
index 5d4c86f920..b67bd4b769 100644
--- a/nptl/pthread_kill.c
+++ b/nptl/pthread_kill.c
@@ -18,6 +18,7 @@ 
 
 #include <unistd.h>
 #include <pthreadP.h>
+#include <futex-internal.h>
 #include <shlib-compat.h>
 
 int
@@ -41,9 +42,25 @@  __pthread_kill_internal (pthread_t threadid, int signo)
     {
       pid_t pid = __getpid ();
 
-      val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo);
-      val = (INTERNAL_SYSCALL_ERROR_P (val)
-	    ? INTERNAL_SYSCALL_ERRNO (val) : 0);
+      if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, 2) & 1)
+	/* If the thread is exiting, it has already blocked all
+	   signals, so sending the signal does not have any effect.
+	   Not sending the signal avoids a spurious ESRCH from tgkill
+	   if the thread manages to complete its exit before the
+	   signal can be sent.  */
+	val = 0;
+      else
+	{
+	  val = INTERNAL_SYSCALL_CALL (tgkill, pid, tid, signo);
+	  val = (INTERNAL_SYSCALL_ERROR_P (val)
+		 ? INTERNAL_SYSCALL_ERRNO (val) : 0);
+	}
+
+      /* A value 3 before the update indicates that the thread is
+	 exiting, and this is the last remaining concurrent
+	 pthread_kill operation.  */
+      if (atomic_fetch_add_acq_rel (&pd->exit_signal_futex, -2) == 3)
+	futex_wake (&pd->exit_signal_futex, 1, FUTEX_PRIVATE);
     }
   else
     /* The kernel reports that the thread has exited.  POSIX specifies
diff --git a/support/Makefile b/support/Makefile
index a462781718..ef2b1a980a 100644
--- a/support/Makefile
+++ b/support/Makefile
@@ -82,9 +82,10 @@  libsupport-routines = \
   support_test_compare_blob \
   support_test_compare_failure \
   support_test_compare_string \
-  support_write_file_string \
   support_test_main \
   support_test_verify_impl \
+  support_wait_for_thread_exit \
+  support_write_file_string \
   temp_file \
   timespec \
   timespec-time64 \
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
index dedfa0d290..48dba717a1 100644
--- a/sysdeps/pthread/Makefile
+++ b/sysdeps/pthread/Makefile
@@ -119,7 +119,9 @@  tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
 	 tst-unwind-thread \
 	 tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
 	 tst-pthread_cancel-exited \
+	 tst-pthread_cancel-select-loop \
 	 tst-pthread_kill-exited \
+	 tst-pthread_kill-exiting \
 	 # tests
 
 tests-time64 := \
diff --git a/sysdeps/pthread/tst-kill4.c b/sysdeps/pthread/tst-kill4.c
deleted file mode 100644
index 9563939792..0000000000
--- a/sysdeps/pthread/tst-kill4.c
+++ /dev/null
@@ -1,90 +0,0 @@ 
-/* Copyright (C) 2003-2021 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <https://www.gnu.org/licenses/>.  */
-
-#include <errno.h>
-#include <pthread.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-
-static void *
-tf (void *a)
-{
-  return NULL;
-}
-
-
-int
-do_test (void)
-{
-  pthread_attr_t at;
-  if (pthread_attr_init (&at) != 0)
-    {
-      puts ("attr_create failed");
-      exit (1);
-    }
-
-  /* Limit thread stack size, because if it is too large, pthread_join
-     will free it immediately rather than put it into stack cache.  */
-  if (pthread_attr_setstacksize (&at, 2 * 1024 * 1024) != 0)
-    {
-      puts ("setstacksize failed");
-      exit (1);
-    }
-
-  pthread_t th;
-  if (pthread_create (&th, &at, tf, NULL) != 0)
-    {
-      puts ("create failed");
-      exit (1);
-    }
-
-  pthread_attr_destroy (&at);
-
-  if (pthread_join (th, NULL) != 0)
-    {
-      puts ("join failed");
-      exit (1);
-    }
-
-  /* The following only works because we assume here something about
-     the implementation.  Namely, that the memory allocated for the
-     thread descriptor is not going away, that the TID field is
-     cleared and therefore the signal is sent to process 0, and that
-     we can savely assume there is no other process with this ID at
-     that time.  */
-  int e = pthread_kill (th, 0);
-  if (e == 0)
-    {
-      puts ("pthread_kill succeeded");
-      exit (1);
-    }
-  if (e != ESRCH)
-    {
-      puts ("pthread_kill didn't return ESRCH");
-      exit (1);
-    }
-
-  return 0;
-}
-
-
-#define TEST_FUNCTION do_test ()
-#include "../test-skeleton.c"
diff --git a/sysdeps/pthread/tst-pthread_cancel-select-loop.c b/sysdeps/pthread/tst-pthread_cancel-select-loop.c
new file mode 100644
index 0000000000..a62087589c
--- /dev/null
+++ b/sysdeps/pthread/tst-pthread_cancel-select-loop.c
@@ -0,0 +1,87 @@ 
+/* Test that pthread_cancel succeeds during thread exit.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This test tries to trigger an internal race condition in
+   pthread_cancel, where the cancellation signal is sent after the
+   thread has begun the cancellation process.  This can result in a
+   spurious ESRCH error.  For the original bug 12889, the window is
+   quite small, so the bug was not reproduced in every run.  */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xunistd.h>
+#include <sys/select.h>
+#include <unistd.h>
+
+/* Set to true by timeout_thread_function when the test should
+   terminate.  */
+static bool timeout;
+
+static void *
+timeout_thread_function (void *unused)
+{
+  usleep (5 * 1000 * 1000);
+  __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
+  return NULL;
+}
+
+/* Used for blocking the select function below.  */
+static int pipe_fds[2];
+
+static void *
+canceled_thread_function (void *unused)
+{
+  while (true)
+    {
+      fd_set rfs;
+      fd_set wfs;
+      fd_set efs;
+      FD_ZERO (&rfs);
+      FD_ZERO (&wfs);
+      FD_ZERO (&efs);
+      FD_SET (pipe_fds[0], &rfs);
+
+      /* If the cancellation request is recognized early, the thread
+         begins exiting while the cancellation signal arrives.  */
+      select (FD_SETSIZE, &rfs, &wfs, &efs, NULL);
+    }
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  xpipe (pipe_fds);
+  pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
+
+  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
+    {
+      pthread_t thr = xpthread_create (NULL, canceled_thread_function, NULL);
+      xpthread_cancel (thr);
+      TEST_VERIFY (xpthread_join (thr) == PTHREAD_CANCELED);
+    }
+
+  xpthread_join (thr_timeout);
+  xclose (pipe_fds[0]);
+  xclose (pipe_fds[1]);
+  return 0;
+}
+
+#include <support/test-driver.c>
diff --git a/sysdeps/pthread/tst-pthread_kill-exiting.c b/sysdeps/pthread/tst-pthread_kill-exiting.c
new file mode 100644
index 0000000000..15550d2310
--- /dev/null
+++ b/sysdeps/pthread/tst-pthread_kill-exiting.c
@@ -0,0 +1,110 @@ 
+/* Test that pthread_kill succeeds during thread exit.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* This test verifies that pthread_kill for a thread that is exiting
+   succeeds (with or without actually delivering the signal).  */
+
+#include <array_length.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <support/xsignal.h>
+#include <support/xthread.h>
+#include <unistd.h>
+
+/* Set to true by timeout_thread_function when the test should
+   terminate.  */
+static bool timeout;
+
+static void *
+timeout_thread_function (void *unused)
+{
+  usleep (1000 * 1000);
+  __atomic_store_n (&timeout, true, __ATOMIC_RELAXED);
+  return NULL;
+}
+
+/* Used to synchronize the sending threads with the target thread and
+   main thread.  */
+static pthread_barrier_t barrier_1;
+static pthread_barrier_t barrier_2;
+
+/* The target thread to which signals are to be sent.  */
+static pthread_t target_thread;
+
+static void *
+sender_thread_function (void *unused)
+{
+  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
+    {
+      /* Wait until target_thread has been initialized.  The target
+         thread and main thread participate in this barrier.  */
+      xpthread_barrier_wait (&barrier_1);
+
+      xpthread_kill (target_thread, SIGUSR1);
+
+      /* Communicate that the signal has been sent.  The main thread
+         participates in this barrier.  */
+      xpthread_barrier_wait (&barrier_2);
+    }
+  return NULL;
+}
+
+static void *
+target_thread_function (void *unused)
+{
+  target_thread = pthread_self ();
+  xpthread_barrier_wait (&barrier_1);
+  return NULL;
+}
+
+static int
+do_test (void)
+{
+  xsignal (SIGUSR1, SIG_IGN);
+
+  pthread_t thr_timeout = xpthread_create (NULL, timeout_thread_function, NULL);
+
+  pthread_t threads[4];
+  xpthread_barrier_init (&barrier_1, NULL, array_length (threads) + 2);
+  xpthread_barrier_init (&barrier_2, NULL, array_length (threads) + 1);
+
+  for (int i = 0; i < array_length (threads); ++i)
+    threads[i] = xpthread_create (NULL, sender_thread_function, NULL);
+
+  while (!__atomic_load_n (&timeout, __ATOMIC_RELAXED))
+    {
+      xpthread_create (NULL, target_thread_function, NULL);
+
+      /* Wait for the target thread to be set up and signal sending to
+         start.  */
+      xpthread_barrier_wait (&barrier_1);
+
+      /* Wait for signal sending to complete.  */
+      xpthread_barrier_wait (&barrier_2);
+
+      xpthread_join (target_thread);
+    }
+
+  for (int i = 0; i < array_length (threads); ++i)
+    xpthread_join (threads[i]);
+  xpthread_join (thr_timeout);
+
+  return 0;
+}
+
+#include <support/test-driver.c>