[COMMITTED] NaCl: Make thread exit wake pthread_join.

Message ID 20150528224149.504A72C3B00@topped-with-meat.com
State Committed
Headers

Commit Message

Roland McGrath May 28, 2015, 10:41 p.m. UTC
  * sysdeps/nacl/exit-thread.h (__exit_thread): If not detached,
	set THREAD_SELF->tid to a magic value and futex-wake it.
	Pass its address to the thread_exit system call.
	* sysdeps/nacl/pthread-pids.h (__nacl_get_tid): Assert that TID's low
	bit is clear.
	* sysdeps/nacl/lowlevellock.h: New file.
	* sysdeps/nacl/lll_timedwait_tid.c: New file.
  

Comments

Torvald Riegel May 29, 2015, 9:19 a.m. UTC | #1
On Thu, 2015-05-28 at 15:41 -0700, Roland McGrath wrote:
> 	* sysdeps/nacl/exit-thread.h (__exit_thread): If not detached,
> 	set THREAD_SELF->tid to a magic value and futex-wake it.
> 	Pass its address to the thread_exit system call.
> 	* sysdeps/nacl/pthread-pids.h (__nacl_get_tid): Assert that TID's low
> 	bit is clear.
> 	* sysdeps/nacl/lowlevellock.h: New file.
> 	* sysdeps/nacl/lll_timedwait_tid.c: New file.
> 
> diff --git a/sysdeps/nacl/exit-thread.h b/sysdeps/nacl/exit-thread.h
> index a08a5b1..c809405 100644
> --- a/sysdeps/nacl/exit-thread.h
> +++ b/sysdeps/nacl/exit-thread.h
> @@ -16,8 +16,11 @@
>     License along with the GNU C Library; if not, see
>     <http://www.gnu.org/licenses/>.  */
>  
> -#include <stddef.h>
> +#include <assert.h>
> +#include <atomic.h>
> +#include <lowlevellock.h>
>  #include <nacl-interfaces.h>
> +#include <nptl/pthreadP.h>
>  
>  /* This causes the current thread to exit, without affecting other
>     threads in the process if there are any.  If there are no other
> @@ -26,7 +29,49 @@
>  static inline void __attribute__ ((noreturn, always_inline, unused))
>  __exit_thread (void)
>  {
> -  __nacl_irt_thread.thread_exit (NULL);
> +  struct pthread *pd = THREAD_SELF;
> +
> +  /* The generic logic for pthread_join and stack/descriptor reuse is
> +     based on the Linux kernel feature that will clear and futex-wake
> +     a designated address as a final part of thread teardown.  Correct
> +     synchronization relies on the fact that these happen only after
> +     there is no possibility of user code touching or examining the
> +     late thread's stack.
> +
> +     The NaCl system interface implements half of this: it clears a
> +     word after the thread's user stack is safely dead, but it does
> +     not futex-wake the location.  So, some shenanigans are required.
> +     We change and futex-wake the location here, so as to wake up any
> +     blocked pthread_join (i.e. lll_wait_tid) or pthread_timedjoin_np
> +     (i.e. lll_timedwait_tid).  However, that's before we have safely
> +     vacated the stack.  So instead of clearing the location, we set
> +     it to a special magic value, NACL_EXITING_TID.  This counts as a
> +     "live thread" value for all the generic logic, but is recognized
> +     specially in lll_wait_tid and lll_timedwait_tid (lowlevellock.h).
> +     Once it has this value, lll_wait_tid will busy-wait for the
> +     location to be cleared to zero by the NaCl system code.  Only then
> +     is the stack actually safe to reuse.  */
> +
> +  if (!IS_DETACHED (pd))
> +    {
> +      /* The magic value must not be one that could ever be a valid
> +	 TID value.  See pthread-pids.h about the low bit.  */
> +      assert (NACL_EXITING_TID & 1);
> +
> +      /* The magic value must not be one that has the "free" flag
> +	 (i.e. sign bit) set.  If that bit is set, then the
> +	 descriptor could be reused for a new thread.  */
> +      assert (NACL_EXITING_TID > 0);
> +
> +      atomic_store_relaxed (&pd->tid, NACL_EXITING_TID);

Maybe add a comment why the relaxed store is sufficient here (it's just
a flag for busy-waiting, it doesn't imply a happens-before for any other
state change).  This may be different from the store of the final value,
for which I'm wondering whether it needs to be a release MO store.  See
below for comments on the (acquire?) load side of this.

> +      lll_futex_wake (&pd->tid, 1, LLL_PRIVATE);
> +    }
> +
> +  /* This clears PD->tid some time after the thread stack can never
> +     be touched again.  Unfortunately, it does not also do a
> +     futex-wake at that time (as Linux does via CLONE_CHILD_CLEARTID
> +     and set_tid_address).  So lll_wait_tid does some busy-waiting.  */
> +  __nacl_irt_thread.thread_exit (&pd->tid);
>  
>    /* That never returns unless something is severely and unrecoverably wrong.
>       If it ever does, try to make sure we crash.  */
> diff --git a/sysdeps/nacl/lll_timedwait_tid.c b/sysdeps/nacl/lll_timedwait_tid.c
> new file mode 100644
> index 0000000..ecaf0b1
> --- /dev/null
> +++ b/sysdeps/nacl/lll_timedwait_tid.c
> @@ -0,0 +1,61 @@
> +/* Timed waiting for thread death.  NaCl version.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#include <assert.h>
> +#include <atomic.h>
> +#include <errno.h>
> +#include <lowlevellock.h>
> +#include <sys/time.h>
> +
> +int
> +__lll_timedwait_tid (int *tidp, const struct timespec *abstime)
> +{
> +  /* Reject invalid timeouts.  */
> +  if (__glibc_unlikely (abstime->tv_nsec < 0)
> +      || __glibc_unlikely (abstime->tv_nsec >= 1000000000))
> +    return EINVAL;
> +
> +  /* Repeat until thread terminated.  */
> +  int tid;
> +  while ((tid = atomic_load_relaxed (tidp)) != 0)

See below.

> +    {
> +      /* See exit-thread.h for details.  */
> +      if (tid == NACL_EXITING_TID)
> +	/* The thread should now be in the process of exiting, so it will
> +	   finish quick enough that the timeout doesn't matter.  If any
> +	   thread ever stays in this state for long, there is something
> +	   catastrophically wrong.  */
> +	BUSY_WAIT_NOP;
> +      else
> +	{
> +	  assert (tid > 0);
> +
> +	  /* If *FUTEX == TID, wait until woken or timeout.  */
> +	  int err = __nacl_irt_futex.futex_wait_abs ((volatile int *) tidp,
> +						     tid, abstime);
> +	  if (err != 0)
> +	    {
> +	      if (__glibc_likely (err == ETIMEDOUT))
> +		return err;
> +	      assert (err == EAGAIN);
> +	    }
> +	}
> +    }
> +
> +  return 0;
> +}
> diff --git a/sysdeps/nacl/lowlevellock.h b/sysdeps/nacl/lowlevellock.h
> new file mode 100644
> index 0000000..0b85d8d
> --- /dev/null
> +++ b/sysdeps/nacl/lowlevellock.h
> @@ -0,0 +1,45 @@
> +/* Low-level lock implementation.  NaCl version.
> +   Copyright (C) 2015 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _LOWLEVELLOCK_H
> +
> +/* Everything except the exit handling is the same as the generic code.  */
> +# include <sysdeps/nptl/lowlevellock.h>
> +
> +# ifndef BUSY_WAIT_NOP
> +#  define BUSY_WAIT_NOP		__sync_synchronize ()
> +# endif

Are you sure that adding a barrier is the right thing to do here
(especially one of the __sync variety that we want to get rid off
eventually)?  Technically, you don't need the barrier.  Other archs use
"pause" code sequences.

> +/* See exit-thread.h for details.  */
> +# define NACL_EXITING_TID	1
> +
> +# undef lll_wait_tid
> +# define lll_wait_tid(tid)				\
> +  do {							\
> +    __typeof (tid) __tid;				\
> +    volatile __typeof (tid) *__tidp = &(tid);		\
> +    while ((__tid = atomic_load_relaxed (__tidp)) != 0) \

I'm aware we use a relaxed load in the generic version as well but I'm
wondering why that is actually sufficient and we don't need an acquire
load.  pthread_join does seem to access values provided by the finished
thread (eg, load pd->result).

I vaguely remember thinking about that issue when we changed the
documentation of lll_wait_tid -- but I can't find any notes or emails
about it.

Do you have a reasoning why it's sufficient, or is this something we
need to dig up / come up with?

> +      {							\
> +	if (__tid == NACL_EXITING_TID)			\
> +	  BUSY_WAIT_NOP;				\
> +	else						\
> +	  lll_futex_wait (__tidp, __tid, LLL_PRIVATE);	\
> +      }							\
> +  } while (0)
> +
> +#endif	/* lowlevellock.h */
  
Roland McGrath June 10, 2015, 1:28 a.m. UTC | #2
Torvald, thanks very much for reviewing this NaCl-specific code!  I
very much appreciate your help with the concurrency issues (and any
other comments you or other folks might have).  It's above and beyond
the call of duty for anybody else to worry about the NaCl code, so I
consider it a personal favor to me and am grateful.

> Maybe add a comment why the relaxed store is sufficient here (it's just
> a flag for busy-waiting, it doesn't imply a happens-before for any other
> state change).  This may be different from the store of the final value,
> for which I'm wondering whether it needs to be a release MO store.  See
> below for comments on the (acquire?) load side of this.

Frankly I'm not all sure that it actually is sufficient.  

This code is modelled after an old glibc fork modified for NaCl (that
was never anywhere near fit for going upstream).  In that version the
field had been changed to be volatile and this was just a simple load.
I knew it didn't make any sense to use volatile, and started out just
using atomic_forced_read as the equivalent.  Then I decided that, in
keeping with new best practices, I should use something in the C11
vocabulary instead, and atomic_load_relaxed is the thing from that set
that is actually the same as atomic_forced_read in practice.  But I
was consciously glossing over the real memory-model issues to just get
something working already to meet a personal deadline I'd set for
myself.  However, that old model both was not at all necessarily
right, and was only used in production on x86, where you can get away
with a lot in practice.

> > +# ifndef BUSY_WAIT_NOP
> > +#  define BUSY_WAIT_NOP		__sync_synchronize ()
> > +# endif
> 
> Are you sure that adding a barrier is the right thing to do here
> (especially one of the __sync variety that we want to get rid off
> eventually)?  Technically, you don't need the barrier.  Other archs use
> "pause" code sequences.

A barrier is certainly not the right thing in any general sense.  This
is the generic fallback for when the machine-specific file doesn't
define anything.  On x86, it's defined to the pause instruction.  I
saw that the Linux kernel's equivalent macro cpu_relax() is defined to
smp_mb() on some machines that don't have an ideal instruction for
this, and decided __sync_synchronize was close enough to that to be a
reasonable fallback choice and at least better than nothing.  Perhaps
it's not really better than just a pure compiler barrier, which is
what Linux's cpu_relax() does on many other configurations.

What I'd really like to see is the sometimes-defined macro replaced
with an always-defined macro (or inline) in a sysdeps header for
purely machine-specific things (perhaps just its own tiny header).
Then the sysdeps/generic/ definition would be whatever consensus of
those wiser than me says is the most useful generic fallback, and of
course each machine maintainer can supply the optimal
machine-dependent thing if there is one.

> > +/* See exit-thread.h for details.  */
> > +# define NACL_EXITING_TID	1
> > +
> > +# undef lll_wait_tid
> > +# define lll_wait_tid(tid)				\
> > +  do {							\
> > +    __typeof (tid) __tid;				\
> > +    volatile __typeof (tid) *__tidp = &(tid);		\
> > +    while ((__tid = atomic_load_relaxed (__tidp)) != 0) \
> 
> I'm aware we use a relaxed load in the generic version as well but I'm
> wondering why that is actually sufficient and we don't need an acquire
> load.  pthread_join does seem to access values provided by the finished
> thread (eg, load pd->result).
> 
> I vaguely remember thinking about that issue when we changed the
> documentation of lll_wait_tid -- but I can't find any notes or emails
> about it.
> 
> Do you have a reasoning why it's sufficient, or is this something we
> need to dig up / come up with?

Nope, I have no reasons. :-)

It's quite plausible to me that there is something else done before
pthread_join's load and/or after after where the pthread_create.c code
(START_THREAD_DEFN) sets the value that constitutes a sufficient
barrier.  But if so, it is implicit and not explained in any comment.
So at the very least we need more comments, and it seems likely we do
actually need more barriers.

For example, it's not clear to me what barriers are implied by the
futex operations.  The Linux CLONE_THREAD_CLEARTID behavior is that
the exiting thread implicitly does a FUTEX_WAKE call, so if that
constitutes a necessary barrier then it should already be sufficient
(on Linux).  But we should be thoroughly explicit about such
expectations.  I imagine that is something you will document properly
in the new internal futex API, and that documentation can say how
__exit_thread is required to interact.

The actual NaCl picture is probably even worse.  The clearing done in
the system code does not use any kind of explicit barrier or atomic
operation.  The documentation (such as it is) for the system's
thread-exit call does not say anything about barrier semantics at all.
In practice it probably does what matters, because it incidentally
acquires and releases some internal locks both before and after
clearing the word.  But I would certainly like to get some clear
documentation on what memory-model semantics are provided by Linux's
CLONE_THREAD_CLEARTID and are required by libpthread.  (Then we'll
probably just try to make the NaCl system semantics match that
explicitly, modulo the actual futex-waking part.)


Thanks,
Roland
  

Patch

diff --git a/sysdeps/nacl/exit-thread.h b/sysdeps/nacl/exit-thread.h
index a08a5b1..c809405 100644
--- a/sysdeps/nacl/exit-thread.h
+++ b/sysdeps/nacl/exit-thread.h
@@ -16,8 +16,11 @@ 
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */
 
-#include <stddef.h>
+#include <assert.h>
+#include <atomic.h>
+#include <lowlevellock.h>
 #include <nacl-interfaces.h>
+#include <nptl/pthreadP.h>
 
 /* This causes the current thread to exit, without affecting other
    threads in the process if there are any.  If there are no other
@@ -26,7 +29,49 @@ 
 static inline void __attribute__ ((noreturn, always_inline, unused))
 __exit_thread (void)
 {
-  __nacl_irt_thread.thread_exit (NULL);
+  struct pthread *pd = THREAD_SELF;
+
+  /* The generic logic for pthread_join and stack/descriptor reuse is
+     based on the Linux kernel feature that will clear and futex-wake
+     a designated address as a final part of thread teardown.  Correct
+     synchronization relies on the fact that these happen only after
+     there is no possibility of user code touching or examining the
+     late thread's stack.
+
+     The NaCl system interface implements half of this: it clears a
+     word after the thread's user stack is safely dead, but it does
+     not futex-wake the location.  So, some shenanigans are required.
+     We change and futex-wake the location here, so as to wake up any
+     blocked pthread_join (i.e. lll_wait_tid) or pthread_timedjoin_np
+     (i.e. lll_timedwait_tid).  However, that's before we have safely
+     vacated the stack.  So instead of clearing the location, we set
+     it to a special magic value, NACL_EXITING_TID.  This counts as a
+     "live thread" value for all the generic logic, but is recognized
+     specially in lll_wait_tid and lll_timedwait_tid (lowlevellock.h).
+     Once it has this value, lll_wait_tid will busy-wait for the
+     location to be cleared to zero by the NaCl system code.  Only then
+     is the stack actually safe to reuse.  */
+
+  if (!IS_DETACHED (pd))
+    {
+      /* The magic value must not be one that could ever be a valid
+	 TID value.  See pthread-pids.h about the low bit.  */
+      assert (NACL_EXITING_TID & 1);
+
+      /* The magic value must not be one that has the "free" flag
+	 (i.e. sign bit) set.  If that bit is set, then the
+	 descriptor could be reused for a new thread.  */
+      assert (NACL_EXITING_TID > 0);
+
+      atomic_store_relaxed (&pd->tid, NACL_EXITING_TID);
+      lll_futex_wake (&pd->tid, 1, LLL_PRIVATE);
+    }
+
+  /* This clears PD->tid some time after the thread stack can never
+     be touched again.  Unfortunately, it does not also do a
+     futex-wake at that time (as Linux does via CLONE_CHILD_CLEARTID
+     and set_tid_address).  So lll_wait_tid does some busy-waiting.  */
+  __nacl_irt_thread.thread_exit (&pd->tid);
 
   /* That never returns unless something is severely and unrecoverably wrong.
      If it ever does, try to make sure we crash.  */
diff --git a/sysdeps/nacl/lll_timedwait_tid.c b/sysdeps/nacl/lll_timedwait_tid.c
new file mode 100644
index 0000000..ecaf0b1
--- /dev/null
+++ b/sysdeps/nacl/lll_timedwait_tid.c
@@ -0,0 +1,61 @@ 
+/* Timed waiting for thread death.  NaCl version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <assert.h>
+#include <atomic.h>
+#include <errno.h>
+#include <lowlevellock.h>
+#include <sys/time.h>
+
+int
+__lll_timedwait_tid (int *tidp, const struct timespec *abstime)
+{
+  /* Reject invalid timeouts.  */
+  if (__glibc_unlikely (abstime->tv_nsec < 0)
+      || __glibc_unlikely (abstime->tv_nsec >= 1000000000))
+    return EINVAL;
+
+  /* Repeat until thread terminated.  */
+  int tid;
+  while ((tid = atomic_load_relaxed (tidp)) != 0)
+    {
+      /* See exit-thread.h for details.  */
+      if (tid == NACL_EXITING_TID)
+	/* The thread should now be in the process of exiting, so it will
+	   finish quick enough that the timeout doesn't matter.  If any
+	   thread ever stays in this state for long, there is something
+	   catastrophically wrong.  */
+	BUSY_WAIT_NOP;
+      else
+	{
+	  assert (tid > 0);
+
+	  /* If *FUTEX == TID, wait until woken or timeout.  */
+	  int err = __nacl_irt_futex.futex_wait_abs ((volatile int *) tidp,
+						     tid, abstime);
+	  if (err != 0)
+	    {
+	      if (__glibc_likely (err == ETIMEDOUT))
+		return err;
+	      assert (err == EAGAIN);
+	    }
+	}
+    }
+
+  return 0;
+}
diff --git a/sysdeps/nacl/lowlevellock.h b/sysdeps/nacl/lowlevellock.h
new file mode 100644
index 0000000..0b85d8d
--- /dev/null
+++ b/sysdeps/nacl/lowlevellock.h
@@ -0,0 +1,45 @@ 
+/* Low-level lock implementation.  NaCl version.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _LOWLEVELLOCK_H
+
+/* Everything except the exit handling is the same as the generic code.  */
+# include <sysdeps/nptl/lowlevellock.h>
+
+# ifndef BUSY_WAIT_NOP
+#  define BUSY_WAIT_NOP		__sync_synchronize ()
+# endif
+
+/* See exit-thread.h for details.  */
+# define NACL_EXITING_TID	1
+
+# undef lll_wait_tid
+# define lll_wait_tid(tid)				\
+  do {							\
+    __typeof (tid) __tid;				\
+    volatile __typeof (tid) *__tidp = &(tid);		\
+    while ((__tid = atomic_load_relaxed (__tidp)) != 0) \
+      {							\
+	if (__tid == NACL_EXITING_TID)			\
+	  BUSY_WAIT_NOP;				\
+	else						\
+	  lll_futex_wait (__tidp, __tid, LLL_PRIVATE);	\
+      }							\
+  } while (0)
+
+#endif	/* lowlevellock.h */
diff --git a/sysdeps/nacl/pthread-pids.h b/sysdeps/nacl/pthread-pids.h
index ccb99d6..1589e5b 100644
--- a/sysdeps/nacl/pthread-pids.h
+++ b/sysdeps/nacl/pthread-pids.h
@@ -50,6 +50,9 @@  __nacl_get_tid (struct pthread *pd)
   assert ((id & 1) == 0);
   assert (sizeof id == sizeof tid);
   assert (tid > 0);
+  /* This ensures that NACL_EXITING_TID (lowlevellock.h) can never
+     be a valid TID value.  */
+  assert ((tid & 1) == 0);
   return tid;
 }