Make abort() AS-safe (Bug 26275).

Message ID 20200927141952.121047-1-carlos@redhat.com
State Rejected
Headers
Series Make abort() AS-safe (Bug 26275). |

Commit Message

Carlos O'Donell Sept. 27, 2020, 2:19 p.m. UTC
  As noted in bug 26275 the abort() function is not AS-safe and the
standard says it should be.

Upstream Rust complained about this:
https://github.com/rust-lang/rust/issues/73894#issuecomment-673478761

The fix is to take the abort lock in fork and then release it in the
child and parent. This assures that you have a consistent state for
the locks.

The test case is probabilistic and will only catch the defect when
run continuously for 100-600s.  The test only runs for 20s, and so
has a chance to detect the bug if it ever comes back again.  The
fork and abort interactions are tested for livelock in the test.

The manual is updated to indicate that abort is officially considered
to be AS-safe.

The supprot/timespec.* routines are updated to help provide a pattern
that supports running given operations for a certain amount of time
e.g. TIMESPEC_BEFORE().
---
 include/stdlib.h                 |   6 +-
 manual/startup.texi              |   5 +-
 stdlib/Makefile                  |   4 +-
 stdlib/abort.c                   |  11 ++
 stdlib/tst-threaded-fork-abort.c | 168 +++++++++++++++++++++++++++++++
 support/timespec.c               |  23 ++---
 support/timespec.h               |   5 +
 sysdeps/nptl/fork.c              |  11 ++
 8 files changed, 215 insertions(+), 18 deletions(-)
 create mode 100644 stdlib/tst-threaded-fork-abort.c
  

Comments

Florian Weimer Sept. 27, 2020, 8:04 p.m. UTC | #1
* Carlos O'Donell via Libc-alpha:

> diff --git a/stdlib/Makefile b/stdlib/Makefile
> index 4615f6dfe7..cd470e53ac 100644
> --- a/stdlib/Makefile
> +++ b/stdlib/Makefile
> @@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
>  		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
>  		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
>  		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
> -		   tst-setcontext9 tst-bz20544
> +		   tst-setcontext9 tst-bz20544 tst-threaded-fork-abort \
>  
>  tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
>  		   tst-tls-atexit tst-tls-atexit-nodelete
> @@ -243,3 +243,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
>  	$(evaluate-test)
>  
>  $(objpfx)tst-makecontext: $(libdl)
> +
> +LDLIBS-tst-threaded-fork-abort = $(shared-thread-library)

This should be a dependency, so that the test gets relinked if the
thread library chagnes.

> --- /dev/null
> +++ b/stdlib/tst-threaded-fork-abort.c

> +   On a x86_64 VM (4 vCPU @ 3.5GHz) the test fails once every 100-600
> +   seconds when run continuosly in a loop.  We want this test to fail
> +   with a probability of say 10% so develpers see it at least once in
> +   a one or two week development period, or at least once during the
> +   release window. To achieve that we run for at least 20 seconds,
> +   and set the timeout at 30 seconds (gives time for a last iteration
> +   to complete).  */

Please make it an xtest.  I don't think the test is valuable enough to
run continuously during development, given its overhead.

> diff --git a/support/timespec.h b/support/timespec.h
> index 1a6775a882..280844f2c0 100644
> --- a/support/timespec.h
> +++ b/support/timespec.h
> @@ -55,6 +55,11 @@ struct timespec support_timespec_normalize (struct timespec time);
>  int support_timespec_check_in_range (struct timespec expected, struct timespec observed,
>  				  double lower_bound, double upper_bound);
>  
> +/* True if left is before right, false otherwise.  */
> +#define TIMESPEC_BEFORE(left, right) \
> +  ((left.tv_sec < right.tv_sec)					\
> +   || (left.tv_sec == right.tv_sec				\
> +       && left.tv_nsec < right.tv_nsec))

Should this be an inline function?

>  /* Check that the timespec on the left represents a time before the
>     time on the right. */
> diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
> index 5091a000e3..8fdc6589e5 100644
> --- a/sysdeps/nptl/fork.c
> +++ b/sysdeps/nptl/fork.c
> @@ -66,6 +66,11 @@ __libc_fork (void)
>      {
>        _IO_list_lock ();
>  
> +      /* Acquire the abort lock.  We need to prevent other threads
> +	 from aborting while we fork.  We must keep abort AS-safe
> +	 and to do that we need to own the lock in the child.  */
> +      __abort_fork_lock ();
> +
>        /* Acquire malloc locks.  This needs to come last because fork
>  	 handlers may use malloc, and the libio list lock has an
>  	 indirect malloc dependency as well (via the getdelim

Is this ordering really correct?  I think this will result in
deadlocks if another thread detects heap corruption and calls abort:
the forking thead acquires the abort lock and then the malloc locks,
while the other thread acquires one malloc lock and then the abort
lock.
  
Rich Felker Sept. 28, 2020, 11:48 p.m. UTC | #2
On Sun, Sep 27, 2020 at 10:04:02PM +0200, Florian Weimer wrote:
> * Carlos O'Donell via Libc-alpha:
> 
> > diff --git a/stdlib/Makefile b/stdlib/Makefile
> > index 4615f6dfe7..cd470e53ac 100644
> > --- a/stdlib/Makefile
> > +++ b/stdlib/Makefile
> > @@ -87,7 +87,7 @@ tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
> >  		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
> >  		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
> >  		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
> > -		   tst-setcontext9 tst-bz20544
> > +		   tst-setcontext9 tst-bz20544 tst-threaded-fork-abort \
> >  
> >  tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
> >  		   tst-tls-atexit tst-tls-atexit-nodelete
> > @@ -243,3 +243,5 @@ $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
> >  	$(evaluate-test)
> >  
> >  $(objpfx)tst-makecontext: $(libdl)
> > +
> > +LDLIBS-tst-threaded-fork-abort = $(shared-thread-library)
> 
> This should be a dependency, so that the test gets relinked if the
> thread library chagnes.
> 
> > --- /dev/null
> > +++ b/stdlib/tst-threaded-fork-abort.c
> 
> > +   On a x86_64 VM (4 vCPU @ 3.5GHz) the test fails once every 100-600
> > +   seconds when run continuosly in a loop.  We want this test to fail
> > +   with a probability of say 10% so develpers see it at least once in
> > +   a one or two week development period, or at least once during the
> > +   release window. To achieve that we run for at least 20 seconds,
> > +   and set the timeout at 30 seconds (gives time for a last iteration
> > +   to complete).  */
> 
> Please make it an xtest.  I don't think the test is valuable enough to
> run continuously during development, given its overhead.
> 
> > diff --git a/support/timespec.h b/support/timespec.h
> > index 1a6775a882..280844f2c0 100644
> > --- a/support/timespec.h
> > +++ b/support/timespec.h
> > @@ -55,6 +55,11 @@ struct timespec support_timespec_normalize (struct timespec time);
> >  int support_timespec_check_in_range (struct timespec expected, struct timespec observed,
> >  				  double lower_bound, double upper_bound);
> >  
> > +/* True if left is before right, false otherwise.  */
> > +#define TIMESPEC_BEFORE(left, right) \
> > +  ((left.tv_sec < right.tv_sec)					\
> > +   || (left.tv_sec == right.tv_sec				\
> > +       && left.tv_nsec < right.tv_nsec))
> 
> Should this be an inline function?
> 
> >  /* Check that the timespec on the left represents a time before the
> >     time on the right. */
> > diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
> > index 5091a000e3..8fdc6589e5 100644
> > --- a/sysdeps/nptl/fork.c
> > +++ b/sysdeps/nptl/fork.c
> > @@ -66,6 +66,11 @@ __libc_fork (void)
> >      {
> >        _IO_list_lock ();
> >  
> > +      /* Acquire the abort lock.  We need to prevent other threads
> > +	 from aborting while we fork.  We must keep abort AS-safe
> > +	 and to do that we need to own the lock in the child.  */
> > +      __abort_fork_lock ();
> > +
> >        /* Acquire malloc locks.  This needs to come last because fork
> >  	 handlers may use malloc, and the libio list lock has an
> >  	 indirect malloc dependency as well (via the getdelim
> 
> Is this ordering really correct?  I think this will result in
> deadlocks if another thread detects heap corruption and calls abort:
> the forking thead acquires the abort lock and then the malloc locks,
> while the other thread acquires one malloc lock and then the abort
> lock.

Is there a reason to take the lock across fork rather than just
resetting it in the child? After seeing this I'm working on fixing the
same issue in musl and was about to take the lock, but realized ours
isn't actually protecting any userspace data state, just excluding
sigaction on SIGABRT during abort.

Rich
  
Florian Weimer Sept. 29, 2020, 6:54 a.m. UTC | #3
* Rich Felker:

> Is there a reason to take the lock across fork rather than just
> resetting it in the child? After seeing this I'm working on fixing the
> same issue in musl and was about to take the lock, but realized ours
> isn't actually protecting any userspace data state, just excluding
> sigaction on SIGABRT during abort.

It's also necessary to stop the fork because the subprocess could
otherwise observe the impossible SIG_DFL state.  In case the signal
handler returns, the implementation needs to produce a termination
status with SIGABRT as the termination signal, and the only way I can
see to achieve that is to remove the signal handler and send the
signal again.  This suggests that a lock in sigaction is needed as
well.

But for the fork case, restting the lock in the new subprocess should
be sufficient.
  
Rich Felker Sept. 29, 2020, 2:42 p.m. UTC | #4
On Tue, Sep 29, 2020 at 08:54:59AM +0200, Florian Weimer wrote:
> * Rich Felker:
> 
> > Is there a reason to take the lock across fork rather than just
> > resetting it in the child? After seeing this I'm working on fixing the
> > same issue in musl and was about to take the lock, but realized ours
> > isn't actually protecting any userspace data state, just excluding
> > sigaction on SIGABRT during abort.
> 
> It's also necessary to stop the fork because the subprocess could
> otherwise observe the impossible SIG_DFL state.  In case the signal
> handler returns, the implementation needs to produce a termination
> status with SIGABRT as the termination signal, and the only way I can
> see to achieve that is to remove the signal handler and send the
> signal again.  This suggests that a lock in sigaction is needed as
> well.

Yes, in musl we already have the lock in sigaction -- that's the whole
point of the lock. To prevent other threads from fighting to change
the disposition back to SIG_IGN or a signal handler while abort is
trying to change it to SIG_DFL.

> But for the fork case, restting the lock in the new subprocess should
> be sufficient.

I don't follow. Do you mean taking the lock in the parent, but just
resetting it in the child? That should work but I don't see how it has
any advantage over just releasing it in the child.

Rich
  
Rich Felker Oct. 1, 2020, 2:30 a.m. UTC | #5
On Tue, Sep 29, 2020 at 10:42:07AM -0400, Rich Felker wrote:
> On Tue, Sep 29, 2020 at 08:54:59AM +0200, Florian Weimer wrote:
> > * Rich Felker:
> > 
> > > Is there a reason to take the lock across fork rather than just
> > > resetting it in the child? After seeing this I'm working on fixing the
> > > same issue in musl and was about to take the lock, but realized ours
> > > isn't actually protecting any userspace data state, just excluding
> > > sigaction on SIGABRT during abort.
> > 
> > It's also necessary to stop the fork because the subprocess could
> > otherwise observe the impossible SIG_DFL state.  In case the signal
> > handler returns, the implementation needs to produce a termination
> > status with SIGABRT as the termination signal, and the only way I can
> > see to achieve that is to remove the signal handler and send the
> > signal again.  This suggests that a lock in sigaction is needed as
> > well.
> 
> Yes, in musl we already have the lock in sigaction -- that's the whole
> point of the lock. To prevent other threads from fighting to change
> the disposition back to SIG_IGN or a signal handler while abort is
> trying to change it to SIG_DFL.
> 
> > But for the fork case, restting the lock in the new subprocess should
> > be sufficient.
> 
> I don't follow. Do you mean taking the lock in the parent, but just
> resetting it in the child? That should work but I don't see how it has
> any advantage over just releasing it in the child.

OK, this is a lot worse than you thought:

Even without fork, execve and posix_spawn can also see the SIGABRT
disposition change made by abort(), passing it on to a process that
should have started with a disposition of SIG_IGN if you hit exactly
the wrong spot in the race.

So, to fix this, these interfaces also have to take the abort lock,
and to make it AS-safe (since execve is required to be), need to block
all signals to take the lock. But execve can't leave signals blocked
or the new process image would inherit that state. So it has to
unblock them after taking the lock. But then a signal handler can
interrupt between taking the lock and the execve syscall, making abort
deadlock if called from the signal handler.

So how to solve this? Having the abort lock be recursive sounds like
it helps (avoid the deadlock above), but then the signal handler that
runs between taking the abort lock and making the execve syscall still
delays abort by other threads for an unbounded length of time, and in
fact it could even longjmp out, leaving a stale lock owner that
prevents any other thread from ever calling abort. Ultimately this
boils down to a general principle: you can't make AS-safe locks that
allow arbitrary application code to run while they're held.

I really don't see any way out without giving abort a mechanism to
"seize" other threads before changing the signal disposition. This
could for example be done with the same mechanism used for
multithreaded set*id (broadcast signal of an implementation-internal,
unblockable signal) or maybe with some seccomp hacks on a recent
enough kernel. Is there some better approach I'm missing??

All of this hell because Linux thought we didn't need a SYS_abort...

Rich
  
Florian Weimer Oct. 1, 2020, 6:08 a.m. UTC | #6
* Rich Felker:

> Even without fork, execve and posix_spawn can also see the SIGABRT
> disposition change made by abort(), passing it on to a process that
> should have started with a disposition of SIG_IGN if you hit exactly
> the wrong spot in the race.

My feeling is that it's not worth bothering with this kind of leakage.
We've had this bug forever in glibc, and no one has complained about
it.

Carlos is investigating removal of the abort lock from glibc, I think.
  
Rich Felker Oct. 1, 2020, 2:39 p.m. UTC | #7
On Thu, Oct 01, 2020 at 08:08:24AM +0200, Florian Weimer wrote:
> * Rich Felker:
> 
> > Even without fork, execve and posix_spawn can also see the SIGABRT
> > disposition change made by abort(), passing it on to a process that
> > should have started with a disposition of SIG_IGN if you hit exactly
> > the wrong spot in the race.
> 
> My feeling is that it's not worth bothering with this kind of leakage.
> We've had this bug forever in glibc, and no one has complained about
> it.
> 
> Carlos is investigating removal of the abort lock from glibc, I think.

I don't think that's a good solution. The lock is really important in
that it protects against serious wrong behavior *within the process*
like an application-installed signal handler for SIGABRT getting
called more than once. The worst outcome of the exec issue discussed
here (and similar for fork) is simply the wrong disposition (SIG_DFL
rather than SIG_IGN) getting passed on to the new process image -- and
it's very rare to actually want SIG_IGN to be inherited. Undoing an
important fix for wrong code execution just because it's an incomplete
fix for wrong disposition inheritance would be bad.

If we want to solve this without relying on broadcast/seizure of all
threads, it looks like it may be a tradeoff between the subtly wrong
behavior for inheritance of SIG_IGN, and subtle wrongness with respect
to POSIX requirements on termination status from abort. It's possible
to just probe whether the old status was SIG_IGN; if not, there's no
issue with the current approach of changing it to SIG_DFL. But if it
was SIG_IGN, you can sacrifice the desired SIGABRT termination and get
termination by SIGSEGV or SIGILL or SIGBUS (still abnormal status,
still generating a core, just not the right signal number) just by
leaving signals masked and executing an instruction that will fault in
one of these ways.

FWIW musl "already does this" (attempting to terminate via a_crash()
with signals still masked) if the SIGABRT after resetting its
disposition somehow fails to terminate the process. It then falls back
to raising SIGKILL, SYS_exit_group, SYS_exit, and for(;;), so we'd get
this behavior automatically just by skipping the sigaction if the old
disposition was SIG_IGN.

Rich
  
Carlos O'Donell Oct. 1, 2020, 2:49 p.m. UTC | #8
On 10/1/20 2:08 AM, Florian Weimer wrote:
> * Rich Felker:
> 
>> Even without fork, execve and posix_spawn can also see the SIGABRT
>> disposition change made by abort(), passing it on to a process that
>> should have started with a disposition of SIG_IGN if you hit exactly
>> the wrong spot in the race.
> 
> My feeling is that it's not worth bothering with this kind of leakage.
> We've had this bug forever in glibc, and no one has complained about
> it.
> 
> Carlos is investigating removal of the abort lock from glibc, I think.
 
I am investigating the removal, but I think the replacement solution
might be needing to have a helper thread carry out specific tasks.

Like Rich I'm still a little worried about the other cases that the
lock is intended to fix.
  
Rich Felker Oct. 1, 2020, 2:55 p.m. UTC | #9
On Thu, Oct 01, 2020 at 10:49:42AM -0400, Carlos O'Donell wrote:
> On 10/1/20 2:08 AM, Florian Weimer wrote:
> > * Rich Felker:
> > 
> >> Even without fork, execve and posix_spawn can also see the SIGABRT
> >> disposition change made by abort(), passing it on to a process that
> >> should have started with a disposition of SIG_IGN if you hit exactly
> >> the wrong spot in the race.
> > 
> > My feeling is that it's not worth bothering with this kind of leakage.
> > We've had this bug forever in glibc, and no one has complained about
> > it.
> > 
> > Carlos is investigating removal of the abort lock from glibc, I think.
>  
> I am investigating the removal, but I think the replacement solution
> might be needing to have a helper thread carry out specific tasks.

I'm confused what a helper thread could achieve here. The underlying
problem is that the kernel forces CLONE_SIGHAND on threads (EINVAL
without it) so that the disposition can't be changed in a thread-local
manner. Any new thread would have that same issue. It also would not
be something you could reliably create at abort time (especially since
abort is most often used on resource exhaustion and other unexpected
failures).

Rich
  
Florian Weimer Oct. 1, 2020, 3:11 p.m. UTC | #10
* Rich Felker:

> On Thu, Oct 01, 2020 at 08:08:24AM +0200, Florian Weimer wrote:
>> * Rich Felker:
>> 
>> > Even without fork, execve and posix_spawn can also see the SIGABRT
>> > disposition change made by abort(), passing it on to a process that
>> > should have started with a disposition of SIG_IGN if you hit exactly
>> > the wrong spot in the race.
>> 
>> My feeling is that it's not worth bothering with this kind of leakage.
>> We've had this bug forever in glibc, and no one has complained about
>> it.
>> 
>> Carlos is investigating removal of the abort lock from glibc, I think.
>
> I don't think that's a good solution. The lock is really important in
> that it protects against serious wrong behavior *within the process*
> like an application-installed signal handler for SIGABRT getting
> called more than once.

I think glibc currently has this bug.  We only avoid it for abort, but
I'm not sure if it's a bug to handle the handler multiple times if abort
is called more than once.

But even for the more general case (threads call sigaction to install a
SIGABRT handler): Do we actually need a lock there?  We reach this state
only after raise (SIGABRT) has returned.  At this point, we can set a
flag (not a lock), and every other thread that calls signal or sigaction
would instead perform the late-stage SIG_DFL-for-SIGABRT part of abort?
It probably still needs some fiddling with sigprocmask.

Thanks,
Florian
  
Rich Felker Oct. 1, 2020, 3:28 p.m. UTC | #11
On Thu, Oct 01, 2020 at 05:11:19PM +0200, Florian Weimer wrote:
> * Rich Felker:
> 
> > On Thu, Oct 01, 2020 at 08:08:24AM +0200, Florian Weimer wrote:
> >> * Rich Felker:
> >> 
> >> > Even without fork, execve and posix_spawn can also see the SIGABRT
> >> > disposition change made by abort(), passing it on to a process that
> >> > should have started with a disposition of SIG_IGN if you hit exactly
> >> > the wrong spot in the race.
> >> 
> >> My feeling is that it's not worth bothering with this kind of leakage.
> >> We've had this bug forever in glibc, and no one has complained about
> >> it.
> >> 
> >> Carlos is investigating removal of the abort lock from glibc, I think.
> >
> > I don't think that's a good solution. The lock is really important in
> > that it protects against serious wrong behavior *within the process*
> > like an application-installed signal handler for SIGABRT getting
> > called more than once.
> 
> I think glibc currently has this bug.  We only avoid it for abort, but
> I'm not sure if it's a bug to handle the handler multiple times if abort
> is called more than once.

I don't see anything in the spec that allows for the signal handler to
be called multiple times. The signal is raised (thereby following
normal semantics for if/how signal handler runs), and if a handler
runs and returns, the process is then required to terminate abnormally
as if by SIGABRT. This isn't a license to execute the signal handler
again or do other random observable things.

> But even for the more general case (threads call sigaction to install a
> SIGABRT handler): Do we actually need a lock there?  We reach this state
> only after raise (SIGABRT) has returned.  At this point, we can set a
> flag (not a lock), and every other thread that calls signal or sigaction
> would instead perform the late-stage SIG_DFL-for-SIGABRT part of abort?
> It probably still needs some fiddling with sigprocmask.

There's a race between checking the flag and acting on it. If thread A
has already called signal(SIGABRT,foo) and gotten past the "are we
aborting?" check, then thread B calls abort(), thread A can reset the
disposition of SIGABRT to foo after thread B sets it to SIG_DFL, but
before thread B re-raises, unblocks, and acts on the signal.

Rich
  
Rich Felker Oct. 10, 2020, 12:26 a.m. UTC | #12
On Wed, Sep 30, 2020 at 10:30:19PM -0400, Rich Felker wrote:
> On Tue, Sep 29, 2020 at 10:42:07AM -0400, Rich Felker wrote:
> > On Tue, Sep 29, 2020 at 08:54:59AM +0200, Florian Weimer wrote:
> > > * Rich Felker:
> > > 
> > > > Is there a reason to take the lock across fork rather than just
> > > > resetting it in the child? After seeing this I'm working on fixing the
> > > > same issue in musl and was about to take the lock, but realized ours
> > > > isn't actually protecting any userspace data state, just excluding
> > > > sigaction on SIGABRT during abort.
> > > 
> > > It's also necessary to stop the fork because the subprocess could
> > > otherwise observe the impossible SIG_DFL state.  In case the signal
> > > handler returns, the implementation needs to produce a termination
> > > status with SIGABRT as the termination signal, and the only way I can
> > > see to achieve that is to remove the signal handler and send the
> > > signal again.  This suggests that a lock in sigaction is needed as
> > > well.
> > 
> > Yes, in musl we already have the lock in sigaction -- that's the whole
> > point of the lock. To prevent other threads from fighting to change
> > the disposition back to SIG_IGN or a signal handler while abort is
> > trying to change it to SIG_DFL.
> > 
> > > But for the fork case, restting the lock in the new subprocess should
> > > be sufficient.
> > 
> > I don't follow. Do you mean taking the lock in the parent, but just
> > resetting it in the child? That should work but I don't see how it has
> > any advantage over just releasing it in the child.
> 
> OK, this is a lot worse than you thought:
> 
> Even without fork, execve and posix_spawn can also see the SIGABRT
> disposition change made by abort(), passing it on to a process that
> should have started with a disposition of SIG_IGN if you hit exactly
> the wrong spot in the race.
> 
> So, to fix this, these interfaces also have to take the abort lock,
> and to make it AS-safe (since execve is required to be), need to block
> all signals to take the lock. But execve can't leave signals blocked
> or the new process image would inherit that state. So it has to
> unblock them after taking the lock. But then a signal handler can
> interrupt between taking the lock and the execve syscall, making abort
> deadlock if called from the signal handler.
> 
> So how to solve this? Having the abort lock be recursive sounds like
> it helps (avoid the deadlock above), but then the signal handler that
> runs between taking the abort lock and making the execve syscall still
> delays abort by other threads for an unbounded length of time, and in
> fact it could even longjmp out, leaving a stale lock owner that
> prevents any other thread from ever calling abort. Ultimately this
> boils down to a general principle: you can't make AS-safe locks that
> allow arbitrary application code to run while they're held.
> 
> I really don't see any way out without giving abort a mechanism to
> "seize" other threads before changing the signal disposition. This
> could for example be done with the same mechanism used for
> multithreaded set*id (broadcast signal of an implementation-internal,
> unblockable signal) or maybe with some seccomp hacks on a recent
> enough kernel. Is there some better approach I'm missing??

There is something else I was missing, if we're willing to wrap signal
handlers -- something I've long suspected would be useful. The
primitive needed to solve this problem without a seize-all-threads
operation is restartable critical sections that restart if interrupted
by a signal, something like a "userspace EINTR". Here, any signal
arriving between execve taking the lock and making the execve syscall
would cause the lock to be released and the saved program counter to
revert to just before the lock was taken before the application's
signal handler runs.

How would such a thing be implemented? The wrapper for the signal
handler would examine TLS to observe that there's a restartable
critical section in progress, and modify the saved ucontext's
call-saved registers and uc_sigmask based on a sigjmp_buf saved before
the restartable section. It would then reverse the lock (probably via
a function pointer setup as part of the critical section) and
tail-call to the real signal handler. On return, the kernel then
automatically returns to just before the critical section, and the
lock is re-acquired before moving forward again.

Among other things, this allows implementing arbitrary "atomic unmask
signals and make syscall" operations (ala pselect) without need for
the kernel's help. It could also be used as the main ingredient of an
alternate thread cancellation design.

Is this a good idea? I don't know. But at least it means there seem to
be two (or more) possible solutions available.

Rich
  

Patch

diff --git a/include/stdlib.h b/include/stdlib.h
index ffcefd7b85..a2431f9139 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -315,6 +315,10 @@  extern __typeof (unsetenv) unsetenv attribute_hidden;
 extern __typeof (__strtoul_internal) __strtoul_internal attribute_hidden;
 # endif
 
-#endif
+/* Coordinate locks with fork to make abort AS-safe.  */
+void __abort_fork_lock (void) attribute_hidden;
+void __abort_fork_unlock (void) attribute_hidden;
+
+#endif /* !defined _ISOMAC */
 
 #endif  /* include/stdlib.h */
diff --git a/manual/startup.texi b/manual/startup.texi
index 21c48cd037..e38cdc0e28 100644
--- a/manual/startup.texi
+++ b/manual/startup.texi
@@ -992,10 +992,7 @@  for this function is in @file{stdlib.h}.
 
 @deftypefun void abort (void)
 @standards{ISO, stdlib.h}
-@safety{@prelim{}@mtsafe{}@asunsafe{@asucorrupt{}}@acunsafe{@aculock{} @acucorrupt{}}}
-@c The implementation takes a recursive lock and attempts to support
-@c calls from signal handlers, but if we're in the middle of flushing or
-@c using streams, we may encounter them in inconsistent states.
+@safety{@prelim{}@mtsafe{}@assafe{}@acunsafe{@aculock{} @acucorrupt{}}}
 The @code{abort} function causes abnormal program termination.  This
 does not execute cleanup functions registered with @code{atexit} or
 @code{on_exit}.
diff --git a/stdlib/Makefile b/stdlib/Makefile
index 4615f6dfe7..cd470e53ac 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -87,7 +87,7 @@  tests		:= tst-strtol tst-strtod testmb testrand testsort testdiv   \
 		   tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
 		   tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
 		   tst-setcontext6 tst-setcontext7 tst-setcontext8 \
-		   tst-setcontext9 tst-bz20544
+		   tst-setcontext9 tst-bz20544 tst-threaded-fork-abort \
 
 tests-internal	:= tst-strtod1i tst-strtod3 tst-strtod4 tst-strtod5i \
 		   tst-tls-atexit tst-tls-atexit-nodelete
@@ -243,3 +243,5 @@  $(objpfx)tst-setcontext3.out: tst-setcontext3.sh $(objpfx)tst-setcontext3
 	$(evaluate-test)
 
 $(objpfx)tst-makecontext: $(libdl)
+
+LDLIBS-tst-threaded-fork-abort = $(shared-thread-library)
diff --git a/stdlib/abort.c b/stdlib/abort.c
index df98782dd7..e7f74319d6 100644
--- a/stdlib/abort.c
+++ b/stdlib/abort.c
@@ -42,6 +42,17 @@  static int stage;
 /* We should be prepared for multiple threads trying to run abort.  */
 __libc_lock_define_initialized_recursive (static, lock);
 
+void
+__abort_fork_lock (void)
+{
+  __libc_lock_lock_recursive (lock);
+}
+
+void
+__abort_fork_unlock (void)
+{
+  __libc_lock_unlock_recursive (lock);
+}
 
 /* Cause an abnormal program termination with core-dump.  */
 void
diff --git a/stdlib/tst-threaded-fork-abort.c b/stdlib/tst-threaded-fork-abort.c
new file mode 100644
index 0000000000..67f0b09086
--- /dev/null
+++ b/stdlib/tst-threaded-fork-abort.c
@@ -0,0 +1,168 @@ 
+/* Verify abort is AS-safe (Bug 26275).
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <support/xthread.h>
+#include <support/xsignal.h>
+#include <support/xunistd.h>
+#include <support/support.h>
+#include <support/test-driver.h>
+#include <support/timespec.h>
+#include <support/capture_subprocess.h>
+
+static void
+handle_abort (int sig)
+{
+  int wstatus;
+  pid_t ret;
+  /* The parent has called abort, and now we wait on the child to call
+     abort.  If the child never calls abort because the abort is live
+     locked on the abort lock then the test times out with an error.
+     If the child calls abort we see that, capture it here, and exit
+     the test successfully.  We loop because we might have called abort
+     before the child was created and so we have to try again.  */
+  do
+    {
+      ret = wait (&wstatus);
+    }
+  while (ret == -1);
+
+  /* The child must have exited with SIGABRT or we fail the test.  */
+  if (WIFSIGNALED (wstatus))
+    if (WTERMSIG (wstatus) == SIGABRT)
+      _exit (0);
+
+  /* The child exit condition is not what we expected.  */
+  _exit (1);
+}
+
+static void *
+thread_abort (void *closure)
+{
+  pthread_barrier_t *barrier = (pthread_barrier_t *) closure;
+  xpthread_barrier_wait (barrier);
+  /* Attempt to abort.  */
+  abort ();
+  return NULL;
+}
+
+static void *
+thread_fork (void *closure)
+{
+  pthread_barrier_t *barrier = (pthread_barrier_t *) closure;
+  pid_t child;
+
+  /* Bug 26275: In the child the abort hangs.  This allows the parent
+     to wait for the child termination in its own SIGABRT handler.
+     We want to fork just after the aborting thread has taken the
+     abort handler lock, but we can't always time it right.  */
+  xpthread_barrier_wait (barrier);
+  child = xfork ();
+
+  if (child == 0)
+    {
+      /* Reset default disposition for SIGABRT so our own abort will
+	 terminate the child.  */
+      signal (SIGABRT, SIG_DFL);
+      /* Without the fix this hangs because the forked child observes
+	 the abort lock as held.  */
+      abort ();
+    }
+
+  return NULL;
+}
+
+/* The goal of this test is to call fork from thread B while thread A is
+   in the process of aborting.  Bug 26275 shows that if thread A is
+   holding the abort lock while thread B forks then thread B will be
+   unable to abort (livelock on the abort lock).  The fix is for
+   thread B to take the abort lock during fork setup, and then release
+   it after the fork (in both child and parent).  This test is
+   probabilistic and may not trigger the bug, but we run for long enough
+   that any issues during this abort/fork sequencing should show up in
+   long-term testing as this test failing every once in a while.
+
+   On a x86_64 VM (4 vCPU @ 3.5GHz) the test fails once every 100-600
+   seconds when run continuosly in a loop.  We want this test to fail
+   with a probability of say 10% so develpers see it at least once in
+   a one or two week development period, or at least once during the
+   release window. To achieve that we run for at least 20 seconds,
+   and set the timeout at 30 seconds (gives time for a last iteration
+   to complete).  */
+static void
+test_fork_abort (void *closure __attribute__ ((unused)))
+{
+  struct sigaction sact;
+  struct sigaction oact;
+  pthread_barrier_t barrier;
+
+  /* Wait for both threads to get in the right spot.  */
+  xpthread_barrier_init (&barrier, NULL, 2);
+
+  /* Handle the abort.  */
+  sact.sa_handler = handle_abort;
+  xsigaction (SIGABRT, &sact, &oact);
+
+  /* Start the abort thread.  */
+  xpthread_create (NULL, thread_abort, &barrier);
+
+  /* Start the fork thread.  */
+  xpthread_create (NULL, thread_fork, &barrier);
+
+  /* Wait indefinately.  We exit via the abort handler.  */
+  while (1)
+    pause ();
+}
+
+static int
+do_test (void)
+{
+  struct support_capture_subprocess result;
+  struct timespec before, after, running, end;
+
+  /* Total runtime is 20 seconds.  */
+  end = make_timespec (20, 0);
+
+  /* Measure start time.  */
+  xclock_gettime (CLOCK_MONOTONIC, &before);
+
+  /* Run test for 20 seconds.  */
+  do
+    {
+      result = support_capture_subprocess (test_fork_abort, NULL);
+      support_capture_subprocess_check (&result, "test_fork_abort", 0,
+					sc_allow_none);
+      xclock_gettime (CLOCK_MONOTONIC, &after);
+      running = timespec_sub (support_timespec_normalize (after),
+			      support_timespec_normalize (before));
+    }
+  while (TIMESPEC_BEFORE (running, end));
+
+  /* Success.  */
+  return 0;
+}
+
+#define TIMEOUT 30
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>
diff --git a/support/timespec.c b/support/timespec.c
index edbdb165ec..3d2cdfca5b 100644
--- a/support/timespec.c
+++ b/support/timespec.c
@@ -27,18 +27,17 @@  test_timespec_before_impl (const char *file, int line,
 			   const struct timespec left,
 			   const struct timespec right)
 {
-  if (left.tv_sec > right.tv_sec
-      || (left.tv_sec == right.tv_sec
-	  && left.tv_nsec > right.tv_nsec)) {
-    support_record_failure ();
-    const struct timespec diff = timespec_sub (left, right);
-    printf ("%s:%d: %jd.%09jds not before %jd.%09jds "
-	    "(difference %jd.%09jds)\n",
-	    file, line,
-	    (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec,
-	    (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec,
-	    (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec);
-  }
+  if (!TIMESPEC_BEFORE(left, right))
+    {
+      support_record_failure ();
+      const struct timespec diff = timespec_sub (left, right);
+      printf ("%s:%d: %jd.%09jds not before %jd.%09jds "
+	      "(difference %jd.%09jds)\n",
+	      file, line,
+	      (intmax_t) left.tv_sec, (intmax_t) left.tv_nsec,
+	      (intmax_t) right.tv_sec, (intmax_t) right.tv_nsec,
+	      (intmax_t) diff.tv_sec, (intmax_t) diff.tv_nsec);
+    }
 }
 
 void
diff --git a/support/timespec.h b/support/timespec.h
index 1a6775a882..280844f2c0 100644
--- a/support/timespec.h
+++ b/support/timespec.h
@@ -55,6 +55,11 @@  struct timespec support_timespec_normalize (struct timespec time);
 int support_timespec_check_in_range (struct timespec expected, struct timespec observed,
 				  double lower_bound, double upper_bound);
 
+/* True if left is before right, false otherwise.  */
+#define TIMESPEC_BEFORE(left, right) \
+  ((left.tv_sec < right.tv_sec)					\
+   || (left.tv_sec == right.tv_sec				\
+       && left.tv_nsec < right.tv_nsec))
 
 /* Check that the timespec on the left represents a time before the
    time on the right. */
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
index 5091a000e3..8fdc6589e5 100644
--- a/sysdeps/nptl/fork.c
+++ b/sysdeps/nptl/fork.c
@@ -66,6 +66,11 @@  __libc_fork (void)
     {
       _IO_list_lock ();
 
+      /* Acquire the abort lock.  We need to prevent other threads
+	 from aborting while we fork.  We must keep abort AS-safe
+	 and to do that we need to own the lock in the child.  */
+      __abort_fork_lock ();
+
       /* Acquire malloc locks.  This needs to come last because fork
 	 handlers may use malloc, and the libio list lock has an
 	 indirect malloc dependency as well (via the getdelim
@@ -113,6 +118,9 @@  __libc_fork (void)
 	  /* Release malloc locks.  */
 	  call_function_static_weak (__malloc_fork_unlock_child);
 
+	  /* Release the abort lock.  */
+	  __abort_fork_unlock ();
+
 	  /* Reset the file list.  These are recursive mutexes.  */
 	  fresetlockfiles ();
 
@@ -134,6 +142,9 @@  __libc_fork (void)
 	  /* Release malloc locks, parent process variant.  */
 	  call_function_static_weak (__malloc_fork_unlock_parent);
 
+	  /* Release the abort lock.  */
+	  __abort_fork_unlock ();
+
 	  /* We execute this even if the 'fork' call failed.  */
 	  _IO_list_unlock ();
 	}