stdlib: Fix data race in __run_exit_handlers
Commit Message
Read f->func.cxa under the lock.
There is a clear data race:
thread 0: __run_exit_handlers unlocks __exit_funcs_lock
thread 1: __internal_atexit locks __exit_funcs_lock
thread 0: f->flavor = ef_free;
thread 1: sees ef_free and reuses the entry as new
thread 1: new->func.cxa.fn = (void (*) (void *, int)) func;
thread 1: new->func.cxa.arg = arg;
thread 1: new->flavor = ef_cxa;
thread 0: cxafct = f->func.cxa.fn; // it's wrong fn!
thread 0: cxafct (f->func.cxa.arg, status); // it's wrong arg!
thread 0: goto restart;
thread 0: calls the same exit_function again because its flavor is ef_cxa
The race is incredibly rare, so without the patch the test fails only about 10% of the time.
---
stdlib/Makefile | 4 +-
stdlib/exit.c | 20 +++++---
stdlib/test-cxa_atexit-race2.c | 94 ++++++++++++++++++++++++++++++++++
3 files changed, 111 insertions(+), 7 deletions(-)
create mode 100644 stdlib/test-cxa_atexit-race2.c
Comments
On Sun, Sep 20, 2020 at 5:10 AM Vitaly Buka via Libc-alpha
<libc-alpha@sourceware.org> wrote:
> +static void *
> +threadfunc (void *unused)
> +{
> + for (; done < 1e6;)
> + {
> + if (added < done + 100)
> + {
> + __cxa_atexit (&atexitcb, (void *)(++added), __dso_handle);
Isn't there a data race on "added" here (in addition to a data race on "done")?
What prevents two threads from observing "added == 100" at the same
time and adding two calls with value of 101, which would later trigger
abort() in exitcb()?
> + /* With default 8MiB Linux stack size, creating 1024 threads can cause
> + VM exhausiton on 32-bit machines. Reduce stack size of each thread to
> + 128KiB for a maximum required VM size of 128MiB. */
This comment is far removed from the computation of kStacksize (and
the name violates the naming conventions used here).
I suggest:
size_t stack_size = 128 << 10; /* 128KiB */
if (stack_size < PTHREAD_STACK_MIN) stack_size = PTHREAD_STACK_MIN;
Also, I suspect that 32KiB would be more than enough for stack size here.
> + for (i = 0; i < kNumThreads; ++i)
Since kNumThreads isn't used anywhere else, I suggest making it a local:
const int num_threads = 50;
On Sun, 20 Sep 2020 at 13:42, Paul Pluzhnikov <ppluzhnikov@google.com>
wrote:
> On Sun, Sep 20, 2020 at 5:10 AM Vitaly Buka via Libc-alpha
> <libc-alpha@sourceware.org> wrote:
>
> > +static void *
> > +threadfunc (void *unused)
> > +{
> > + for (; done < 1e6;)
> > + {
> > + if (added < done + 100)
> > + {
> > + __cxa_atexit (&atexitcb, (void *)(++added), __dso_handle);
>
> Isn't there a data race on "added" here (in addition to a data race on
> "done")?
> What prevents two threads from observing "added == 100" at the same
> time and adding two calls with value of 101, which would later trigger
> abort() in exitcb()?
>
They are atomic. Isn't (++added) guaranteed to return different values in
all threads?
>
> > + /* With default 8MiB Linux stack size, creating 1024 threads can cause
> > + VM exhausiton on 32-bit machines. Reduce stack size of each
> thread to
> > + 128KiB for a maximum required VM size of 128MiB. */
>
> This comment is far removed from the computation of kStacksize (and
> the name violates the naming conventions used here).
>
> I suggest:
>
> size_t stack_size = 128 << 10; /* 128KiB */
> if (stack_size < PTHREAD_STACK_MIN) stack_size = PTHREAD_STACK_MIN;
>
> Also, I suspect that 32KiB would be more than enough for stack size here.
>
> > + for (i = 0; i < kNumThreads; ++i)
>
> Since kNumThreads isn't used anywhere else, I suggest making it a local:
>
> const int num_threads = 50;
>
> --
> Paul Pluzhnikov
>
Oh, these atomics are not what I thought. So the test reproduced bugs in the
test itself.
I tried different approaches, but I failed to reproduce the data race.
On Sun, 20 Sep 2020 at 14:26, Vitaly Buka <vitalybuka@google.com> wrote:
>
>
> On Sun, 20 Sep 2020 at 13:42, Paul Pluzhnikov <ppluzhnikov@google.com>
> wrote:
>
>> On Sun, Sep 20, 2020 at 5:10 AM Vitaly Buka via Libc-alpha
>> <libc-alpha@sourceware.org> wrote:
>>
>> > +static void *
>> > +threadfunc (void *unused)
>> > +{
>> > + for (; done < 1e6;)
>> > + {
>> > + if (added < done + 100)
>> > + {
>> > + __cxa_atexit (&atexitcb, (void *)(++added), __dso_handle);
>>
>> Isn't there a data race on "added" here (in addition to a data race on
>> "done")?
>> What prevents two threads from observing "added == 100" at the same
>> time and adding two calls with value of 101, which would later trigger
>> abort() in exitcb()?
>>
>
> They are atomic. Isn't (++added) guarantee to return different values in
> all threads?
>
>
>>
>> > + /* With default 8MiB Linux stack size, creating 1024 threads can
>> cause
>> > + VM exhausiton on 32-bit machines. Reduce stack size of each
>> thread to
>> > + 128KiB for a maximum required VM size of 128MiB. */
>>
>> This comment is far removed from the computation of kStacksize (and
>> the name violates the naming conventions used here).
>>
>> I suggest:
>>
>> size_t stack_size = 128 << 10; /* 128KiB */
>> if (stack_size < PTHREAD_STACK_MIN) stack_size = PTHREAD_STACK_MIN;
>>
>> Also, I suspect that 32KiB would be more than enough for stack size here.
>>
>> > + for (i = 0; i < kNumThreads; ++i)
>>
>> Since kNumThreads isn't used anywhere else, I suggest making it a local:
>>
>> const int num_threads = 50;
>>
>> --
>> Paul Pluzhnikov
>>
>
@@ -82,7 +82,8 @@ tests := tst-strtol tst-strtod testmb testrand testsort testdiv \
tst-width-stdint tst-strfrom tst-strfrom-locale \
tst-getrandom tst-atexit tst-at_quick_exit \
tst-cxa_atexit tst-on_exit test-atexit-race \
- test-at_quick_exit-race test-cxa_atexit-race \
+ test-at_quick_exit-race test-cxa_atexit-race \
+ test-cxa_atexit-race2 \
test-on_exit-race test-dlclose-exit-race \
tst-makecontext-align test-bz22786 tst-strtod-nan-sign \
tst-swapcontext1 tst-setcontext4 tst-setcontext5 \
@@ -101,6 +102,7 @@ endif
LDLIBS-test-atexit-race = $(shared-thread-library)
LDLIBS-test-at_quick_exit-race = $(shared-thread-library)
LDLIBS-test-cxa_atexit-race = $(shared-thread-library)
+LDLIBS-test-cxa_atexit-race2 = $(shared-thread-library)
LDLIBS-test-on_exit-race = $(shared-thread-library)
LDLIBS-test-dlclose-exit-race = $(shared-thread-library) $(libdl)
@@ -72,44 +72,52 @@ __run_exit_handlers (int status, struct exit_function_list **listp,
struct exit_function *const f = &cur->fns[--cur->idx];
const uint64_t new_exitfn_called = __new_exitfn_called;
- /* Unlock the list while we call a foreign function. */
- __libc_lock_unlock (__exit_funcs_lock);
switch (f->flavor)
{
void (*atfct) (void);
void (*onfct) (int status, void *arg);
void (*cxafct) (void *arg, int status);
+ void *arg;
case ef_free:
case ef_us:
break;
case ef_on:
onfct = f->func.on.fn;
+ arg = f->func.on.arg;
+ /* Unlock the list while we call a foreign function. */
+ __libc_lock_unlock (__exit_funcs_lock);
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (onfct);
#endif
- onfct (status, f->func.on.arg);
+ onfct (status, arg);
+ __libc_lock_lock (__exit_funcs_lock);
break;
case ef_at:
atfct = f->func.at;
+ /* Unlock the list while we call a foreign function. */
+ __libc_lock_unlock (__exit_funcs_lock);
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (atfct);
#endif
atfct ();
+ __libc_lock_lock (__exit_funcs_lock);
break;
case ef_cxa:
/* To avoid dlclose/exit race calling cxafct twice (BZ 22180),
we must mark this function as ef_free. */
f->flavor = ef_free;
cxafct = f->func.cxa.fn;
+ arg = f->func.cxa.arg;
+ /* Unlock the list while we call a foreign function. */
+ __libc_lock_unlock (__exit_funcs_lock);
#ifdef PTR_DEMANGLE
PTR_DEMANGLE (cxafct);
#endif
- cxafct (f->func.cxa.arg, status);
+ cxafct (arg, status);
+ __libc_lock_lock (__exit_funcs_lock);
break;
}
- /* Re-lock again before looking at global state. */
- __libc_lock_lock (__exit_funcs_lock);
if (__glibc_unlikely (new_exitfn_called != __new_exitfn_called))
/* The last exit function, or another thread, has registered
new file mode 100644
@@ -0,0 +1,94 @@
+/* Support file for atexit/exit, etc. race tests.
+ Copyright (C) 2017-2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* This file must be run from within a directory called "stdlib". */
+
+/* The atexit/exit, at_quick_exit/quick_exit, __cxa_atexit/exit, etc.
+ exhibited data race while accessing destructor function list (Bug 14333).
+
+ This test spawns a large number of threads and checks that the same
+ destructor is not called more than once. */
+
+#include <limits.h>
+#include <stdatomic.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <support/xthread.h>
+
+extern void *__dso_handle;
+
+const size_t kNumThreads = 50;
+const size_t kStacksize =
+#ifdef PTHREAD_STACK_MIN
+ 0x20000 < PTHREAD_STACK_MIN ? PTHREAD_STACK_MIN :
+#endif
+ 0x20000;
+
+static atomic_intptr_t added;
+static atomic_intptr_t done;
+static void *prev;
+
+static void
+atexitcb (void *arg)
+{
+ if (arg == prev)
+ abort ();
+ prev = arg;
+ ++done;
+}
+
+int __cxa_atexit (void (*func) (void *), void *arg, void *d);
+
+static void *
+threadfunc (void *unused)
+{
+ for (; done < 1e6;)
+ {
+ if (added < done + 100)
+ {
+ __cxa_atexit (&atexitcb, (void *)(++added), __dso_handle);
+ }
+ }
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ size_t i;
+ pthread_attr_t attr;
+
+ xpthread_attr_init (&attr);
+ xpthread_attr_setdetachstate (&attr, 1);
+
+ /* With default 8MiB Linux stack size, creating many threads can cause
+ VM exhaustion on 32-bit machines. Reduce stack size of each thread to
+ 128KiB, so the 50 threads created here need only about 6MiB of VM. */
+ xpthread_attr_setstacksize (&attr, kStacksize);
+
+ for (i = 0; i < kNumThreads; ++i)
+ {
+ xpthread_create (&attr, threadfunc, NULL);
+ }
+ xpthread_attr_destroy (&attr);
+
+ exit (0);
+}
+
+#define TEST_FUNCTION do_test
+#include <support/test-driver.c>