libstdc++: Fix std::barrier for constant initialization [PR118395]
Checks
Context |
Check |
Description |
linaro-tcwg-bot/tcwg_gcc_build--master-arm |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-arm |
success
|
Test passed
|
linaro-tcwg-bot/tcwg_gcc_build--master-aarch64 |
success
|
Build passed
|
linaro-tcwg-bot/tcwg_gcc_check--master-aarch64 |
success
|
Test passed
|
Commit Message
The std::barrier constructor should be constexpr, which means we need to
defer the dynamic allocation if the constructor is called during
constant-initialization. We can defer it to the first call to
barrier::arrive, using compare-and-swap on an atomic<T*> (instead of the
unique_ptr<T[]> currently used).
Also add precondition checks to the constructor and arrive member
function. Also implement the proposed resolution of LWG 3898.
libstdc++-v3/ChangeLog:
PR libstdc++/118395
PR libstdc++/108974
PR libstdc++/98749
* include/std/barrier (__tree_barrier): Use default
member-initializers. Change _M_state member from
unique_ptr<__state_t[]> to atomic<__state_t*>. Add
no_unique_address attribute to _M_completion.
(__tree_barrier::_M_arrive): Load value from _M_state.
(__tree_barrier::_M_invoke_completion): New member function to
ensure a throwing completion function will terminate, as
proposed in LWG 3898.
(__tree_barrier::max): Reduce by one to avoid overflow.
(__tree_barrier::__tree_barrier): Add constexpr. Qualify call to
std::move. Remove mem-initializers made unnecessary by default
member-initializers. Add precondition check. Only allocate state
array if not constant evaluated.
(__tree_barrier::arrive): Add precondition check. Do deferred
initialization of _M_state if needed.
(barrier): Add static_assert, as proposed in LWG 3898.
(barrier::barrier): Add constexpr.
* testsuite/30_threads/barrier/cons.cc: New test.
* testsuite/30_threads/barrier/lwg3898.cc: New test.
---
Tested x86_64-linux.
libstdc++-v3/include/std/barrier | 57 ++++++++++++++-----
.../testsuite/30_threads/barrier/cons.cc | 6 ++
.../testsuite/30_threads/barrier/lwg3898.cc | 45 +++++++++++++++
3 files changed, 93 insertions(+), 15 deletions(-)
create mode 100644 libstdc++-v3/testsuite/30_threads/barrier/cons.cc
create mode 100644 libstdc++-v3/testsuite/30_threads/barrier/lwg3898.cc
@@ -96,11 +96,11 @@ It looks different from literature pseudocode for two main reasons:
};
ptrdiff_t _M_expected;
- unique_ptr<__state_t[]> _M_state;
- __atomic_base<ptrdiff_t> _M_expected_adjustment;
- _CompletionF _M_completion;
+ __atomic_base<__state_t*> _M_state{nullptr};
+ __atomic_base<ptrdiff_t> _M_expected_adjustment{0};
+ [[no_unique_address]] _CompletionF _M_completion;
- alignas(__phase_alignment) __barrier_phase_t _M_phase;
+ alignas(__phase_alignment) __barrier_phase_t _M_phase{};
bool
_M_arrive(__barrier_phase_t __old_phase, size_t __current)
@@ -114,6 +114,8 @@ It looks different from literature pseudocode for two main reasons:
size_t __current_expected = _M_expected;
__current %= ((_M_expected + 1) >> 1);
+ __state_t* const __state = _M_state.load(memory_order_relaxed);
+
for (int __round = 0; ; ++__round)
{
if (__current_expected <= 1)
@@ -125,7 +127,7 @@ It looks different from literature pseudocode for two main reasons:
if (__current == __end_node)
__current = 0;
auto __expect = __old_phase;
- __atomic_phase_ref_t __phase(_M_state[__current]
+ __atomic_phase_ref_t __phase(__state[__current]
.__tickets[__round]);
if (__current == __last_node && (__current_expected & 1))
{
@@ -150,36 +152,59 @@ It looks different from literature pseudocode for two main reasons:
}
}
+ // _GLIBCXX_RESOLVE_LIB_DEFECTS
+ // 3898. Possibly unintended preconditions for completion functions
+ void _M_invoke_completion() noexcept { _M_completion(); }
+
public:
using arrival_token = __barrier_phase_t;
static constexpr ptrdiff_t
max() noexcept
- { return __PTRDIFF_MAX__; }
+ { return __PTRDIFF_MAX__ - 1; }
+ constexpr
__tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
- : _M_expected(__expected), _M_expected_adjustment(0),
- _M_completion(move(__completion)),
- _M_phase(static_cast<__barrier_phase_t>(0))
+ : _M_expected(__expected), _M_completion(std::move(__completion))
{
- size_t const __count = (_M_expected + 1) >> 1;
+ __glibcxx_assert(__expected >= 0 && __expected <= max());
- _M_state = std::make_unique<__state_t[]>(__count);
+ if (!std::is_constant_evaluated())
+ {
+ size_t const __count = (_M_expected + 1) >> 1;
+ _M_state.store(new __state_t[__count], memory_order_release);
+ }
}
[[nodiscard]] arrival_token
arrive(ptrdiff_t __update)
{
+ __glibcxx_assert(__update > 0);
+ // FIXME: Check that update is less than or equal to the expected count
+ // for the current barrier phase.
+
std::hash<std::thread::id> __hasher;
size_t __current = __hasher(std::this_thread::get_id());
__atomic_phase_ref_t __phase(_M_phase);
const auto __old_phase = __phase.load(memory_order_relaxed);
const auto __cur = static_cast<unsigned char>(__old_phase);
- for(; __update; --__update)
+
+ if (__cur == 0 && !_M_state.load(memory_order_relaxed)) [[unlikely]]
{
- if(_M_arrive(__old_phase, __current))
+ size_t const __count = (_M_expected + 1) >> 1;
+ auto __p = make_unique<__state_t[]>(__count);
+ __state_t* __val = nullptr;
+ if (_M_state.compare_exchange_strong(__val, __p.get(),
+ memory_order_seq_cst,
+ memory_order_acquire))
+ __p.release();
+ }
+
+ for (; __update; --__update)
+ {
+ if (_M_arrive(__old_phase, __current))
{
- _M_completion();
+ _M_invoke_completion();
_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
_M_expected_adjustment.store(0, memory_order_relaxed);
auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
@@ -208,6 +233,8 @@ It looks different from literature pseudocode for two main reasons:
template<typename _CompletionF = __empty_completion>
class barrier
{
+ static_assert(is_invocable_v<_CompletionF&>);
+
// Note, we may introduce a "central" barrier algorithm at some point
// for more space constrained targets
using __algorithm_t = __tree_barrier<_CompletionF>;
@@ -232,7 +259,7 @@ It looks different from literature pseudocode for two main reasons:
max() noexcept
{ return __algorithm_t::max(); }
- explicit
+ constexpr explicit
barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
: _M_b(__count, std::move(__completion))
{ }
new file mode 100644
@@ -0,0 +1,6 @@
+// { dg-do compile { target c++20 } }
+
+#include <barrier>
+
+// PR 118395 Constructor of std::barrier is not constexpr
+constinit std::barrier<> b(std::barrier<>::max());
new file mode 100644
@@ -0,0 +1,45 @@
+// { dg-do run { target c++20 } }
+// { dg-require-effective-target gthreads }
+
+#include <barrier>
+#include <exception>
+#include <cstdlib>
+#if !_GLIBCXX_USE_C99_STDLIB && defined _GLIBCXX_HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
+void handle_terminate()
+{
+#if _GLIBCXX_USE_C99_STDLIB
+ std::_Exit(0);
+#elif defined _GLIBCXX_HAVE_UNISTD_H
+ _exit(0);
+#else
+ std::exit(0);
+#endif
+}
+
+struct F
+{
+ void operator()()
+ {
+ std::set_terminate(handle_terminate);
+ throw 1;
+ }
+};
+
+void
+test_lwg3898()
+{
+ std::barrier<F> b(1, F{});
+ // This should call the terminate handler and exit with zero status:
+ b.arrive_and_wait();
+ // Should not reach here:
+ std::abort();
+}
+
+int
+main()
+{
+ test_lwg3898();
+}