llvm-project
308 строк · 10.8 Кб
1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14barrier synopsis
15
16namespace std
17{
18
19template<class CompletionFunction = see below>
20class barrier
21{
22public:
23using arrival_token = see below;
24
25static constexpr ptrdiff_t max() noexcept;
26
27constexpr explicit barrier(ptrdiff_t phase_count,
28CompletionFunction f = CompletionFunction());
29~barrier();
30
31barrier(const barrier&) = delete;
32barrier& operator=(const barrier&) = delete;
33
34[[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35void wait(arrival_token&& arrival) const;
36
37void arrive_and_wait();
38void arrive_and_drop();
39
40private:
41CompletionFunction completion; // exposition only
42};
43
44}
45
46*/
47
48#include <__config>
49
50#ifdef _LIBCPP_HAS_NO_THREADS
51# error "<barrier> is not supported since libc++ has been configured without support for threads."
52#endif
53
54#include <__assert>
55#include <__atomic/atomic_base.h>
56#include <__atomic/memory_order.h>
57#include <__memory/unique_ptr.h>
58#include <__thread/poll_with_backoff.h>
59#include <__thread/timed_backoff_policy.h>
60#include <__utility/move.h>
61#include <cstddef>
62#include <cstdint>
63#include <limits>
64#include <version>
65
66#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
67# pragma GCC system_header
68#endif
69
70_LIBCPP_PUSH_MACROS
71#include <__undef_macros>
72
73#if _LIBCPP_STD_VER >= 14
74
75_LIBCPP_BEGIN_NAMESPACE_STD
76
77struct __empty_completion {
78inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
79};
80
81# ifndef _LIBCPP_HAS_NO_TREE_BARRIER
82
83/*
84
85The default implementation of __barrier_base is a classic tree barrier.
86
87It looks different from literature pseudocode for two main reasons:
881. Threads that call into std::barrier functions do not provide indices,
89so a numbering step is added before the actual barrier algorithm,
90appearing as an N+1 round to the N rounds of the tree barrier.
912. A great deal of attention has been paid to avoid cache line thrashing
92by flattening the tree structure into cache-line sized arrays, that
93are indexed in an efficient way.
94
95*/
96
97using __barrier_phase_t = uint8_t;
98
99class __barrier_algorithm_base;
100
101_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
102__construct_barrier_algorithm_base(ptrdiff_t& __expected);
103
104_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
105__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept;
106
107_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
108__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept;
109
110template <class _CompletionF>
111class __barrier_base {
112ptrdiff_t __expected_;
113unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
114__atomic_base<ptrdiff_t> __expected_adjustment_;
115_CompletionF __completion_;
116__atomic_base<__barrier_phase_t> __phase_;
117
118public:
119using arrival_token = __barrier_phase_t;
120
121static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
122
123_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
124__barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
125: __expected_(__expected),
126__base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
127__expected_adjustment_(0),
128__completion_(std::move(__completion)),
129__phase_(0) {}
130_LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
131_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
132__update <= __expected_, "update is greater than the expected count for the current barrier phase");
133
134auto const __old_phase = __phase_.load(memory_order_relaxed);
135for (; __update; --__update)
136if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
137__completion_();
138__expected_ += __expected_adjustment_.load(memory_order_relaxed);
139__expected_adjustment_.store(0, memory_order_relaxed);
140__phase_.store(__old_phase + 2, memory_order_release);
141__phase_.notify_all();
142}
143return __old_phase;
144}
145_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
146auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
147std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
148}
149_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
150__expected_adjustment_.fetch_sub(1, memory_order_relaxed);
151(void)arrive(1);
152}
153};
154
155# else
156
157/*
158
159The alternative implementation of __barrier_base is a central barrier.
160
161Two versions of this algorithm are provided:
1621. A fairly straightforward implementation of the litterature for the
163general case where the completion function is not empty.
1642. An optimized implementation that exploits 2's complement arithmetic
165and well-defined overflow in atomic arithmetic, to handle the phase
166roll-over for free.
167
168*/
169
170template <class _CompletionF>
171class __barrier_base {
172__atomic_base<ptrdiff_t> __expected;
173__atomic_base<ptrdiff_t> __arrived;
174_CompletionF __completion;
175__atomic_base<bool> __phase;
176
177public:
178using arrival_token = bool;
179
180static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
181
182_LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
183: __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
184[[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
185auto const __old_phase = __phase.load(memory_order_relaxed);
186auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
187auto const new_expected = __expected.load(memory_order_relaxed);
188
189_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
190update <= new_expected, "update is greater than the expected count for the current barrier phase");
191
192if (0 == __result) {
193__completion();
194__arrived.store(new_expected, memory_order_relaxed);
195__phase.store(!__old_phase, memory_order_release);
196__phase.notify_all();
197}
198return __old_phase;
199}
200_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
201__phase.wait(__old_phase, memory_order_acquire);
202}
203_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
204__expected.fetch_sub(1, memory_order_relaxed);
205(void)arrive(1);
206}
207};
208
209template <>
210class __barrier_base<__empty_completion> {
211static constexpr uint64_t __expected_unit = 1ull;
212static constexpr uint64_t __arrived_unit = 1ull << 32;
213static constexpr uint64_t __expected_mask = __arrived_unit - 1;
214static constexpr uint64_t __phase_bit = 1ull << 63;
215static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
216
217__atomic_base<uint64_t> __phase_arrived_expected;
218
219static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
220return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
221}
222
223public:
224using arrival_token = uint64_t;
225
226static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
227
228_LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
229: __phase_arrived_expected(__init(__count)) {}
230[[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
231auto const __inc = __arrived_unit * update;
232auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
233
234_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
235update <= __old, "update is greater than the expected count for the current barrier phase");
236
237if ((__old ^ (__old + __inc)) & __phase_bit) {
238__phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
239__phase_arrived_expected.notify_all();
240}
241return __old & __phase_bit;
242}
243inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
244auto const __test_fn = [=]() -> bool {
245uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
246return ((__current & __phase_bit) != __phase);
247};
248__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
249}
250inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
251__phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
252(void)arrive(1);
253}
254};
255
256# endif // !_LIBCPP_HAS_NO_TREE_BARRIER
257
258template <class _CompletionF = __empty_completion>
259class _LIBCPP_DEPRECATED_ATOMIC_SYNC barrier {
260__barrier_base<_CompletionF> __b_;
261
262public:
263using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
264
265static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
266
267_LIBCPP_AVAILABILITY_SYNC
268_LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
269: __b_(__count, std::move(__completion)) {
270_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
271__count >= 0,
272"barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
273_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
274__count <= max(),
275"barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
276"a value greater than max()");
277}
278
279barrier(barrier const&) = delete;
280barrier& operator=(barrier const&) = delete;
281
282_LIBCPP_NODISCARD _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
283_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
284return __b_.arrive(__update);
285}
286_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
287__b_.wait(std::move(__phase));
288}
289_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
290_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
291};
292
293_LIBCPP_END_NAMESPACE_STD
294
295#endif // _LIBCPP_STD_VER >= 14
296
297_LIBCPP_POP_MACROS
298
299#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
300# include <atomic>
301# include <concepts>
302# include <iterator>
303# include <memory>
304# include <stdexcept>
305# include <variant>
306#endif
307
308#endif //_LIBCPP_BARRIER
309