1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__assert> // all public C++ headers provide the assertion handler
49#include <__atomic/atomic_base.h>
50#include <__atomic/memory_order.h>
51#include <__availability>
52#include <__config>
53#include <__memory/unique_ptr.h>
54#include <__thread/poll_with_backoff.h>
55#include <__thread/timed_backoff_policy.h>
56#include <__utility/move.h>
57#include <cstddef>
58#include <cstdint>
59#include <limits>
60#include <version>
61
62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63#  pragma GCC system_header
64#endif
65
66#ifdef _LIBCPP_HAS_NO_THREADS
67# error "<barrier> is not supported since libc++ has been configured without support for threads."
68#endif
69
70_LIBCPP_PUSH_MACROS
71#include <__undef_macros>
72
73#if _LIBCPP_STD_VER >= 14
74
75_LIBCPP_BEGIN_NAMESPACE_STD
76
77struct __empty_completion
78{
79    inline _LIBCPP_INLINE_VISIBILITY
80    void operator()() noexcept
81    {
82    }
83};
84
85#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
86
87/*
88
89The default implementation of __barrier_base is a classic tree barrier.
90
91It looks different from literature pseudocode for two main reasons:
92 1. Threads that call into std::barrier functions do not provide indices,
93    so a numbering step is added before the actual barrier algorithm,
94    appearing as an N+1 round to the N rounds of the tree barrier.
95 2. A great deal of attention has been paid to avoid cache line thrashing
96    by flattening the tree structure into cache-line sized arrays, that
97    are indexed in an efficient way.
98
99*/
100
101using __barrier_phase_t = uint8_t;
102
103class __barrier_algorithm_base;
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
107
108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
109bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
110                                     __barrier_phase_t __old_phase);
111
112_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
113void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
114
115template<class _CompletionF>
116class __barrier_base {
117    ptrdiff_t                                               __expected_;
118    unique_ptr<__barrier_algorithm_base,
119               void (*)(__barrier_algorithm_base*)>         __base_;
120    __atomic_base<ptrdiff_t>                                __expected_adjustment_;
121    _CompletionF                                            __completion_;
122    __atomic_base<__barrier_phase_t>                        __phase_;
123
124public:
125    using arrival_token = __barrier_phase_t;
126
127    static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
128        return numeric_limits<ptrdiff_t>::max();
129    }
130
131    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
133            : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
134                                               &__destroy_barrier_algorithm_base),
135              __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
136    {
137    }
138    [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
139    arrival_token arrive(ptrdiff_t __update)
140    {
141        _LIBCPP_ASSERT_UNCATEGORIZED(
142            __update <= __expected_, "update is greater than the expected count for the current barrier phase");
143
144        auto const __old_phase = __phase_.load(memory_order_relaxed);
145        for(; __update; --__update)
146            if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
147                __completion_();
148                __expected_ += __expected_adjustment_.load(memory_order_relaxed);
149                __expected_adjustment_.store(0, memory_order_relaxed);
150                __phase_.store(__old_phase + 2, memory_order_release);
151                __phase_.notify_all();
152            }
153        return __old_phase;
154    }
155    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
156    void wait(arrival_token&& __old_phase) const
157    {
158        auto const __test_fn = [this, __old_phase]() -> bool {
159            return __phase_.load(memory_order_acquire) != __old_phase;
160        };
161        std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
162    }
163    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
164    void arrive_and_drop()
165    {
166        __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
167        (void)arrive(1);
168    }
169};
170
171#else
172
173/*
174
175The alternative implementation of __barrier_base is a central barrier.
176
177Two versions of this algorithm are provided:
178 1. A fairly straightforward implementation of the litterature for the
179    general case where the completion function is not empty.
180 2. An optimized implementation that exploits 2's complement arithmetic
181    and well-defined overflow in atomic arithmetic, to handle the phase
182    roll-over for free.
183
184*/
185
186template<class _CompletionF>
187class __barrier_base {
188
189    __atomic_base<ptrdiff_t> __expected;
190    __atomic_base<ptrdiff_t> __arrived;
191    _CompletionF             __completion;
192    __atomic_base<bool>      __phase;
193public:
194    using arrival_token = bool;
195
196    static constexpr ptrdiff_t max() noexcept {
197        return numeric_limits<ptrdiff_t>::max();
198    }
199
200    _LIBCPP_INLINE_VISIBILITY
201    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
202        : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
203    {
204    }
205    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
206    arrival_token arrive(ptrdiff_t update)
207    {
208        auto const __old_phase = __phase.load(memory_order_relaxed);
209        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
210        auto const new_expected = __expected.load(memory_order_relaxed);
211
212        _LIBCPP_ASSERT_UNCATEGORIZED(
213            update <= new_expected, "update is greater than the expected count for the current barrier phase");
214
215        if (0 == __result) {
216            __completion();
217            __arrived.store(new_expected, memory_order_relaxed);
218            __phase.store(!__old_phase, memory_order_release);
219            __phase.notify_all();
220        }
221        return __old_phase;
222    }
223    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
224    void wait(arrival_token&& __old_phase) const
225    {
226        __phase.wait(__old_phase, memory_order_acquire);
227    }
228    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
229    void arrive_and_drop()
230    {
231        __expected.fetch_sub(1, memory_order_relaxed);
232        (void)arrive(1);
233    }
234};
235
236template<>
237class __barrier_base<__empty_completion> {
238
239    static constexpr uint64_t __expected_unit = 1ull;
240    static constexpr uint64_t __arrived_unit = 1ull << 32;
241    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
242    static constexpr uint64_t __phase_bit = 1ull << 63;
243    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
244
245    __atomic_base<uint64_t>   __phase_arrived_expected;
246
247    static _LIBCPP_INLINE_VISIBILITY
248    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
249    {
250        return ((uint64_t(1u << 31) - __count) << 32)
251              | (uint64_t(1u << 31) - __count);
252    }
253
254public:
255    using arrival_token = uint64_t;
256
257    static constexpr ptrdiff_t max() noexcept {
258        return ptrdiff_t(1u << 31) - 1;
259    }
260
261    _LIBCPP_INLINE_VISIBILITY
262    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
263        : __phase_arrived_expected(__init(__count))
264    {
265    }
266    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
267    arrival_token arrive(ptrdiff_t update)
268    {
269        auto const __inc = __arrived_unit * update;
270        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
271
272        _LIBCPP_ASSERT_UNCATEGORIZED(
273            update <= __old, "update is greater than the expected count for the current barrier phase");
274
275        if ((__old ^ (__old + __inc)) & __phase_bit) {
276            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
277            __phase_arrived_expected.notify_all();
278        }
279        return __old & __phase_bit;
280    }
281    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
282    void wait(arrival_token&& __phase) const
283    {
284        auto const __test_fn = [=]() -> bool {
285            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
286            return ((__current & __phase_bit) != __phase);
287        };
288        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
289    }
290    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
291    void arrive_and_drop()
292    {
293        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
294        (void)arrive(1);
295    }
296};
297
298#endif // !_LIBCPP_HAS_NO_TREE_BARRIER
299
300template<class _CompletionF = __empty_completion>
301class barrier {
302
303    __barrier_base<_CompletionF> __b_;
304public:
305    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
306
307    static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
308        return __barrier_base<_CompletionF>::max();
309    }
310
311    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312    explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
313        : __b_(__count, _VSTD::move(__completion)) {
314        _LIBCPP_ASSERT_UNCATEGORIZED(
315            __count >= 0,
316            "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
317        _LIBCPP_ASSERT_UNCATEGORIZED(
318            __count <= max(),
319            "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
320            "a value greater than max()");
321    }
322
323    barrier(barrier const&) = delete;
324    barrier& operator=(barrier const&) = delete;
325
326    [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
327    arrival_token arrive(ptrdiff_t __update = 1)
328    {
329        _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
330        return __b_.arrive(__update);
331    }
332    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
333    void wait(arrival_token&& __phase) const
334    {
335        __b_.wait(_VSTD::move(__phase));
336    }
337    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
338    void arrive_and_wait()
339    {
340        wait(arrive());
341    }
342    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
343    void arrive_and_drop()
344    {
345        __b_.arrive_and_drop();
346    }
347};
348
349_LIBCPP_END_NAMESPACE_STD
350
351#endif // _LIBCPP_STD_VER >= 14
352
353_LIBCPP_POP_MACROS
354
355#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
356#  include <atomic>
357#  include <concepts>
358#  include <iterator>
359#  include <memory>
360#  include <stdexcept>
361#  include <variant>
362#endif
363
364#endif //_LIBCPP_BARRIER
365