1// -*- C++ -*-
2//===--------------------------- barrier ----------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__availability>
49#include <__config>
50#include <atomic>
51#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
52# include <memory>
53#endif
54
55#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
56#pragma GCC system_header
57#endif
58
59#ifdef _LIBCPP_HAS_NO_THREADS
60# error <barrier> is not supported on this single threaded system
61#endif
62
63_LIBCPP_PUSH_MACROS
64#include <__undef_macros>
65
66#if _LIBCPP_STD_VER >= 14
67
68_LIBCPP_BEGIN_NAMESPACE_STD
69
70struct __empty_completion
71{
72    inline _LIBCPP_INLINE_VISIBILITY
73    void operator()() noexcept
74    {
75    }
76};
77
78#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
79
80/*
81
82The default implementation of __barrier_base is a classic tree barrier.
83
84It looks different from literature pseudocode for two main reasons:
85 1. Threads that call into std::barrier functions do not provide indices,
86    so a numbering step is added before the actual barrier algorithm,
87    appearing as an N+1 round to the N rounds of the tree barrier.
88 2. A great deal of attention has been paid to avoid cache line thrashing
89    by flattening the tree structure into cache-line sized arrays, that
90    are indexed in an efficient way.
91
92*/
93
94using __barrier_phase_t = uint8_t;
95
96class __barrier_algorithm_base;
97
98_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
99__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
100
101_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
102bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
103                                     __barrier_phase_t __old_phase);
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
107
108template<class _CompletionF>
109class __barrier_base {
110    ptrdiff_t                                               __expected;
111    unique_ptr<__barrier_algorithm_base,
112               void (*)(__barrier_algorithm_base*)>         __base;
113    __atomic_base<ptrdiff_t>                                __expected_adjustment;
114    _CompletionF                                            __completion;
115    __atomic_base<__barrier_phase_t>                        __phase;
116
117public:
118    using arrival_token = __barrier_phase_t;
119
120    static constexpr ptrdiff_t max() noexcept {
121        return numeric_limits<ptrdiff_t>::max();
122    }
123
124    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
125    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
126            : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
127                                             &__destroy_barrier_algorithm_base),
128              __expected_adjustment(0), __completion(move(__completion)), __phase(0)
129    {
130    }
131    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132    arrival_token arrive(ptrdiff_t update)
133    {
134        auto const __old_phase = __phase.load(memory_order_relaxed);
135        for(; update; --update)
136            if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
137                __completion();
138                __expected += __expected_adjustment.load(memory_order_relaxed);
139                __expected_adjustment.store(0, memory_order_relaxed);
140                __phase.store(__old_phase + 2, memory_order_release);
141                __phase.notify_all();
142            }
143        return __old_phase;
144    }
145    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
146    void wait(arrival_token&& __old_phase) const
147    {
148        auto const __test_fn = [this, __old_phase]() -> bool {
149            return __phase.load(memory_order_acquire) != __old_phase;
150        };
151        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
152    }
153    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
154    void arrive_and_drop()
155    {
156        __expected_adjustment.fetch_sub(1, memory_order_relaxed);
157        (void)arrive(1);
158    }
159};
160
161#else
162
163/*
164
165The alternative implementation of __barrier_base is a central barrier.
166
167Two versions of this algorithm are provided:
168 1. A fairly straightforward implementation of the litterature for the
169    general case where the completion function is not empty.
170 2. An optimized implementation that exploits 2's complement arithmetic
171    and well-defined overflow in atomic arithmetic, to handle the phase
172    roll-over for free.
173
174*/
175
176template<class _CompletionF>
177class __barrier_base {
178
179    __atomic_base<ptrdiff_t> __expected;
180    __atomic_base<ptrdiff_t> __arrived;
181    _CompletionF             __completion;
182    __atomic_base<bool>      __phase;
183public:
184    using arrival_token = bool;
185
186    static constexpr ptrdiff_t max() noexcept {
187        return numeric_limits<ptrdiff_t>::max();
188    }
189
190    _LIBCPP_INLINE_VISIBILITY
191    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
192        : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
193    {
194    }
195    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
196    arrival_token arrive(ptrdiff_t update)
197    {
198        auto const __old_phase = __phase.load(memory_order_relaxed);
199        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
200        auto const new_expected = __expected.load(memory_order_relaxed);
201        if(0 == __result) {
202            __completion();
203            __arrived.store(new_expected, memory_order_relaxed);
204            __phase.store(!__old_phase, memory_order_release);
205            __phase.notify_all();
206        }
207        return __old_phase;
208    }
209    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
210    void wait(arrival_token&& __old_phase) const
211    {
212        __phase.wait(__old_phase, memory_order_acquire);
213    }
214    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
215    void arrive_and_drop()
216    {
217        __expected.fetch_sub(1, memory_order_relaxed);
218        (void)arrive(1);
219    }
220};
221
222template<>
223class __barrier_base<__empty_completion> {
224
225    static constexpr uint64_t __expected_unit = 1ull;
226    static constexpr uint64_t __arrived_unit = 1ull << 32;
227    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
228    static constexpr uint64_t __phase_bit = 1ull << 63;
229    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
230
231    __atomic_base<uint64_t>   __phase_arrived_expected;
232
233    static _LIBCPP_INLINE_VISIBILITY
234    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
235    {
236        return ((uint64_t(1u << 31) - __count) << 32)
237              | (uint64_t(1u << 31) - __count);
238    }
239
240public:
241    using arrival_token = uint64_t;
242
243    static constexpr ptrdiff_t max() noexcept {
244        return ptrdiff_t(1u << 31) - 1;
245    }
246
247    _LIBCPP_INLINE_VISIBILITY
248    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
249        : __phase_arrived_expected(__init(__count))
250    {
251    }
252    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
253    arrival_token arrive(ptrdiff_t update)
254    {
255        auto const __inc = __arrived_unit * update;
256        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
257        if((__old ^ (__old + __inc)) & __phase_bit) {
258            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
259            __phase_arrived_expected.notify_all();
260        }
261        return __old & __phase_bit;
262    }
263    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
264    void wait(arrival_token&& __phase) const
265    {
266        auto const __test_fn = [=]() -> bool {
267            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
268            return ((__current & __phase_bit) != __phase);
269        };
270        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
271    }
272    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
273    void arrive_and_drop()
274    {
275        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
276        (void)arrive(1);
277    }
278};
279
280#endif //_LIBCPP_HAS_NO_TREE_BARRIER
281
282template<class _CompletionF = __empty_completion>
283class barrier {
284
285    __barrier_base<_CompletionF> __b;
286public:
287    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
288
289    static constexpr ptrdiff_t max() noexcept {
290        return __barrier_base<_CompletionF>::max();
291    }
292
293    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
294    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
295        : __b(__count, _VSTD::move(__completion)) {
296    }
297
298    barrier(barrier const&) = delete;
299    barrier& operator=(barrier const&) = delete;
300
301    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
302    arrival_token arrive(ptrdiff_t update = 1)
303    {
304        return __b.arrive(update);
305    }
306    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
307    void wait(arrival_token&& __phase) const
308    {
309        __b.wait(_VSTD::move(__phase));
310    }
311    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312    void arrive_and_wait()
313    {
314        wait(arrive());
315    }
316    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
317    void arrive_and_drop()
318    {
319        __b.arrive_and_drop();
320    }
321};
322
323_LIBCPP_END_NAMESPACE_STD
324
325#endif // _LIBCPP_STD_VER >= 14
326
327_LIBCPP_POP_MACROS
328
329#endif //_LIBCPP_BARRIER
330