1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__assert> // all public C++ headers provide the assertion handler
49#include <__availability>
50#include <__config>
51#include <__thread/timed_backoff_policy.h>
52#include <atomic>
53#include <limits>
54#include <memory>
55
56#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
57#  pragma GCC system_header
58#endif
59
60#ifdef _LIBCPP_HAS_NO_THREADS
61# error "<barrier> is not supported since libc++ has been configured without support for threads."
62#endif
63
64_LIBCPP_PUSH_MACROS
65#include <__undef_macros>
66
67#if _LIBCPP_STD_VER >= 14
68
69_LIBCPP_BEGIN_NAMESPACE_STD
70
71struct __empty_completion
72{
73    inline _LIBCPP_INLINE_VISIBILITY
74    void operator()() noexcept
75    {
76    }
77};
78
79#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
80
81/*
82
83The default implementation of __barrier_base is a classic tree barrier.
84
85It looks different from literature pseudocode for two main reasons:
86 1. Threads that call into std::barrier functions do not provide indices,
87    so a numbering step is added before the actual barrier algorithm,
88    appearing as an N+1 round to the N rounds of the tree barrier.
89 2. A great deal of attention has been paid to avoid cache line thrashing
90    by flattening the tree structure into cache-line sized arrays, that
91    are indexed in an efficient way.
92
93*/
94
95using __barrier_phase_t = uint8_t;
96
97class __barrier_algorithm_base;
98
99_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
100__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
101
102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
103bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
104                                     __barrier_phase_t __old_phase);
105
106_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
107void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
108
109template<class _CompletionF>
110class __barrier_base {
111    ptrdiff_t                                               __expected_;
112    unique_ptr<__barrier_algorithm_base,
113               void (*)(__barrier_algorithm_base*)>         __base_;
114    __atomic_base<ptrdiff_t>                                __expected_adjustment_;
115    _CompletionF                                            __completion_;
116    __atomic_base<__barrier_phase_t>                        __phase_;
117
118public:
119    using arrival_token = __barrier_phase_t;
120
121    static constexpr ptrdiff_t max() noexcept {
122        return numeric_limits<ptrdiff_t>::max();
123    }
124
125    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
126    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
127            : __expected_(__expected), __base_(__construct_barrier_algorithm_base(this->__expected_),
128                                               &__destroy_barrier_algorithm_base),
129              __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
130    {
131    }
132    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
133    arrival_token arrive(ptrdiff_t __update)
134    {
135        auto const __old_phase = __phase_.load(memory_order_relaxed);
136        for(; __update; --__update)
137            if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
138                __completion_();
139                __expected_ += __expected_adjustment_.load(memory_order_relaxed);
140                __expected_adjustment_.store(0, memory_order_relaxed);
141                __phase_.store(__old_phase + 2, memory_order_release);
142                __phase_.notify_all();
143            }
144        return __old_phase;
145    }
146    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
147    void wait(arrival_token&& __old_phase) const
148    {
149        auto const __test_fn = [this, __old_phase]() -> bool {
150            return __phase_.load(memory_order_acquire) != __old_phase;
151        };
152        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
153    }
154    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
155    void arrive_and_drop()
156    {
157        __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
158        (void)arrive(1);
159    }
160};
161
162#else
163
164/*
165
166The alternative implementation of __barrier_base is a central barrier.
167
168Two versions of this algorithm are provided:
169 1. A fairly straightforward implementation of the litterature for the
170    general case where the completion function is not empty.
171 2. An optimized implementation that exploits 2's complement arithmetic
172    and well-defined overflow in atomic arithmetic, to handle the phase
173    roll-over for free.
174
175*/
176
177template<class _CompletionF>
178class __barrier_base {
179
180    __atomic_base<ptrdiff_t> __expected;
181    __atomic_base<ptrdiff_t> __arrived;
182    _CompletionF             __completion;
183    __atomic_base<bool>      __phase;
184public:
185    using arrival_token = bool;
186
187    static constexpr ptrdiff_t max() noexcept {
188        return numeric_limits<ptrdiff_t>::max();
189    }
190
191    _LIBCPP_INLINE_VISIBILITY
192    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
193        : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
194    {
195    }
196    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
197    arrival_token arrive(ptrdiff_t update)
198    {
199        auto const __old_phase = __phase.load(memory_order_relaxed);
200        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
201        auto const new_expected = __expected.load(memory_order_relaxed);
202        if(0 == __result) {
203            __completion();
204            __arrived.store(new_expected, memory_order_relaxed);
205            __phase.store(!__old_phase, memory_order_release);
206            __phase.notify_all();
207        }
208        return __old_phase;
209    }
210    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
211    void wait(arrival_token&& __old_phase) const
212    {
213        __phase.wait(__old_phase, memory_order_acquire);
214    }
215    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
216    void arrive_and_drop()
217    {
218        __expected.fetch_sub(1, memory_order_relaxed);
219        (void)arrive(1);
220    }
221};
222
223template<>
224class __barrier_base<__empty_completion> {
225
226    static constexpr uint64_t __expected_unit = 1ull;
227    static constexpr uint64_t __arrived_unit = 1ull << 32;
228    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
229    static constexpr uint64_t __phase_bit = 1ull << 63;
230    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
231
232    __atomic_base<uint64_t>   __phase_arrived_expected;
233
234    static _LIBCPP_INLINE_VISIBILITY
235    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
236    {
237        return ((uint64_t(1u << 31) - __count) << 32)
238              | (uint64_t(1u << 31) - __count);
239    }
240
241public:
242    using arrival_token = uint64_t;
243
244    static constexpr ptrdiff_t max() noexcept {
245        return ptrdiff_t(1u << 31) - 1;
246    }
247
248    _LIBCPP_INLINE_VISIBILITY
249    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
250        : __phase_arrived_expected(__init(__count))
251    {
252    }
253    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
254    arrival_token arrive(ptrdiff_t update)
255    {
256        auto const __inc = __arrived_unit * update;
257        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
258        if((__old ^ (__old + __inc)) & __phase_bit) {
259            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
260            __phase_arrived_expected.notify_all();
261        }
262        return __old & __phase_bit;
263    }
264    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
265    void wait(arrival_token&& __phase) const
266    {
267        auto const __test_fn = [=]() -> bool {
268            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
269            return ((__current & __phase_bit) != __phase);
270        };
271        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
272    }
273    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
274    void arrive_and_drop()
275    {
276        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
277        (void)arrive(1);
278    }
279};
280
281#endif // !_LIBCPP_HAS_NO_TREE_BARRIER
282
283template<class _CompletionF = __empty_completion>
284class barrier {
285
286    __barrier_base<_CompletionF> __b;
287public:
288    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
289
290    static constexpr ptrdiff_t max() noexcept {
291        return __barrier_base<_CompletionF>::max();
292    }
293
294    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
295    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
296        : __b(__count, _VSTD::move(__completion)) {
297    }
298
299    barrier(barrier const&) = delete;
300    barrier& operator=(barrier const&) = delete;
301
302    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
303    arrival_token arrive(ptrdiff_t __update = 1)
304    {
305        return __b.arrive(__update);
306    }
307    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
308    void wait(arrival_token&& __phase) const
309    {
310        __b.wait(_VSTD::move(__phase));
311    }
312    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
313    void arrive_and_wait()
314    {
315        wait(arrive());
316    }
317    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
318    void arrive_and_drop()
319    {
320        __b.arrive_and_drop();
321    }
322};
323
324_LIBCPP_END_NAMESPACE_STD
325
326#endif // _LIBCPP_STD_VER >= 14
327
328_LIBCPP_POP_MACROS
329
330#endif //_LIBCPP_BARRIER
331