1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__config>
49
50#ifdef _LIBCPP_HAS_NO_THREADS
51#  error "<barrier> is not supported since libc++ has been configured without support for threads."
52#endif
53
54#include <__assert> // all public C++ headers provide the assertion handler
55#include <__atomic/atomic_base.h>
56#include <__atomic/memory_order.h>
57#include <__availability>
58#include <__memory/unique_ptr.h>
59#include <__thread/poll_with_backoff.h>
60#include <__thread/timed_backoff_policy.h>
61#include <__utility/move.h>
62#include <cstddef>
63#include <cstdint>
64#include <limits>
65#include <version>
66
67#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
68#  pragma GCC system_header
69#endif
70
71_LIBCPP_PUSH_MACROS
72#include <__undef_macros>
73
74#if _LIBCPP_STD_VER >= 14
75
76_LIBCPP_BEGIN_NAMESPACE_STD
77
78struct __empty_completion {
79  inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
80};
81
82#  ifndef _LIBCPP_HAS_NO_TREE_BARRIER
83
84/*
85
86The default implementation of __barrier_base is a classic tree barrier.
87
88It looks different from literature pseudocode for two main reasons:
89 1. Threads that call into std::barrier functions do not provide indices,
90    so a numbering step is added before the actual barrier algorithm,
91    appearing as an N+1 round to the N rounds of the tree barrier.
92 2. A great deal of attention has been paid to avoid cache line thrashing
93    by flattening the tree structure into cache-line sized arrays, that
94    are indexed in an efficient way.
95
96*/
97
98using __barrier_phase_t = uint8_t;
99
100class __barrier_algorithm_base;
101
102_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
103__construct_barrier_algorithm_base(ptrdiff_t& __expected);
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
106__arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
107
108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
109__destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
110
111template <class _CompletionF>
112class __barrier_base {
113  ptrdiff_t __expected_;
114  unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
115  __atomic_base<ptrdiff_t> __expected_adjustment_;
116  _CompletionF __completion_;
117  __atomic_base<__barrier_phase_t> __phase_;
118
119public:
120  using arrival_token = __barrier_phase_t;
121
122  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
123
124  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
125  __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
126      : __expected_(__expected),
127        __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
128        __expected_adjustment_(0),
129        __completion_(std::move(__completion)),
130        __phase_(0) {}
131  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
132    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
133        __update <= __expected_, "update is greater than the expected count for the current barrier phase");
134
135    auto const __old_phase = __phase_.load(memory_order_relaxed);
136    for (; __update; --__update)
137      if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
138        __completion_();
139        __expected_ += __expected_adjustment_.load(memory_order_relaxed);
140        __expected_adjustment_.store(0, memory_order_relaxed);
141        __phase_.store(__old_phase + 2, memory_order_release);
142        __phase_.notify_all();
143      }
144    return __old_phase;
145  }
146  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
147    auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
148    std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
149  }
150  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
151    __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
152    (void)arrive(1);
153  }
154};
155
156#  else
157
158/*
159
160The alternative implementation of __barrier_base is a central barrier.
161
162Two versions of this algorithm are provided:
163 1. A fairly straightforward implementation of the litterature for the
164    general case where the completion function is not empty.
165 2. An optimized implementation that exploits 2's complement arithmetic
166    and well-defined overflow in atomic arithmetic, to handle the phase
167    roll-over for free.
168
169*/
170
171template <class _CompletionF>
172class __barrier_base {
173  __atomic_base<ptrdiff_t> __expected;
174  __atomic_base<ptrdiff_t> __arrived;
175  _CompletionF __completion;
176  __atomic_base<bool> __phase;
177
178public:
179  using arrival_token = bool;
180
181  static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
182
183  _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
184      : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
185  [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
186    auto const __old_phase  = __phase.load(memory_order_relaxed);
187    auto const __result     = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
188    auto const new_expected = __expected.load(memory_order_relaxed);
189
190    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
191        update <= new_expected, "update is greater than the expected count for the current barrier phase");
192
193    if (0 == __result) {
194      __completion();
195      __arrived.store(new_expected, memory_order_relaxed);
196      __phase.store(!__old_phase, memory_order_release);
197      __phase.notify_all();
198    }
199    return __old_phase;
200  }
201  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
202    __phase.wait(__old_phase, memory_order_acquire);
203  }
204  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
205    __expected.fetch_sub(1, memory_order_relaxed);
206    (void)arrive(1);
207  }
208};
209
210template <>
211class __barrier_base<__empty_completion> {
212  static constexpr uint64_t __expected_unit = 1ull;
213  static constexpr uint64_t __arrived_unit  = 1ull << 32;
214  static constexpr uint64_t __expected_mask = __arrived_unit - 1;
215  static constexpr uint64_t __phase_bit     = 1ull << 63;
216  static constexpr uint64_t __arrived_mask  = (__phase_bit - 1) & ~__expected_mask;
217
218  __atomic_base<uint64_t> __phase_arrived_expected;
219
220  static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
221    return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
222  }
223
224public:
225  using arrival_token = uint64_t;
226
227  static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
228
229  _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
230      : __phase_arrived_expected(__init(__count)) {}
231  [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
232    auto const __inc = __arrived_unit * update;
233    auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
234
235    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
236        update <= __old, "update is greater than the expected count for the current barrier phase");
237
238    if ((__old ^ (__old + __inc)) & __phase_bit) {
239      __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
240      __phase_arrived_expected.notify_all();
241    }
242    return __old & __phase_bit;
243  }
244  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
245    auto const __test_fn = [=]() -> bool {
246      uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
247      return ((__current & __phase_bit) != __phase);
248    };
249    __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
250  }
251  inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
252    __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
253    (void)arrive(1);
254  }
255};
256
257#  endif // !_LIBCPP_HAS_NO_TREE_BARRIER
258
259template <class _CompletionF = __empty_completion>
260class barrier {
261  __barrier_base<_CompletionF> __b_;
262
263public:
264  using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
265
266  static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
267
268  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
269      ptrdiff_t __count, _CompletionF __completion = _CompletionF())
270      : __b_(__count, std::move(__completion)) {
271    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
272        __count >= 0,
273        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
274    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
275        __count <= max(),
276        "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
277        "a value greater than max()");
278  }
279
280  barrier(barrier const&)            = delete;
281  barrier& operator=(barrier const&) = delete;
282
283  [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
284    _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
285    return __b_.arrive(__update);
286  }
287  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
288    __b_.wait(std::move(__phase));
289  }
290  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
291  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
292};
293
294_LIBCPP_END_NAMESPACE_STD
295
296#endif // _LIBCPP_STD_VER >= 14
297
298_LIBCPP_POP_MACROS
299
300#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
301#  include <atomic>
302#  include <concepts>
303#  include <iterator>
304#  include <memory>
305#  include <stdexcept>
306#  include <variant>
307#endif
308
309#endif //_LIBCPP_BARRIER
310