// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

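  // The default completion function for std::barrier: a no-op.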
  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoiding cache line
    thrashing by flattening the tree structure into cache-line-sized
    arrays that are indexed efficiently.

*/
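
/*
An illustrative walk-through (example numbers, not normative): with
_M_expected == 8 the constructor allocates (8 + 1) >> 1 == 4 __state_t
nodes.  In round 0 the eight threads pair off on those four nodes; at
each node, the first arriver CASes the round's ticket from the old phase
to the half step and is done arriving, while the second CASes it on to
the full step and advances to round 1.  Each round halves the number of
surviving threads (8, 4, 2), until a single thread sees
__current_expected <= 1 and becomes responsible for running the
completion and publishing the new phase.
*/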

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
		      __atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
	alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t  _M_phase;

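      // Arrive for the phase __old_phase, starting from the tree node
      // derived from __current.  Returns true when the calling thread wins
      // the final round and must run the completion and publish the next
      // phase; returns false once its arrival has been handed off to the
      // thread that will carry it up the tree.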
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
	const auto __half_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 1);
	const auto __full_step =
			   static_cast<__barrier_phase_t>(__old_phase_val + 2);

	size_t __current_expected = _M_expected;
	__current %= ((_M_expected + 1) >> 1);

	for (int __round = 0; ; ++__round)
	  {
	    if (__current_expected <= 1)
	      return true;
	    size_t const __end_node = ((__current_expected + 1) >> 1),
			 __last_node = __end_node - 1;
	    for ( ; ; ++__current)
	      {
		if (__current == __end_node)
		  __current = 0;
		auto __expect = __old_phase;
		__atomic_phase_ref_t __phase(_M_state[__current]
						.__tickets[__round]);
		if (__current == __last_node && (__current_expected & 1))
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 1 in 1, go to next __round
		  }
		else if (__phase.compare_exchange_strong(__expect, __half_step,
							 memory_order_acq_rel))
		  {
		    return false; // I'm 1 in 2, done with arrival
		  }
		else if (__expect == __half_step)
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;    // I'm 2 in 2, go to next __round
		  }
	      }
	    __current_expected = __last_node + 1;
	    __current >>= 1;
	  }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
	  : _M_expected(__expected), _M_expected_adjustment(0),
	    _M_completion(move(__completion)),
	    _M_phase(static_cast<__barrier_phase_t>(0))
      {
	// One tree node is allocated per pair of participating threads.
	size_t const __count = (_M_expected + 1) >> 1;

	_M_state = std::make_unique<__state_t[]>(__count);
      }

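      // Record __update arrivals for the current phase.  The thread that
      // completes the final arrival runs the completion function, folds in
      // any pending arrive_and_drop() adjustments, advances the phase, and
      // wakes all waiters.  Returns the phase token to hand to wait().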
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
	std::hash<std::thread::id> __hasher;
	size_t __current = __hasher(std::this_thread::get_id());
	__atomic_phase_ref_t __phase(_M_phase);
	const auto __old_phase = __phase.load(memory_order_relaxed);
	const auto __cur = static_cast<unsigned char>(__old_phase);
	for (; __update; --__update)
	  {
	    if (_M_arrive(__old_phase, __current))
	      {
		_M_completion();
		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
		_M_expected_adjustment.store(0, memory_order_relaxed);
		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
		__phase.store(__new_phase, memory_order_release);
		__phase.notify_all();
	      }
	  }
	return __old_phase;
      }

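      // Block until the barrier phase has advanced past the phase for
      // which the token was issued.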
      void
      wait(arrival_token&& __old_phase) const
      {
	__atomic_phase_const_ref_t __phase(_M_phase);
	auto const __test_fn = [=]
	  {
	    return __phase.load(memory_order_acquire) != __old_phase;
	  };
	std::__atomic_wait_address(&_M_phase, __test_fn);
      }

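      // Arrive, and remove this thread from the set counted in subsequent
      // phases.  The decrement takes effect when the thread completing the
      // current phase folds in the adjustment.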
      void
      arrive_and_drop()
      {
	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
	(void)arrive(1);
      }
    };

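  /** @brief A C++20 barrier: a reusable thread coordination point.
   *
   *  A barrier blocks a group of threads until the expected number of
   *  them arrive, then runs a completion function and releases them all
   *  for the next phase.  Unlike std::latch, a barrier is reusable.
   */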
  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note: we may introduce a "central" barrier algorithm at some point,
      // for more space-constrained targets.
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
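      // Move-only token type returned by arrive() and consumed by wait().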
      class arrival_token final
      {
      public:
	arrival_token(arrival_token&&) = default;
	arrival_token& operator=(arrival_token&&) = default;
	~arrival_token() = default;

      private:
	friend class barrier;
	using __token = typename __algorithm_t::arrival_token;
	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
	__token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };
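
  /* A minimal usage sketch (illustrative only; the names below are
     examples, not part of this header).  Three threads do phase-1 work,
     rendezvous, then proceed to phase 2 together:

	#include <barrier>
	#include <thread>
	#include <vector>

	int main()
	{
	  std::barrier sync_point(3, []() noexcept {
	    // Runs exactly once per phase, after all three threads arrive.
	  });

	  std::vector<std::jthread> threads;
	  for (int i = 0; i < 3; ++i)
	    threads.emplace_back([&] {
	      // ... phase 1 work ...
	      sync_point.arrive_and_wait(); // block until all three arrive
	      // ... phase 2 work ...
	    });
	} // jthreads join on destruction
  */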

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER
