1 /*
2 //@HEADER
3 // ************************************************************************
4 //
5 //                        Kokkos v. 3.0
6 //       Copyright (2020) National Technology & Engineering
7 //               Solutions of Sandia, LLC (NTESS).
8 //
9 // Under the terms of Contract DE-NA0003525 with NTESS,
10 // the U.S. Government retains certain rights in this software.
11 //
12 // Redistribution and use in source and binary forms, with or without
13 // modification, are permitted provided that the following conditions are
14 // met:
15 //
16 // 1. Redistributions of source code must retain the above copyright
17 // notice, this list of conditions and the following disclaimer.
18 //
19 // 2. Redistributions in binary form must reproduce the above copyright
20 // notice, this list of conditions and the following disclaimer in the
21 // documentation and/or other materials provided with the distribution.
22 //
23 // 3. Neither the name of the Corporation nor the names of the
24 // contributors may be used to endorse or promote products derived from
25 // this software without specific prior written permission.
26 //
27 // THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
28 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
31 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
33 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
34 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 //
39 // Questions? Contact Christian R. Trott (crtrott@sandia.gov)
40 //
41 // ************************************************************************
42 //@HEADER
43 */
44 
45 // Experimental unified task-data parallel manycore LDRD
46 
47 #ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP
48 #define KOKKOS_IMPL_LOCKFREEDEQUE_HPP
49 
50 #include <Kokkos_Macros.hpp>
51 #ifdef KOKKOS_ENABLE_TASKDAG
52 
53 #include <Kokkos_Core_fwd.hpp>
54 
55 #include <Kokkos_PointerOwnership.hpp>
56 #include <impl/Kokkos_OptionalRef.hpp>
57 #include <impl/Kokkos_Error.hpp>           // KOKKOS_EXPECTS
58 #include <impl/Kokkos_LinkedListNode.hpp>  // KOKKOS_EXPECTS
59 
60 #include <Kokkos_Atomic.hpp>  // atomic_compare_exchange, atomic_fence
61 #include "Kokkos_LIFO.hpp"
62 
63 //----------------------------------------------------------------------------
64 //----------------------------------------------------------------------------
65 
66 namespace Kokkos {
67 namespace Impl {
68 
69 //----------------------------------------------------------------------------
70 //----------------------------------------------------------------------------
71 
72 template <class NodeType, size_t CircularBufferSize, class SizeType = size_t>
73 struct fixed_size_circular_buffer {
74  public:
75   using node_type = NodeType;
76   using size_type = SizeType;
77 
78  private:
79   node_type* m_buffer[CircularBufferSize] = {nullptr};
80 
81  public:
82   fixed_size_circular_buffer()                                  = default;
83   fixed_size_circular_buffer(fixed_size_circular_buffer const&) = delete;
84   fixed_size_circular_buffer(fixed_size_circular_buffer&&)      = default;
85   fixed_size_circular_buffer& operator=(fixed_size_circular_buffer const&) =
86       delete;
87   fixed_size_circular_buffer& operator=(fixed_size_circular_buffer&&) = default;
88   ~fixed_size_circular_buffer()                                       = default;
89 
90   KOKKOS_FORCEINLINE_FUNCTION
sizeKokkos::Impl::fixed_size_circular_buffer91   static constexpr size_type size() noexcept {
92     return size_type(CircularBufferSize);
93   }
94 
95   KOKKOS_FORCEINLINE_FUNCTION
operator []Kokkos::Impl::fixed_size_circular_buffer96   node_type* operator[](size_type idx) const noexcept {
97     return m_buffer[idx % size()];
98   }
99 
100   KOKKOS_FORCEINLINE_FUNCTION
operator []Kokkos::Impl::fixed_size_circular_buffer101   node_type*& operator[](size_type idx) noexcept {
102     return m_buffer[idx % size()];
103   }
104 };
105 
106 template <class NodeType, class SizeType = size_t>
107 struct non_owning_variable_size_circular_buffer {
108  public:
109   using node_type = NodeType;
110   using size_type = SizeType;
111 
112  private:
113   ObservingRawPtr<node_type*> m_buffer = nullptr;
114   size_type m_size                     = 0;
115 
116  public:
117   KOKKOS_INLINE_FUNCTION
non_owning_variable_size_circular_bufferKokkos::Impl::non_owning_variable_size_circular_buffer118   non_owning_variable_size_circular_buffer(ObservingRawPtr<node_type*> buffer,
119                                            size_type arg_size) noexcept
120       : m_buffer(buffer), m_size(arg_size) {}
121 
122   non_owning_variable_size_circular_buffer() = default;
123   non_owning_variable_size_circular_buffer(
124       non_owning_variable_size_circular_buffer const&) = delete;
125   non_owning_variable_size_circular_buffer(
126       non_owning_variable_size_circular_buffer&&)      = default;
127   non_owning_variable_size_circular_buffer& operator   =(
128       non_owning_variable_size_circular_buffer const&) = delete;
129   non_owning_variable_size_circular_buffer& operator   =(
130       non_owning_variable_size_circular_buffer&&) = default;
131   ~non_owning_variable_size_circular_buffer()          = default;
132 
133   KOKKOS_FORCEINLINE_FUNCTION
sizeKokkos::Impl::non_owning_variable_size_circular_buffer134   constexpr size_type size() const noexcept { return m_size; }
135 
136   KOKKOS_FORCEINLINE_FUNCTION
operator []Kokkos::Impl::non_owning_variable_size_circular_buffer137   node_type* operator[](size_type idx) const noexcept {
138     return m_buffer[idx % size()];
139   }
140 
141   KOKKOS_FORCEINLINE_FUNCTION
operator []Kokkos::Impl::non_owning_variable_size_circular_buffer142   node_type*& operator[](size_type idx) noexcept {
143     return m_buffer[idx % size()];
144   }
145 };
146 
147 /** Based on "Correct and Efficient Work-Stealing for Weak Memory Models,"
148  * PPoPP '13, https://www.di.ens.fr/~zappa/readings/ppopp13.pdf
149  *
150  */
151 template <class T, class CircularBufferT, class SizeType = int32_t>
152 struct ChaseLevDeque {
153  public:
154   using size_type  = SizeType;
155   using value_type = T;
156   // Still using intrusive linked list for waiting queue
157   using node_type = SimpleSinglyLinkedListNode<>;
158 
159  private:
160   // TODO @tasking @new_feature DSH variable size circular buffer?
161 
162   CircularBufferT m_array;
163   size_type m_top    = 0;
164   size_type m_bottom = 0;
165 
166  public:
167   template <class _ignore = void,
168             class         = typename std::enable_if<
169                 std::is_default_constructible<CircularBufferT>::value>::type>
ChaseLevDequeKokkos::Impl::ChaseLevDeque170   ChaseLevDeque() : m_array() {}
171 
ChaseLevDequeKokkos::Impl::ChaseLevDeque172   explicit ChaseLevDeque(CircularBufferT buffer) : m_array(std::move(buffer)) {}
173 
174   KOKKOS_INLINE_FUNCTION
emptyKokkos::Impl::ChaseLevDeque175   bool empty() const {
176     // TODO @tasking @memory_order DSH memory order
177     return m_top > m_bottom - 1;
178   }
179 
180   KOKKOS_INLINE_FUNCTION
popKokkos::Impl::ChaseLevDeque181   OptionalRef<T> pop() {
182     auto b   = m_bottom - 1;  // atomic load relaxed
183     auto& a  = m_array;       // atomic load relaxed
184     m_bottom = b;             // atomic store relaxed
185     Kokkos::memory_fence();   // memory order seq_cst
186     auto t = m_top;           // atomic load relaxed
187     OptionalRef<T> return_value;
188     if (t <= b) {
189       /* non-empty queue */
190       return_value = *static_cast<T*>(a[b]);  // relaxed load
191       if (t == b) {
192         /* single last element in the queue. */
193 #ifdef _WIN32
194         Kokkos::memory_fence();
195         bool const success =
196             Kokkos::atomic_compare_exchange_strong(&m_top, t, t + 1);
197         Kokkos::memory_fence();
198         if (!success) {
199           return_value = nullptr;
200         }
201 #else
202         if (!Impl::atomic_compare_exchange_strong(
203                 &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) {
204           /* failed race, someone else stole it */
205           return_value = nullptr;
206         }
207 #endif
208         m_bottom = b + 1;  // memory order relaxed
209       }
210     } else {
211       /* empty queue */
212       m_bottom = b + 1;  // memory order relaxed
213     }
214     return return_value;
215   }
216 
217   KOKKOS_INLINE_FUNCTION
pushKokkos::Impl::ChaseLevDeque218   bool push(node_type&& node) {
219     // Just forward to the lvalue version
220     return push(node);
221   }
222 
223   KOKKOS_INLINE_FUNCTION
pushKokkos::Impl::ChaseLevDeque224   bool push(node_type& node) {
225     auto b  = m_bottom;  // memory order relaxed
226     auto t  = Impl::atomic_load(&m_top, memory_order_acquire);
227     auto& a = m_array;
228     if (b - t > a.size() - 1) {
229       /* queue is full, resize */
230       // m_array = a->grow();
231       // a = m_array;
232       return false;
233     }
234     a[b] = &node;  // relaxed
235     Impl::atomic_store(&m_bottom, b + 1, memory_order_release);
236     return true;
237   }
238 
239   KOKKOS_INLINE_FUNCTION
stealKokkos::Impl::ChaseLevDeque240   OptionalRef<T> steal() {
241     auto t = m_top;  // TODO @tasking @memory_order DSH: atomic load acquire
242     Kokkos::memory_fence();  // seq_cst fence, so why does the above need to be
243                              // acquire?
244     auto b = Impl::atomic_load(&m_bottom, memory_order_acquire);
245     OptionalRef<T> return_value;
246     if (t < b) {
247       /* Non-empty queue */
248       auto& a = m_array;     // TODO @tasking @memory_order DSH: technically
249                              // consume ordered, but acquire should be fine
250       Kokkos::load_fence();  // TODO @tasking @memory_order DSH memory order
251                              // instead of fence
252       return_value = *static_cast<T*>(a[t]);  // relaxed
253 #ifdef _WIN32
254       Kokkos::memory_fence();
255       bool const success =
256           Kokkos::atomic_compare_exchange_strong(&m_top, t, t + 1);
257       Kokkos::memory_fence();
258       if (!success) {
259         return_value = nullptr;
260       }
261 #else
262       if (!Impl::atomic_compare_exchange_strong(
263               &m_top, t, t + 1, memory_order_seq_cst, memory_order_relaxed)) {
264         return_value = nullptr;
265       }
266 #endif
267     }
268     return return_value;
269   }
270 };
271 
272 /*
273       // The atomicity of this load was more important in the paper's version
274       // because that version had a circular buffer that could grow.  We're
275       // essentially using the memory order in this version as a fence, which
276       // may be unnecessary
277       auto buffer_ptr = (node_type***)&m_array.buffer;
278       auto a = Impl::atomic_load(buffer_ptr, memory_order_acquire); //
279    technically consume ordered, but acquire should be fine return_value =
280    *static_cast<T*>(a[t % m_array->size]); // relaxed; we'd have to replace the
281    m_array->size if we ever allow growth
282 */
283 
284 //----------------------------------------------------------------------------
285 //----------------------------------------------------------------------------
286 
287 template <size_t CircularBufferSize>
288 struct TaskQueueTraitsChaseLev {
289   template <class Task>
290   using ready_queue_type =
291       ChaseLevDeque<Task,
292                     fixed_size_circular_buffer<SimpleSinglyLinkedListNode<>,
293                                                CircularBufferSize, int32_t>,
294                     int32_t>;
295 
296   template <class Task>
297   using waiting_queue_type = SingleConsumeOperationLIFO<Task>;
298 
299   template <class Task>
300   using intrusive_task_base_type = typename ready_queue_type<Task>::node_type;
301 
302   static constexpr auto ready_queue_insertion_may_fail = true;
303 };
304 
305 }  // end namespace Impl
306 }  // end namespace Kokkos
307 
308 //----------------------------------------------------------------------------
309 //----------------------------------------------------------------------------
310 
311 #endif /* defined KOKKOS_ENABLE_TASKDAG */
312 #endif /* #ifndef KOKKOS_IMPL_LOCKFREEDEQUE_HPP */
313