/*
 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_SHARED_TASKQUEUE_INLINE_HPP
#define SHARE_VM_GC_SHARED_TASKQUEUE_INLINE_HPP

#include "gc/shared/taskqueue.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/orderAccess.hpp"
#include "utilities/debug.hpp"
#include "utilities/stack.inline.hpp"

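// Illustrative usage (a sketch only; ExampleTask, n_workers and the worker
// loop are hypothetical, but the typedefs mirror ones such as OopTaskQueue
// in taskqueue.hpp):
//
//   typedef GenericTaskQueue<ExampleTask, mtGC>         ExampleTaskQueue;
//   typedef GenericTaskQueueSet<ExampleTaskQueue, mtGC> ExampleTaskQueueSet;
//
//   ExampleTaskQueueSet* qset = new ExampleTaskQueueSet(n_workers);
//   for (uint i = 0; i < n_workers; i++) {
//     ExampleTaskQueue* q = new ExampleTaskQueue();
//     q->initialize();
//     qset->register_queue(i, q);
//   }
//
//   // Worker i first drains its own queue, then steals from others:
//   //   ExampleTask task;
//   //   while (q->pop_local(task)) { /* process task */ }
//   //   while (qset->steal(i, &seed, task)) { /* process task */ }
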
template <class T, MEMFLAGS F>
inline GenericTaskQueueSet<T, F>::GenericTaskQueueSet(int n) : _n(n) {
  typedef T* GenericTaskQueuePtr;
  _queues = NEW_C_HEAP_ARRAY(GenericTaskQueuePtr, n, F);
  for (int i = 0; i < n; i++) {
    _queues[i] = NULL;
  }
}

template <class T, MEMFLAGS F>
inline GenericTaskQueueSet<T, F>::~GenericTaskQueueSet() {
  FREE_C_HEAP_ARRAY(T*, _queues);
}

template<class E, MEMFLAGS F, unsigned int N>
inline void GenericTaskQueue<E, F, N>::initialize() {
  _elems = ArrayAllocator<E>::allocate(N, F);
}

template<class E, MEMFLAGS F, unsigned int N>
inline GenericTaskQueue<E, F, N>::~GenericTaskQueue() {
  ArrayAllocator<E>::free(const_cast<E*>(_elems), N);
}

template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::push_slow(E t, uint dirty_n_elems) {
  if (dirty_n_elems == N - 1) {
    // Actually means 0, so do the push.
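    // (A dirty size of N - 1 is the encoding of a logically empty queue:
    // size() maps it back to 0, and the deque never holds more than
    // max_elems() == N - 2 entries, so this state cannot mean "full".)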
    uint localBot = _bottom;
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away.  We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment.  However, casting to E& means that we trigger an
    // unused-value warning.  So, we cast the E& to void.
    (void)const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  }
  return false;
}

template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::push(E t) {
  uint localBot = _bottom;
  assert(localBot < N, "_bottom out of range.");
  idx_t top = _age.top();
  uint dirty_n_elems = dirty_size(localBot, top);
  assert(dirty_n_elems < N, "n_elems out of range.");
  if (dirty_n_elems < max_elems()) {
    // g++ complains if the volatile result of the assignment is
    // unused, so we cast the volatile away.  We cannot cast directly
    // to void, because gcc treats that as not using the result of the
    // assignment.  However, casting to E& means that we trigger an
    // unused-value warning.  So, we cast the E& to void.
    (void) const_cast<E&>(_elems[localBot] = t);
    OrderAccess::release_store(&_bottom, increment_index(localBot));
    TASKQUEUE_STATS_ONLY(stats.record_push());
    return true;
  } else {
    return push_slow(t, dirty_n_elems);
  }
}

template <class E, MEMFLAGS F, unsigned int N>
inline bool OverflowTaskQueue<E, F, N>::push(E t)
{
  if (!taskqueue_t::push(t)) {
    overflow_stack()->push(t);
    TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->size()));
  }
  return true;
}

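// Unlike push() above, which never reports failure (a task that does not
// fit in the bounded queue spills to the overflow stack), this variant
// attempts only the bounded queue and lets the caller handle a full queue.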
template <class E, MEMFLAGS F, unsigned int N>
inline bool OverflowTaskQueue<E, F, N>::try_push_to_taskqueue(E t) {
  return taskqueue_t::push(t);
}

// pop_local_slow() is done by the owning thread and is trying to
// get the last task in the queue.  It will compete with pop_global()
// calls issued by other threads.  The tag of the age is incremented
// whenever the queue goes empty, which happens here if this thread
// gets the last task, or in pop_global() if the queue wraps (top == 0
// and pop_global() succeeds, see pop_global()).
template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_local_slow(uint localBot, Age oldAge) {
  // This queue was observed to contain exactly one element; either this
  // thread will claim it, or a competing "pop_global".  In either case,
  // the queue will be logically empty afterwards.  Create a new Age value
  // that represents the empty queue for the given value of "_bottom".  (We
  // must also increment "tag" because of the case where "bottom == 1",
  // "top == 0".  A pop_global could read the queue element in that case,
  // then have the owner thread do a pop followed by another push.  Without
  // the incrementing of "tag", the pop_global's CAS could succeed,
  // allowing it to believe it has claimed the stale element.)
  Age newAge((idx_t)localBot, oldAge.tag() + 1);
  // Perhaps a competing pop_global has already incremented "top", in which
  // case it wins the element.
  if (localBot == oldAge.top()) {
    // No competing pop_global has yet incremented "top"; we'll try to
    // install new_age, thus claiming the element.
    Age tempAge = _age.cmpxchg(newAge, oldAge);
    if (tempAge == oldAge) {
      // We win.
      assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
      TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
      return true;
    }
  }
  // We lose; a competing pop_global gets the element.  But the queue is empty
  // and top is greater than bottom.  Fix this representation of the empty queue
  // to become the canonical one.
  _age.set(newAge);
  assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
  return false;
}

template<class E, MEMFLAGS F, unsigned int N> inline bool
GenericTaskQueue<E, F, N>::pop_local(volatile E& t, uint threshold) {
  uint localBot = _bottom;
  // This value cannot be N-1.  That can only occur as a result of
  // the assignment to bottom in this method.  If it does, this method
  // resets the size to 0 before the next call (which is sequential,
  // since this is pop_local.)
  uint dirty_n_elems = dirty_size(localBot, _age.top());
  assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
  if (dirty_n_elems <= threshold) return false;
  localBot = decrement_index(localBot);
  _bottom = localBot;
  // This is necessary to prevent any read below from being reordered
  // before the store just above.
  OrderAccess::fence();
  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away.  We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment.  However, casting to E& means that we trigger an
  // unused-value warning.  So, we cast the E& to void.
  (void) const_cast<E&>(t = _elems[localBot]);
  // This is a second read of "age"; the "size()" above is the first.
  // If there's still at least one element in the queue, based on the
  // "_bottom" and "age" we've read, then there can be no interference with
  // a "pop_global" operation, and we're done.
  idx_t tp = _age.top();    // XXX
  if (size(localBot, tp) > 0) {
    assert(dirty_size(localBot, tp) != N - 1, "sanity");
    TASKQUEUE_STATS_ONLY(stats.record_pop());
    return true;
  } else {
    // Otherwise, the queue contained exactly one element; we take the slow
    // path.

    // The barrier is required to prevent reordering the two reads of _age:
    // one is the _age.get() below, and the other is _age.top() above the if-stmt.
    // The algorithm may fail if _age.get() reads an older value than _age.top().
    OrderAccess::loadload();
    return pop_local_slow(localBot, _age.get());
  }
}

template <class E, MEMFLAGS F, unsigned int N>
bool OverflowTaskQueue<E, F, N>::pop_overflow(E& t)
{
  if (overflow_empty()) return false;
  t = overflow_stack()->pop();
  return true;
}

template<class E, MEMFLAGS F, unsigned int N>
bool GenericTaskQueue<E, F, N>::pop_global(volatile E& t) {
  Age oldAge = _age.get();
  // Architectures with a weak memory model require a barrier here
  // to guarantee that bottom is not older than age,
  // which is crucial for the correctness of the algorithm.
#if !(defined SPARC || defined IA32 || defined AMD64)
  OrderAccess::fence();
#endif
  uint localBot = OrderAccess::load_acquire(&_bottom);
  uint n_elems = size(localBot, oldAge.top());
  if (n_elems == 0) {
    return false;
  }

  // g++ complains if the volatile result of the assignment is
  // unused, so we cast the volatile away.  We cannot cast directly
  // to void, because gcc treats that as not using the result of the
  // assignment.  However, casting to E& means that we trigger an
  // unused-value warning.  So, we cast the E& to void.
  (void) const_cast<E&>(t = _elems[oldAge.top()]);
  Age newAge(oldAge);
  newAge.increment();
  Age resAge = _age.cmpxchg(newAge, oldAge);

  // Note that using "_bottom" here might fail, since a pop_local might
  // have decremented it.
  assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
  return resAge == oldAge;
}

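// A false return from pop_global() means either that the queue appeared
// empty or that the cmpxchg lost a race with a competing pop_global() or
// pop_local_slow(); callers typically just retry elsewhere.
//
// steal_best_of_2() samples two random victim queues and steals from the
// one that appears larger -- the classic "power of two choices" heuristic.
// The size readings are racy, but a stale reading only biases victim
// selection; the actual transfer is still guarded by pop_global()'s cmpxchg.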
template<class T, MEMFLAGS F> bool
GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
  if (_n > 2) {
    uint k1 = queue_num;
    while (k1 == queue_num) k1 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
    uint k2 = queue_num;
    while (k2 == queue_num || k2 == k1) k2 = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
    // Sample both and try the larger.
    uint sz1 = _queues[k1]->size();
    uint sz2 = _queues[k2]->size();
    if (sz2 > sz1) return _queues[k2]->pop_global(t);
    else return _queues[k1]->pop_global(t);
  } else if (_n == 2) {
    // Just try the other one.
    uint k = (queue_num + 1) % 2;
    return _queues[k]->pop_global(t);
  } else {
    assert(_n == 1, "can't be zero.");
    return false;
  }
}

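// steal() makes up to 2 * _n attempts before giving up.  A failed attempt
// may reflect contention rather than a real lack of work, so callers
// typically pair this with a separate termination protocol before
// concluding that all queues are empty.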
template<class T, MEMFLAGS F> bool
GenericTaskQueueSet<T, F>::steal(uint queue_num, int* seed, E& t) {
  for (uint i = 0; i < 2 * _n; i++) {
    if (steal_best_of_2(queue_num, seed, t)) {
      TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
      return true;
    }
  }
  TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
  return false;
}

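// Age packs "top" and "tag" into a single word (see TaskQueueSuper::Age in
// taskqueue.hpp) so that both fields can be updated together by one atomic
// compare-and-exchange.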
template <unsigned int N, MEMFLAGS F>
inline typename TaskQueueSuper<N, F>::Age TaskQueueSuper<N, F>::Age::cmpxchg(const Age new_age, const Age old_age) volatile {
  return Atomic::cmpxchg(new_age._data, &_data, old_age._data);
}

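// Note: iterate() reads _bottom and _elems without synchronization, so it
// is only safe when no push/pop/steal can run concurrently (for example,
// at a safepoint or during single-threaded setup).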
template<class E, MEMFLAGS F, unsigned int N>
template<class Fn>
inline void GenericTaskQueue<E, F, N>::iterate(Fn fn) {
  uint iters = size();
  uint index = _bottom;
  for (uint i = 0; i < iters; ++i) {
    index = decrement_index(index);
    fn(const_cast<E&>(_elems[index])); // cast away volatility
  }
}


#endif // SHARE_VM_GC_SHARED_TASKQUEUE_INLINE_HPP