/*
 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP

#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.hpp"

class G1ConcurrentRefineThread;
class G1DirtyCardQueueSet;
class G1RedirtyCardsQueueSet;
class Thread;
// A ptrQueue whose elements are addresses of card table entries, one for
// each card dirtied by the mutator's post-write barrier.
class G1DirtyCardQueue: public PtrQueue {
  G1ConcurrentRefineStats* _refinement_stats;

protected:
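  // Invoked by the PtrQueue machinery when this queue's buffer is full;
  // expected to hand the completed buffer, together with this queue's
  // refinement stats, to the owning G1DirtyCardQueueSet (see its
  // handle_completed_buffer() below).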
  virtual void handle_completed_buffer();

public:
  G1DirtyCardQueue(G1DirtyCardQueueSet* qset);

  // Flush before destroying; queue may be used to capture pending work while
  // doing something else, with auto-flush on completion.
  ~G1DirtyCardQueue();

  // Process queue entries and release resources.
  void flush();

  inline G1DirtyCardQueueSet* dirty_card_qset() const;

  G1ConcurrentRefineStats* refinement_stats() const {
    return _refinement_stats;
  }

  // To be called by the barrier set's on_thread_detach, to notify this
  // object of the corresponding state change of its owning thread.
  void on_thread_detach();

  // Compiler support.
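  // These offsets (together with the byte_width_of_* values below) let
  // compiler-generated write-barrier code, via G1ThreadLocalData, update
  // the queue's index and buffer fields directly.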
  static ByteSize byte_offset_of_index() {
    return PtrQueue::byte_offset_of_index<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_index;

  static ByteSize byte_offset_of_buf() {
    return PtrQueue::byte_offset_of_buf<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_buf;

};

class G1DirtyCardQueueSet: public PtrQueueSet {
  // Head and tail of a list of BufferNodes, linked through their next()
  // fields. Similar to G1BufferNodeList, but without the _entry_count.
  struct HeadTail {
    BufferNode* _head;
    BufferNode* _tail;
    HeadTail() : _head(NULL), _tail(NULL) {}
    HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
  };

  // A lock-free FIFO of BufferNodes, linked through their next() fields.
  // This class has a restriction that pop() may return NULL when there are
  // buffers in the queue if there is a concurrent push/append operation.
  class Queue {
    BufferNode* volatile _head;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));
    BufferNode* volatile _tail;
    DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));

    NONCOPYABLE(Queue);

  public:
    Queue() : _head(NULL), _tail(NULL) {}
    DEBUG_ONLY(~Queue();)

    // Return the first buffer in the queue.
    // Thread-safe, but the result may change immediately.
    BufferNode* top() const;

    // Thread-safe add the buffer to the end of the queue.
    void push(BufferNode& node) { append(node, node); }

    // Thread-safe add the buffers from first to last to the end of the queue.
    void append(BufferNode& first, BufferNode& last);

    // Thread-safe attempt to remove and return the first buffer in the queue.
    // Returns NULL if the queue is empty, or if a concurrent push/append
    // interferes. Uses GlobalCounter critical sections to address the ABA
    // problem; this works with the buffer allocator's use of GlobalCounter
    // synchronization.
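    // Illustrative caller pattern (a sketch, not part of this class): a NULL
    // result is treated as "nothing available right now" rather than as an
    // error, e.g.
    //   while (BufferNode* node = queue.pop()) {
    //     process_buffer(node); // process_buffer is a hypothetical consumer
    //   }
    // A pop that loses a race with a concurrent push/append simply ends the
    // loop; the caller can retry later.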
    BufferNode* pop();

    // Take all the buffers from the queue, leaving the queue empty.
    // Not thread-safe.
    HeadTail take_all();
  };

  // Concurrent refinement may stop processing in the middle of a buffer if
  // there is a pending safepoint, to avoid long delays to safepoint. A
  // partially processed buffer needs to be recorded for processing by the
  // safepoint if it's a GC safepoint; otherwise it needs to be recorded for
  // further concurrent refinement work after the safepoint. But if the
  // buffer was obtained from the completed buffer queue then it can't simply
  // be added back to the queue, as that would introduce a new source of ABA
  // for the queue.
  //
  // The PausedBuffers object is used to record such buffers for the upcoming
  // safepoint, and provides access to the buffers recorded for previous
  // safepoints. Before obtaining a buffer from the completed buffers queue,
  // we first transfer any buffers from previous safepoints to the queue.
  // This is ABA-safe because threads cannot be in the midst of a queue pop
  // across a safepoint.
  //
  // The paused buffers are conceptually an extension of the completed buffers
  // queue, and operations which need to deal with all of the queued buffers
  // (such as concatenate_logs) also need to deal with any paused buffers. In
  // general, if a safepoint performs a GC then the paused buffers will be
  // processed as part of it, and there won't be any paused buffers after a
  // GC safepoint.
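  //
  // Illustrative flow (a sketch of how the pieces declared below fit
  // together, not a specification):
  //   concurrent refinement:  node = get_completed_buffer();
  //                           bool done = refine_buffer(node, worker_id, stats);
  //                           handle_refined_buffer(node, done); // pauses if !done
  //   before popping again:   enqueue_previous_paused_buffers();
  //   at a GC safepoint:      enqueue_all_paused_buffers();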
  class PausedBuffers {
    class PausedList : public CHeapObj<mtGC> {
      BufferNode* volatile _head;
      BufferNode* _tail;
      size_t _safepoint_id;

      NONCOPYABLE(PausedList);

    public:
      PausedList();
      DEBUG_ONLY(~PausedList();)

      // Return true if this list was created to hold buffers for the
      // next safepoint.
      // precondition: not at safepoint.
      bool is_next() const;

      // Thread-safe add the buffer to the list.
      // precondition: not at safepoint.
      // precondition: is_next().
      void add(BufferNode* node);

      // Take all the buffers from the list. Not thread-safe.
      HeadTail take();
    };

    // The most recently created list, which might be for either the next or
    // a previous safepoint, or might be NULL if the next list hasn't been
    // created yet. We only need one list because of the requirement that
    // threads calling add() must first ensure there are no paused buffers
    // from a previous safepoint. There might be many list instances existing
    // at the same time though; there can be many threads competing to create
    // and install the next list, and meanwhile there can be a thread dealing
    // with the previous list.
    PausedList* volatile _plist;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(PausedList*));

    NONCOPYABLE(PausedBuffers);

  public:
    PausedBuffers();
    DEBUG_ONLY(~PausedBuffers();)

    // Thread-safe add the buffer to paused list for next safepoint.
    // precondition: not at safepoint.
    // precondition: does not have paused buffers from a previous safepoint.
    void add(BufferNode* node);

    // Thread-safe take all paused buffers for previous safepoints.
    // precondition: not at safepoint.
    HeadTail take_previous();

    // Take all the paused buffers.
    // precondition: at safepoint.
    HeadTail take_all();
  };

  // The primary refinement thread, for activation when the processing
  // threshold is reached. NULL if there aren't any refinement threads.
  G1ConcurrentRefineThread* _primary_refinement_thread;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(G1ConcurrentRefineThread*));
  // Upper bound on the number of cards in the completed and paused buffers.
  volatile size_t _num_cards;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(size_t));
  // Buffers ready for refinement.
  Queue _completed; // Has inner padding, including trailer.
  // Buffers for which refinement is temporarily paused.
  PausedBuffers _paused; // Has inner padding, including trailer.

  G1FreeIdSet _free_ids;

  // Activation threshold for the primary refinement thread.
  size_t _process_cards_threshold;

  // If the queue contains more cards than configured here, the
  // mutator must start doing some of the concurrent refinement work.
  size_t _max_cards;
  volatile size_t _padded_max_cards;
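  // _padded_max_cards is the effective mutator refinement threshold:
  // _max_cards plus any temporary padding installed by set_max_cards_padding()
  // (see below), reset back to _max_cards by discard_max_cards_padding() or
  // set_max_cards().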
  static const size_t MaxCardsUnlimited = SIZE_MAX;

  G1ConcurrentRefineStats _detached_refinement_stats;

  // Verify _num_cards == sum of cards in the completed queue.
  void verify_num_cards() const NOT_DEBUG_RETURN;

  // Thread-safe add a buffer to paused list for next safepoint.
  // precondition: not at safepoint.
  void record_paused_buffer(BufferNode* node);
  void enqueue_paused_buffers_aux(const HeadTail& paused);
  // Thread-safe transfer paused buffers for previous safepoints to the queue.
  // precondition: not at safepoint.
  void enqueue_previous_paused_buffers();
  // Transfer all paused buffers to the queue.
  // precondition: at safepoint.
  void enqueue_all_paused_buffers();

  void abandon_completed_buffers();

  // Refine the cards in "node" from its index to buffer_size.
  // Stops processing if SuspendibleThreadSet::should_yield() is true.
  // Returns true if the entire buffer was processed, false if there
  // is a pending yield request. The node's index is updated to exclude
  // the processed elements, e.g. up to the element before processing
  // stopped, or one past the last element if the entire buffer was
  // processed. Updates stats.
  bool refine_buffer(BufferNode* node,
                     uint worker_id,
                     G1ConcurrentRefineStats* stats);

  // Deal with buffer after a call to refine_buffer. If fully processed,
  // deallocate the buffer. Otherwise, record it as paused.
  void handle_refined_buffer(BufferNode* node, bool fully_processed);

  // Remove and return a completed buffer from the list, or return NULL
  // if none available.
  BufferNode* get_completed_buffer();

public:
  G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
  ~G1DirtyCardQueueSet();

  void set_primary_refinement_thread(G1ConcurrentRefineThread* thread) {
    _primary_refinement_thread = thread;
  }

  // The number of parallel ids that can be claimed to allow collector or
  // mutator threads to do card-processing work.
  static uint num_par_ids();

  static void handle_zero_index_for_thread(Thread* t);

  virtual void enqueue_completed_buffer(BufferNode* node);

  // Upper bound on the number of cards currently in this queue set.
  // Read without synchronization. The value may be high because there
  // is a concurrent modification of the set of buffers.
  size_t num_cards() const { return _num_cards; }

  // Get/Set the number of cards that triggers log processing.
  // Log processing should be done when the number of cards exceeds the
  // threshold.
  void set_process_cards_threshold(size_t sz) {
    _process_cards_threshold = sz;
  }
  size_t process_cards_threshold() const {
    return _process_cards_threshold;
  }
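  // A threshold of ProcessCardsThresholdNever means notify_if_necessary()
  // never activates the primary refinement thread.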
  static const size_t ProcessCardsThresholdNever = SIZE_MAX;

  // Notify the primary refinement thread if the number of pending cards
  // crossed the threshold.
  void notify_if_necessary();

  void merge_bufferlists(G1RedirtyCardsQueueSet* src);

  G1BufferNodeList take_all_completed_buffers();

  // Helper for G1DirtyCardQueue::handle_completed_buffer().
  // Enqueue the buffer, and optionally perform refinement by the mutator.
  // Mutator refinement is only done by Java threads, and only if there
  // are more than max_cards (possibly padded) cards in the completed
  // buffers. Updates stats.
  //
  // Mutator refinement, if performed, stops processing a buffer if
  // SuspendibleThreadSet::should_yield(), recording the incompletely
  // processed buffer for later processing of the remainder.
  void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats);

  // If there are more than stop_at cards in the completed buffers, pop
  // a buffer, refine its contents, and return true. Otherwise return
  // false. Updates stats.
  //
  // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
  // recording the incompletely processed buffer for later processing of
  // the remainder.
  bool refine_completed_buffer_concurrently(uint worker_id,
                                            size_t stop_at,
                                            G1ConcurrentRefineStats* stats);

  // If a full collection is happening, reset partial logs, and release
  // completed ones: the full collection will make them all irrelevant.
  void abandon_logs();

  // If any threads have partial logs, add them to the global list of logs.
  void concatenate_logs();

  // Return the total of mutator refinement stats for all threads.
  // Also resets the stats for the threads.
  // precondition: at safepoint.
  G1ConcurrentRefineStats get_and_reset_refinement_stats();

  // Accumulate refinement stats from threads that are detaching.
  void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);

  // Threshold for mutator threads to also do refinement when there
  // are concurrent refinement threads.
  size_t max_cards() const;

  // Set threshold for mutator threads to also do refinement.
  void set_max_cards(size_t value);

  // Artificially increase mutator refinement threshold.
  void set_max_cards_padding(size_t padding);

  // Discard artificial increase of mutator refinement threshold.
  void discard_max_cards_padding();
};

inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
  return static_cast<G1DirtyCardQueueSet*>(qset());
}

#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP