/*
 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP

#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1ConcurrentRefineStats.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.hpp"

class G1ConcurrentRefineThread;
class G1DirtyCardQueueSet;
class G1RedirtyCardsQueueSet;
class Thread;

// A PtrQueue whose elements are dirty cards: pointers to the card table
// entries that the write barrier has marked dirty.
class G1DirtyCardQueue: public PtrQueue {
  G1ConcurrentRefineStats* _refinement_stats;

protected:
  virtual void handle_completed_buffer();

public:
  G1DirtyCardQueue(G1DirtyCardQueueSet* qset);

  // Flush before destroying; queue may be used to capture pending work while
  // doing something else, with auto-flush on completion.
  ~G1DirtyCardQueue();

  // Process queue entries and release resources.
  void flush();
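  //
  // Illustrative sketch (commentary only, not part of this interface;
  // assumes the usual accessor G1BarrierSet::dirty_card_queue_set()): a
  // thread can batch cards in a local queue and rely on the destructor's
  // auto-flush:
  //
  //   {
  //     G1DirtyCardQueue local(&G1BarrierSet::dirty_card_queue_set());
  //     ... enqueue pending cards into local while working ...
  //   } // destructor flushes any remaining entries to the qset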

  inline G1DirtyCardQueueSet* dirty_card_qset() const;

  G1ConcurrentRefineStats* refinement_stats() const {
    return _refinement_stats;
  }

  // To be called by the barrier set's on_thread_detach, to notify this
  // object of the corresponding state change of its owning thread.
  void on_thread_detach();

  // Compiler support: offsets of the index and buffer fields, used by
  // compiler-generated write-barrier code to access the queue directly.
  static ByteSize byte_offset_of_index() {
    return PtrQueue::byte_offset_of_index<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_index;

  static ByteSize byte_offset_of_buf() {
    return PtrQueue::byte_offset_of_buf<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_buf;

};

class G1DirtyCardQueueSet: public PtrQueueSet {
  // Head and tail of a list of BufferNodes, linked through their next()
  // fields.  Similar to G1BufferNodeList, but without the _entry_count.
  struct HeadTail {
    BufferNode* _head;
    BufferNode* _tail;
    HeadTail() : _head(NULL), _tail(NULL) {}
    HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
  };

  // A lock-free FIFO of BufferNodes, linked through their next() fields.
  // Restriction: pop() may return NULL even when the queue is non-empty, if
  // a concurrent push/append operation is in progress.
  class Queue {
    BufferNode* volatile _head;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));
    BufferNode* volatile _tail;
    DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));

    NONCOPYABLE(Queue);

  public:
    Queue() : _head(NULL), _tail(NULL) {}
    DEBUG_ONLY(~Queue();)

    // Return the first buffer in the queue.
    // Thread-safe, but the result may change immediately.
    BufferNode* top() const;

    // Thread-safe: add the buffer to the end of the queue.
    void push(BufferNode& node) { append(node, node); }

    // Thread-safe: add the buffers from first to last to the end of the queue.
    void append(BufferNode& first, BufferNode& last);

    // Thread-safe attempt to remove and return the first buffer in the queue.
    // Returns NULL if the queue is empty, or if a concurrent push/append
    // interferes.  Uses GlobalCounter critical sections to address the ABA
    // problem; this works with the buffer allocator's use of GlobalCounter
    // synchronization.
    BufferNode* pop();
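    //
    // Illustrative sketch (commentary only): since pop() can spuriously
    // return NULL under concurrent modification, a consumer is typically
    // driven by an external element count (e.g. the qset's _num_cards)
    // rather than by a single NULL result:
    //
    //   while (Atomic::load(&_num_cards) > stop_at) {
    //     BufferNode* node = _completed.pop();
    //     if (node == NULL) break;  // lost a race; caller may retry later
    //     ... process the buffer ...
    //   }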

    // Take all the buffers from the queue, leaving the queue empty.
    // Not thread-safe.
    HeadTail take_all();
  };

  // Concurrent refinement may stop processing in the middle of a buffer if
  // there is a pending safepoint, to avoid long delays to the safepoint.  A
  // partially processed buffer needs to be recorded for processing by the
  // safepoint if it's a GC safepoint; otherwise it needs to be recorded for
  // further concurrent refinement work after the safepoint.  But if the
  // buffer was obtained from the completed buffer queue then it can't simply
  // be added back to the queue, as that would introduce a new source of ABA
  // for the queue.
  //
  // The PausedBuffers object is used to record such buffers for the upcoming
  // safepoint, and provides access to the buffers recorded for previous
  // safepoints.  Before obtaining a buffer from the completed buffers queue,
  // we first transfer any buffers from previous safepoints to the queue.
  // This is ABA-safe because threads cannot be in the midst of a queue pop
  // across a safepoint.
  //
  // The paused buffers are conceptually an extension of the completed buffers
  // queue, and operations which need to deal with all of the queued buffers
  // (such as concatenate_logs) also need to deal with any paused buffers.  In
  // general, if a safepoint performs a GC then the paused buffers will be
  // processed as part of it, and there won't be any paused buffers after a
  // GC safepoint.
  class PausedBuffers {
    class PausedList : public CHeapObj<mtGC> {
      BufferNode* volatile _head;
      BufferNode* _tail;
      size_t _safepoint_id;

      NONCOPYABLE(PausedList);

    public:
      PausedList();
      DEBUG_ONLY(~PausedList();)

      // Return true if this list was created to hold buffers for the
      // next safepoint.
      // precondition: not at safepoint.
      bool is_next() const;

      // Thread-safe: add the buffer to the list.
      // precondition: not at safepoint.
      // precondition: is_next().
      void add(BufferNode* node);

      // Take all the buffers from the list.  Not thread-safe.
      HeadTail take();
    };

    // The most recently created list, which might be for either the next or
    // a previous safepoint, or might be NULL if the next list hasn't been
    // created yet.  We only need one list because of the requirement that
    // threads calling add() must first ensure there are no paused buffers
    // from a previous safepoint.  There might be many list instances existing
    // at the same time though; there can be many threads competing to create
    // and install the next list, and meanwhile there can be a thread dealing
    // with the previous list.
    PausedList* volatile _plist;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(PausedList*));

    NONCOPYABLE(PausedBuffers);

  public:
    PausedBuffers();
    DEBUG_ONLY(~PausedBuffers();)

    // Thread-safe: add the buffer to the paused list for the next safepoint.
    // precondition: not at safepoint.
    // precondition: does not have paused buffers from a previous safepoint.
    void add(BufferNode* node);

    // Thread-safe: take all paused buffers for previous safepoints.
    // precondition: not at safepoint.
    HeadTail take_previous();

    // Take all the paused buffers.
    // precondition: at safepoint.
    HeadTail take_all();
  };
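
  // Illustrative sketch (commentary only): the ABA-safe protocol described
  // above means a consumer transfers prior safepoints' paused buffers back
  // to the queue before popping from it, roughly:
  //
  //   enqueue_previous_paused_buffers();     // drain previous safepoints first
  //   BufferNode* node = _completed.pop();   // now safe w.r.t. queue ABA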

  // The primary refinement thread, for activation when the processing
  // threshold is reached.  NULL if there aren't any refinement threads.
  G1ConcurrentRefineThread* _primary_refinement_thread;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(G1ConcurrentRefineThread*));
  // Upper bound on the number of cards in the completed and paused buffers.
  volatile size_t _num_cards;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(size_t));
  // Buffers ready for refinement.
  Queue _completed;           // Has inner padding, including trailer.
  // Buffers for which refinement is temporarily paused.
  PausedBuffers _paused;      // Has inner padding, including trailer.

  G1FreeIdSet _free_ids;

  // Activation threshold for the primary refinement thread.
  size_t _process_cards_threshold;

  // If the queue contains more cards than configured here, the
  // mutator must start doing some of the concurrent refinement work.
  size_t _max_cards;
  volatile size_t _padded_max_cards;
  static const size_t MaxCardsUnlimited = SIZE_MAX;

  G1ConcurrentRefineStats _detached_refinement_stats;

  // Verify _num_cards == sum of cards in the completed queue.
  void verify_num_cards() const NOT_DEBUG_RETURN;

  // Thread-safe: add a buffer to the paused list for the next safepoint.
  // precondition: not at safepoint.
  void record_paused_buffer(BufferNode* node);
  void enqueue_paused_buffers_aux(const HeadTail& paused);
  // Thread-safe: transfer paused buffers for previous safepoints to the queue.
  // precondition: not at safepoint.
  void enqueue_previous_paused_buffers();
  // Transfer all paused buffers to the queue.
  // precondition: at safepoint.
  void enqueue_all_paused_buffers();

  void abandon_completed_buffers();

  // Refine the cards in "node" from its index to buffer_size.
  // Stops processing if SuspendibleThreadSet::should_yield() is true.
  // Returns true if the entire buffer was processed, false if there
  // is a pending yield request.  The node's index is updated to exclude
  // the processed elements, e.g. up to the element before processing
  // stopped, or one past the last element if the entire buffer was
  // processed.  Updates stats.
  bool refine_buffer(BufferNode* node,
                     uint worker_id,
                     G1ConcurrentRefineStats* stats);
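  //
  // Worked example of the index contract (illustrative numbers): for a
  // buffer of size 256 with index 10, refining entries 10..99 and then
  // yielding leaves index == 100 and returns false; refining all remaining
  // entries leaves index == 256 (one past the last element) and returns true.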

  // Deal with buffer after a call to refine_buffer.  If fully processed,
  // deallocate the buffer.  Otherwise, record it as paused.
  void handle_refined_buffer(BufferNode* node, bool fully_processed);

  // Remove and return a completed buffer from the list, or return NULL
  // if none available.
  BufferNode* get_completed_buffer();

public:
  G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
  ~G1DirtyCardQueueSet();

  void set_primary_refinement_thread(G1ConcurrentRefineThread* thread) {
    _primary_refinement_thread = thread;
  }

  // The number of parallel ids that can be claimed to allow collector or
  // mutator threads to do card-processing work.
  static uint num_par_ids();

  static void handle_zero_index_for_thread(Thread* t);

  virtual void enqueue_completed_buffer(BufferNode* node);

  // Upper bound on the number of cards currently in this queue set.
  // Read without synchronization.  The value may be high because there
  // is a concurrent modification of the set of buffers.
  size_t num_cards() const { return _num_cards; }

  // Get/Set the number of cards that triggers log processing.
  // Log processing should be done when the number of cards exceeds the
  // threshold.
  void set_process_cards_threshold(size_t sz) {
    _process_cards_threshold = sz;
  }
  size_t process_cards_threshold() const {
    return _process_cards_threshold;
  }
  static const size_t ProcessCardsThresholdNever = SIZE_MAX;

  // Notify the consumer (the primary refinement thread) if the number of
  // cards crossed the processing threshold.
  void notify_if_necessary();

  void merge_bufferlists(G1RedirtyCardsQueueSet* src);

  G1BufferNodeList take_all_completed_buffers();

  // Helper for G1DirtyCardQueue::handle_completed_buffer().
  // Enqueue the buffer, and optionally perform refinement by the mutator.
  // Mutator refinement is only done by Java threads, and only if there
  // are more than max_cards (possibly padded) cards in the completed
  // buffers.  Updates stats.
  //
  // Mutator refinement, if performed, stops processing a buffer if
  // SuspendibleThreadSet::should_yield(), recording the incompletely
  // processed buffer for later processing of the remainder.
  void handle_completed_buffer(BufferNode* node, G1ConcurrentRefineStats* stats);

  // If there are more than stop_at cards in the completed buffers, pop
  // a buffer, refine its contents, and return true.  Otherwise return
  // false.  Updates stats.
  //
  // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
  // recording the incompletely processed buffer for later processing of
  // the remainder.
  bool refine_completed_buffer_concurrently(uint worker_id,
                                            size_t stop_at,
                                            G1ConcurrentRefineStats* stats);
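  //
  // Illustrative usage sketch (commentary only; dcqs, worker_id, stop_at,
  // and stats are assumed to be set up by the caller). A refinement
  // thread's service loop might look roughly like:
  //
  //   while (dcqs.refine_completed_buffer_concurrently(worker_id,
  //                                                    stop_at,
  //                                                    &stats)) {
  //     if (SuspendibleThreadSet::should_yield()) {
  //       SuspendibleThreadSet::yield();
  //     }
  //   }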

  // If a full collection is happening, reset partial logs, and release
  // completed ones: the full collection will make them all irrelevant.
  void abandon_logs();

  // If any threads have partial logs, add them to the global list of logs.
  void concatenate_logs();

  // Return the total of mutator refinement stats for all threads.
  // Also resets the stats for the threads.
  // precondition: at safepoint.
  G1ConcurrentRefineStats get_and_reset_refinement_stats();

  // Accumulate refinement stats from threads that are detaching.
  void record_detached_refinement_stats(G1ConcurrentRefineStats* stats);

  // Threshold for mutator threads to also do refinement when there
  // are concurrent refinement threads.
  size_t max_cards() const;

  // Set threshold for mutator threads to also do refinement.
  void set_max_cards(size_t value);

  // Artificially increase mutator refinement threshold.
  void set_max_cards_padding(size_t padding);

  // Discard artificial increase of mutator refinement threshold.
  void discard_max_cards_padding();
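  //
  // Illustrative sketch (commentary only; the exact arithmetic is an
  // implementation detail, presumably max_cards() plus the padding):
  //
  //   dcqs.set_max_cards_padding(1024);  // temporarily defer mutator refinement
  //   ... work that should not be slowed by mutator refinement ...
  //   dcqs.discard_max_cards_padding();  // restore the normal threshold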
};

inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
  return static_cast<G1DirtyCardQueueSet*>(qset());
}

#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP