/*
 * Copyright (c) 2001, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_G1_G1CONCURRENTMARK_HPP
#define SHARE_GC_G1_G1CONCURRENTMARK_HPP

#include "gc/g1/g1ConcurrentMarkBitMap.hpp"
#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp"
#include "gc/g1/g1HeapVerifier.hpp"
#include "gc/g1/g1RegionMarkStatsCache.hpp"
#include "gc/g1/heapRegionSet.hpp"
#include "gc/shared/gcCause.hpp"
#include "gc/shared/taskTerminator.hpp"
#include "gc/shared/taskqueue.hpp"
#include "gc/shared/verifyOption.hpp"
#include "gc/shared/workgroup.hpp"
#include "memory/allocation.hpp"
#include "utilities/compilerWarnings.hpp"
#include "utilities/numberSeq.hpp"

class ConcurrentGCTimer;
class G1ConcurrentMarkThread;
class G1CollectedHeap;
class G1CMOopClosure;
class G1CMTask;
class G1ConcurrentMark;
class G1OldTracer;
class G1RegionToSpaceMapper;
class G1SurvivorRegions;
class ThreadClosure;

// This is a container class for either an oop or a continuation address for
// mark stack entries. Both are pushed onto the mark stack.
class G1TaskQueueEntry {
private:
  void* _holder;

  static const uintptr_t ArraySliceBit = 1;

  G1TaskQueueEntry(oop obj) : _holder(obj) {
    assert(_holder != NULL, "Not allowed to set NULL task queue element");
  }
  G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { }
public:

  G1TaskQueueEntry() : _holder(NULL) { }
  // Trivially copyable, for use in GenericTaskQueue.

  static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); }
  static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); }

  oop obj() const {
    assert(!is_array_slice(), "Trying to read array slice " PTR_FORMAT " as oop", p2i(_holder));
    return cast_to_oop(_holder);
  }

  HeapWord* slice() const {
    assert(is_array_slice(), "Trying to read oop " PTR_FORMAT " as array slice", p2i(_holder));
    return (HeapWord*)((uintptr_t)_holder & ~ArraySliceBit);
  }

  bool is_oop() const { return !is_array_slice(); }
  bool is_array_slice() const { return ((uintptr_t)_holder & ArraySliceBit) != 0; }
  bool is_null() const { return _holder == NULL; }
};
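
// Illustrative use of the tagged entries above (a sketch, not part of the
// marking code; process_obj() and process_slice() are hypothetical helpers):
//
//   G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj);
//   ...
//   if (entry.is_array_slice()) {
//     process_slice(entry.slice()); // continuation address within a large objArray
//   } else {
//     process_obj(entry.obj());     // regular object to scan
//   }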

typedef GenericTaskQueue<G1TaskQueueEntry, mtGC> G1CMTaskQueue;
typedef GenericTaskQueueSet<G1CMTaskQueue, mtGC> G1CMTaskQueueSet;

// Closure used by CM during concurrent reference discovery
// and reference processing (during remarking) to determine
// if a particular object is alive. It is primarily used
// to determine if referents of discovered reference objects
// are alive. An instance is also embedded into the
// reference processor as the _is_alive_non_header field
class G1CMIsAliveClosure : public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1CMIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
  bool do_object_b(oop obj);
};

class G1CMSubjectToDiscoveryClosure : public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1CMSubjectToDiscoveryClosure(G1CollectedHeap* g1h) : _g1h(g1h) { }
  bool do_object_b(oop obj);
};

// Represents the overflow mark stack used by concurrent marking.
//
// Stores oops in a huge buffer in virtual memory that is always fully committed.
// Resizing may only happen during a STW pause when the stack is empty.
//
// Memory is allocated on a "chunk" basis, i.e. a set of oops. For this, the mark
// stack memory is split into evenly sized chunks of oops. Users can only
// add or remove entries on that basis.
// Chunks are filled in increasing address order. Not completely filled chunks
// have a NULL element as a terminating element.
//
// Every chunk has a header containing a single pointer element used for memory
// management. This wastes some space, but is negligible (< .1% with current sizing).
//
// Memory management is done using a mix of tracking a high water-mark indicating
// that all chunks at a lower address are valid chunks, and a singly linked free
// list connecting all empty chunks.
class G1CMMarkStack {
public:
  // Number of TaskQueueEntries that can fit in a single chunk.
  static const size_t EntriesPerChunk = 1024 - 1 /* One reference for the next pointer */;
private:
  struct TaskQueueEntryChunk {
    TaskQueueEntryChunk* next;
    G1TaskQueueEntry data[EntriesPerChunk];
  };

  size_t _max_chunk_capacity;    // Maximum number of TaskQueueEntryChunk elements on the stack.

  TaskQueueEntryChunk* _base;    // Bottom address of allocated memory area.
  size_t _chunk_capacity;        // Current maximum number of TaskQueueEntryChunk elements.

  char _pad0[DEFAULT_CACHE_LINE_SIZE];
  TaskQueueEntryChunk* volatile _free_list;  // Linked list of free chunks that can be allocated by users.
  char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*)];
  TaskQueueEntryChunk* volatile _chunk_list; // List of chunks currently containing data.
  volatile size_t _chunks_in_chunk_list;
  char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)];

  volatile size_t _hwm;          // High water mark within the reserved space.
  char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)];

  // Allocate a new chunk from the reserved memory, using the high water mark. Returns
  // NULL if out of memory.
  TaskQueueEntryChunk* allocate_new_chunk();

  // Atomically add the given chunk to the list.
  void add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem);
  // Atomically remove and return a chunk from the given list. Returns NULL if the
  // list is empty.
  TaskQueueEntryChunk* remove_chunk_from_list(TaskQueueEntryChunk* volatile* list);

  void add_chunk_to_chunk_list(TaskQueueEntryChunk* elem);
  void add_chunk_to_free_list(TaskQueueEntryChunk* elem);

  TaskQueueEntryChunk* remove_chunk_from_chunk_list();
  TaskQueueEntryChunk* remove_chunk_from_free_list();

  // Resizes the mark stack to the given new capacity. Releases any previous
  // memory if successful.
  bool resize(size_t new_capacity);

 public:
  G1CMMarkStack();
  ~G1CMMarkStack();

  // Alignment and minimum capacity of this mark stack in number of oops.
  static size_t capacity_alignment();

  // Allocate and initialize the mark stack with the given number of oops.
  bool initialize(size_t initial_capacity, size_t max_capacity);

  // Pushes the given buffer containing at most EntriesPerChunk elements on the mark
  // stack. If fewer than EntriesPerChunk elements are to be pushed, the array must
  // be terminated with a NULL.
  // Returns whether the buffer contents were successfully pushed to the global mark
  // stack.
  bool par_push_chunk(G1TaskQueueEntry* buffer);

  // Pops a chunk from this mark stack, copying its contents into the given buffer. This
  // chunk may contain up to EntriesPerChunk elements. If there are fewer, the last
  // element in the array is a NULL pointer.
  bool par_pop_chunk(G1TaskQueueEntry* buffer);

  // Return whether the chunk list is empty. Racy due to unsynchronized access to
  // _chunk_list.
  bool is_empty() const { return _chunk_list == NULL; }

  size_t capacity() const  { return _chunk_capacity; }

  // Expand the stack, typically in response to an overflow condition
  void expand();

  // Return the approximate number of oops on this mark stack. Racy due to
  // unsynchronized access to _chunks_in_chunk_list.
  size_t size() const { return _chunks_in_chunk_list * EntriesPerChunk; }

  void set_empty();

  // Apply Fn to every oop on the mark stack. The mark stack must not
  // be modified while iterating.
  template<typename Fn> void iterate(Fn fn) const PRODUCT_RETURN;
};
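
// Sketch of how a chunk is handed to this stack (illustrative only; 'buffer'
// and 'n' are hypothetical locals, and the real transfer with overflow
// handling is G1ConcurrentMark::mark_stack_push() below):
//
//   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
//   // ... fill the first n entries from a task-local queue ...
//   if (n < G1CMMarkStack::EntriesPerChunk) {
//     buffer[n] = G1TaskQueueEntry(); // NULL-terminate a partially filled chunk
//   }
//   if (!mark_stack.par_push_chunk(buffer)) {
//     // out of mark stack memory; the caller must flag overflow
//   }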

// Root MemRegions are memory areas that contain objects whose references are
// roots with respect to the marking. They must be scanned before marking to
// maintain the SATB invariant.
// Typically they contain the areas from nTAMS to top of the regions.
// We could scan and mark through these objects during the concurrent start pause,
// but for pause time reasons we move this work to the concurrent phase.
// We need to complete this procedure before the next GC because it might determine
// that some of these "root objects" are dead, potentially dropping some required
// references.
// Root MemRegions comprise the contents of survivor regions at the end
// of the GC, and any objects copied into the old gen during GC.
class G1CMRootMemRegions {
  // The set of root MemRegions.
  MemRegion*   _root_regions;
  size_t const _max_regions;

  volatile size_t _num_root_regions; // Actual number of root regions.

  volatile size_t _claimed_root_regions; // Number of root regions currently claimed.

  volatile bool _scan_in_progress;
  volatile bool _should_abort;

  void notify_scan_done();

public:
  G1CMRootMemRegions(uint const max_regions);
  ~G1CMRootMemRegions();

  // Reset the data structure to allow addition of new root regions.
  void reset();

  void add(HeapWord* start, HeapWord* end);

  // Reset the claiming / scanning of the root regions.
  void prepare_for_scan();

  // Forces claim_next() to return NULL so that the iteration aborts early.
  void abort() { _should_abort = true; }

  // Return true if CM threads are actively scanning root regions,
  // false otherwise.
  bool scan_in_progress() { return _scan_in_progress; }

  // Claim the next root MemRegion to scan atomically, or return NULL if
  // all have been claimed.
  const MemRegion* claim_next();

  // The number of root regions to scan.
  uint num_root_regions() const;

  void cancel_scan();

  // Flag that we're done with root region scanning and notify anyone
  // who's waiting on it. If aborted is false, assume that all regions
  // have been claimed.
  void scan_finished();

  // If CM threads are still scanning root regions, wait until they
  // are done. Return true if we had to wait, false otherwise.
  bool wait_until_scan_finished();
};
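
// Illustrative claiming loop for root region scanning (a sketch only; the real
// loop is driven by the worker task set up in G1ConcurrentMark::scan_root_regions()):
//
//   const MemRegion* region = root_regions->claim_next();
//   while (region != NULL) {
//     scan_root_region(region, worker_id);   // G1ConcurrentMark::scan_root_region()
//     region = root_regions->claim_next();
//   }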

// This class manages data structures and methods for doing liveness analysis in
// G1's concurrent cycle.
class G1ConcurrentMark : public CHeapObj<mtGC> {
  friend class G1CMBitMapClosure;
  friend class G1CMConcurrentMarkingTask;
  friend class G1CMDrainMarkingStackClosure;
  friend class G1CMKeepAliveAndDrainClosure;
  friend class G1CMRefProcProxyTask;
  friend class G1CMRemarkTask;
  friend class G1CMTask;
  friend class G1ConcurrentMarkThread;

  G1ConcurrentMarkThread* _cm_thread;     // The thread doing the work
  G1CollectedHeap*        _g1h;           // The heap

  // Concurrent marking support structures
  G1CMBitMap              _mark_bitmap_1;
  G1CMBitMap              _mark_bitmap_2;
  G1CMBitMap*             _prev_mark_bitmap; // Completed mark bitmap
  G1CMBitMap*             _next_mark_bitmap; // Under-construction mark bitmap

  // Heap bounds
  MemRegion const         _heap;

  // Root region tracking and claiming
  G1CMRootMemRegions      _root_regions;

  // For grey objects
  G1CMMarkStack           _global_mark_stack; // Grey objects behind global finger
  HeapWord* volatile      _finger;            // The global finger, region aligned,
                                              // always pointing to the end of the
                                              // last claimed region

  uint                    _worker_id_offset;
  uint                    _max_num_tasks;    // Maximum number of marking tasks
  uint                    _num_active_tasks; // Number of tasks currently active
  G1CMTask**              _tasks;            // Task queue array (max_worker_id length)

  G1CMTaskQueueSet*       _task_queues; // Task queue set
  TaskTerminator          _terminator;  // For termination

  // Two sync barriers that are used to synchronize tasks when an
  // overflow occurs. The algorithm is the following. All tasks enter
  // the first one to ensure that they have all stopped manipulating
  // the global data structures. After they exit it, they re-initialize
  // their data structures and task 0 re-initializes the global data
  // structures. Then, they enter the second sync barrier. This
  // ensures that no task starts doing work before all data
  // structures (local and global) have been re-initialized. When they
  // exit it, they are free to start working again.
  WorkGangBarrierSync     _first_overflow_barrier_sync;
  WorkGangBarrierSync     _second_overflow_barrier_sync;
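
  // Sketch of that protocol from a single task's point of view (illustrative
  // only; see enter_first_sync_barrier() / enter_second_sync_barrier() below):
  //
  //   enter_first_sync_barrier(worker_id);  // all tasks have stopped touching global state
  //   // task 0 resets the global structures, every task resets its local ones
  //   enter_second_sync_barrier(worker_id); // nobody resumes before re-initialization is done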

  // This is set by any task, when an overflow on the global data
  // structures is detected
  volatile bool           _has_overflown;
  // True: marking is concurrent, false: we're in remark
  volatile bool           _concurrent;
  // Set at the end of a Full GC so that marking aborts
  volatile bool           _has_aborted;

  // Used when remark aborts due to an overflow to indicate that
  // another concurrent marking phase should start
  volatile bool           _restart_for_overflow;

  ConcurrentGCTimer*      _gc_timer_cm;

  G1OldTracer*            _gc_tracer_cm;

  // Timing statistics. All of them are in ms
  NumberSeq _init_times;
  NumberSeq _remark_times;
  NumberSeq _remark_mark_times;
  NumberSeq _remark_weak_ref_times;
  NumberSeq _cleanup_times;
  double    _total_cleanup_time;

  double*   _accum_task_vtime;   // Accumulated task vtime

  WorkGang* _concurrent_workers;
  uint      _num_concurrent_workers; // The number of marking worker threads we're using
  uint      _max_concurrent_workers; // Maximum number of marking worker threads

  void verify_during_pause(G1HeapVerifier::G1VerifyType type, VerifyOption vo, const char* caller);

  void finalize_marking();

  void weak_refs_work_parallel_part(BoolObjectClosure* is_alive, bool purged_classes);
  void weak_refs_work(bool clear_all_soft_refs);

  void report_object_count(bool mark_completed);

  void reclaim_empty_regions();

  // After reclaiming empty regions, update heap sizes.
  void compute_new_sizes();

  // Clear statistics gathered during the concurrent cycle for the given region after
  // it has been reclaimed.
  void clear_statistics(HeapRegion* r);

  // Resets the global marking data structures, as well as the
  // task local ones; should be called during concurrent start.
  void reset();

  // Resets all the marking data structures. Called when we have to restart
  // marking or when marking completes (via set_non_marking_state below).
  void reset_marking_for_restart();

  // We do this after we're done with marking so that the marking data
  // structures are initialized to a sensible and predictable state.
  void reset_at_marking_complete();

  // Called to indicate how many threads are currently active.
  void set_concurrency(uint active_tasks);

  // Should be called to indicate which phase we're in (concurrent
  // mark or remark) and how many threads are currently active.
  void set_concurrency_and_phase(uint active_tasks, bool concurrent);

  // Prints all gathered CM-related statistics
  void print_stats();

  HeapWord*           finger()       { return _finger;   }
  bool                concurrent()   { return _concurrent; }
  uint                active_tasks() { return _num_active_tasks; }
  TaskTerminator*     terminator()   { return &_terminator; }

  // Claims the next available region to be scanned by a marking
  // task/thread. It might return NULL if the next region is empty or
  // we have run out of regions. In the latter case, out_of_regions()
  // determines whether we've really run out of regions or the task
  // should call claim_region() again. This might seem a bit
  // awkward. Originally, the code was written so that claim_region()
  // either successfully returned with a non-empty region or there
  // were no more regions to be claimed. The problem with this was
  // that, in certain circumstances, it iterated over large chunks of
  // the heap finding only empty regions and, while it was working, it
  // was preventing the calling task from calling its regular clock
  // method. So, this way, each task will spend very little time in
  // claim_region() and is allowed to call the regular clock method
  // frequently.
  HeapRegion* claim_region(uint worker_id);
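
  // Rough calling pattern implied by the comment above (a sketch only; the
  // actual loop lives in G1CMTask::do_marking_step(), interleaved with abort
  // checks):
  //
  //   HeapRegion* hr = _cm->claim_region(worker_id);
  //   while (hr == NULL && !_cm->out_of_regions()) {
  //     regular_clock_call();               // keep the task's clock ticking
  //     hr = _cm->claim_region(worker_id);  // then try again
  //   }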

  // Determines whether we've run out of regions to scan. Note that
  // the finger can point past the heap end in case the heap was expanded
  // to satisfy an allocation without doing a GC. This is fine, because all
  // objects in those regions will be considered live anyway because of
  // SATB guarantees (i.e. their TAMS will be equal to bottom).
  bool out_of_regions() { return _finger >= _heap.end(); }

  // Returns the task with the given id
  G1CMTask* task(uint id) {
    // During concurrent start we use the parallel gc threads to do some work, so
    // we can only compare against _max_num_tasks.
    assert(id < _max_num_tasks, "Task id %u not within bounds up to %u", id, _max_num_tasks);
    return _tasks[id];
  }

  // Access / manipulation of the overflow flag which is set to
  // indicate that the global stack has overflown
  bool has_overflown()           { return _has_overflown; }
  void set_has_overflown()       { _has_overflown = true; }
  void clear_has_overflown()     { _has_overflown = false; }
  bool restart_for_overflow()    { return _restart_for_overflow; }

  // Methods to enter the two overflow sync barriers
  void enter_first_sync_barrier(uint worker_id);
  void enter_second_sync_barrier(uint worker_id);

  // Clear the given bitmap in parallel using the given WorkGang. If may_yield is
  // true, periodically insert checks to see if this method should exit prematurely.
  void clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield);

  // Region statistics gathered during marking.
  G1RegionMarkStats* _region_mark_stats;
  // Top pointer for each region at the start of the rebuild remembered set process
  // for regions whose remembered sets need to be rebuilt. A NULL for a given region
  // means that this region does not need to be scanned during the remembered set
  // rebuilding phase at all.
  HeapWord* volatile* _top_at_rebuild_starts;
  // True when the Remark pause selected regions for rebuilding.
  bool _needs_remembered_set_rebuild;
public:
  void add_to_liveness(uint worker_id, oop const obj, size_t size);
  // Live words in the given region as determined by concurrent marking, i.e. the amount of
  // live words between bottom and nTAMS.
  size_t live_words(uint region) const { return _region_mark_stats[region]._live_words; }
  // Returns the liveness value in bytes.
  size_t live_bytes(uint region) const { return live_words(region) * HeapWordSize; }

  // Sets the internal top_at_region_start for the given region to current top of the region.
  inline void update_top_at_rebuild_start(HeapRegion* r);
  // TARS for the given region during remembered set rebuilding.
  inline HeapWord* top_at_rebuild_start(uint region) const;

  // Clear statistics gathered during the concurrent cycle for the given region after
  // it has been reclaimed.
  void clear_statistics_in_region(uint region_idx);
  // Notification for eagerly reclaimed regions to clean up.
  void humongous_object_eagerly_reclaimed(HeapRegion* r);
  // Manipulation of the global mark stack.
  // The push and pop operations are used by tasks for transfers
  // between task-local queues and the global mark stack.
  bool mark_stack_push(G1TaskQueueEntry* arr) {
    if (!_global_mark_stack.par_push_chunk(arr)) {
      set_has_overflown();
      return false;
    }
    return true;
  }
  bool mark_stack_pop(G1TaskQueueEntry* arr) {
    return _global_mark_stack.par_pop_chunk(arr);
  }
  size_t mark_stack_size() const                { return _global_mark_stack.size(); }
  size_t partial_mark_stack_size_target() const { return _global_mark_stack.capacity() / 3; }
  bool mark_stack_empty() const                 { return _global_mark_stack.is_empty(); }

  G1CMRootMemRegions* root_regions() { return &_root_regions; }

  void concurrent_cycle_start();
  // Abandon current marking iteration due to a Full GC.
  void concurrent_cycle_abort();
  void concurrent_cycle_end();

  void update_accum_task_vtime(int i, double vtime) {
    _accum_task_vtime[i] += vtime;
  }

  double all_task_accum_vtime() {
    double ret = 0.0;
    for (uint i = 0; i < _max_num_tasks; ++i)
      ret += _accum_task_vtime[i];
    return ret;
  }

  // Attempts to steal an object from the task queues of other tasks
  bool try_stealing(uint worker_id, G1TaskQueueEntry& task_entry);

  G1ConcurrentMark(G1CollectedHeap* g1h,
                   G1RegionToSpaceMapper* prev_bitmap_storage,
                   G1RegionToSpaceMapper* next_bitmap_storage);
  ~G1ConcurrentMark();

  G1ConcurrentMarkThread* cm_thread() { return _cm_thread; }

  const G1CMBitMap* const prev_mark_bitmap() const { return _prev_mark_bitmap; }
  G1CMBitMap* next_mark_bitmap() const { return _next_mark_bitmap; }

  // Calculates the number of concurrent GC threads to be used in the marking phase.
  uint calc_active_marking_workers();

  // Moves all per-task cached data into global state.
  void flush_all_task_caches();
  // Prepare internal data structures for the next mark cycle. This includes clearing
  // the next mark bitmap and some internal data structures. This method is intended
  // to be called concurrently to the mutator. It will yield to safepoint requests.
  void cleanup_for_next_mark();

  // Clear the next marking bitmap during safepoint.
  void clear_next_bitmap(WorkGang* workers);

  // These methods do the work that needs to be done at the start and end of the
  // concurrent start pause.
  void pre_concurrent_start(GCCause::Cause cause);
  void post_concurrent_mark_start();
  void post_concurrent_undo_start();

  // Scan all the root regions and mark everything reachable from
  // them.
  void scan_root_regions();

  // Scan a single root MemRegion to mark everything reachable from it.
  void scan_root_region(const MemRegion* region, uint worker_id);

  // Do concurrent phase of marking, to a tentative transitive closure.
  void mark_from_roots();

  // Do concurrent preclean work.
  void preclean();

  void remark();

  void swap_mark_bitmaps();

  void cleanup();
  // Mark in the previous bitmap. Caution: the prev bitmap is usually read-only, so use
  // this carefully.
  inline void mark_in_prev_bitmap(oop p);

  // Clears marks for all objects in the given range, for the prev or
  // next bitmaps.  Caution: the previous bitmap is usually
  // read-only, so use this carefully!
  void clear_range_in_prev_bitmap(MemRegion mr);

  inline bool is_marked_in_prev_bitmap(oop p) const;

  // Verify that there are no collection set oops on the stacks (taskqueues /
  // global mark stack) and fingers (global / per-task).
  // If marking is not in progress, it's a no-op.
  void verify_no_collection_set_oops() PRODUCT_RETURN;

  inline bool do_yield_check();

  bool has_aborted()      { return _has_aborted; }

  void print_summary_info();

  void threads_do(ThreadClosure* tc) const;

  void print_on_error(outputStream* st) const;

  // Mark the given object on the next bitmap if it is below nTAMS.
  inline bool mark_in_next_bitmap(uint worker_id, HeapRegion* const hr, oop const obj);
  inline bool mark_in_next_bitmap(uint worker_id, oop const obj);

  inline bool is_marked_in_next_bitmap(oop p) const;

  ConcurrentGCTimer* gc_timer_cm() const { return _gc_timer_cm; }

private:
  // Rebuilds the remembered sets for chosen regions in parallel and concurrently to the application.
  void rebuild_rem_set_concurrently();

  bool needs_remembered_set_rebuild() const { return _needs_remembered_set_rebuild; }

};

// A class representing a marking task.
class G1CMTask : public TerminatorTerminator {
private:
  enum PrivateConstants {
    // The regular clock call is called once the number of scanned words reaches
    // this limit
    words_scanned_period          = 12*1024,
    // The regular clock call is called once the number of visited
    // references reaches this limit
    refs_reached_period           = 1024,
    // Initial value for the hash seed, used in the work stealing code
    init_hash_seed                = 17
  };

  G1CMObjArrayProcessor       _objArray_processor;

  uint                        _worker_id;
  G1CollectedHeap*            _g1h;
  G1ConcurrentMark*           _cm;
  G1CMBitMap*                 _next_mark_bitmap;
  // the task queue of this task
  G1CMTaskQueue*              _task_queue;

  G1RegionMarkStatsCache      _mark_stats_cache;
  // Number of calls to this task
  uint                        _calls;

  // When the virtual timer reaches this time, the marking step should exit
  double                      _time_target_ms;
  // Start time of the current marking step
  double                      _start_time_ms;

  // Oop closure used for iterations over oops
  G1CMOopClosure*             _cm_oop_closure;

  // Region this task is scanning, NULL if we're not scanning any
  HeapRegion*                 _curr_region;
  // Local finger of this task, NULL if we're not scanning a region
  HeapWord*                   _finger;
  // Limit of the region this task is scanning, NULL if we're not scanning one
  HeapWord*                   _region_limit;

  // Number of words this task has scanned
  size_t                      _words_scanned;
  // When _words_scanned reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _words_scanned_limit;
  // Initial value of _words_scanned_limit (i.e. what it was
  // before it was decreased).
  size_t                      _real_words_scanned_limit;

  // Number of references this task has visited
  size_t                      _refs_reached;
  // When _refs_reached reaches this limit, the regular clock is
  // called. Notice that this might be decreased under certain
  // circumstances (i.e. when we believe that we did an expensive
  // operation).
  size_t                      _refs_reached_limit;
  // Initial value of _refs_reached_limit (i.e. what it was before
  // it was decreased).
  size_t                      _real_refs_reached_limit;

  // If true, then the task has aborted for some reason
  bool                        _has_aborted;
  // Set when the task aborts because it has met its time quota
  bool                        _has_timed_out;
  // True when we're draining SATB buffers; this avoids the task
  // aborting due to SATB buffers being available (as we're already
  // dealing with them)
  bool                        _draining_satb_buffers;

  // Number sequence of past step times
  NumberSeq                   _step_times_ms;
  // Elapsed time of this task
  double                      _elapsed_time_ms;
  // Termination time of this task
  double                      _termination_time_ms;
  // When this task got into the termination protocol
  double                      _termination_start_time_ms;

  TruncatedSeq                _marking_step_diff_ms;

  // Updates the local fields after this task has claimed
  // a new region to scan
  void setup_for_region(HeapRegion* hr);
  // Makes the limit of the region up-to-date
  void update_region_limit();

  // Called when either the words scanned or the refs visited limit
  // has been reached
  void reached_limit();
  // Recalculates the words scanned and refs visited limits
  void recalculate_limits();
  // Decreases the words scanned and refs visited limits when we reach
  // an expensive operation
  void decrease_limits();
  // Checks whether the words scanned or refs visited reached their
  // respective limit and calls reached_limit() if they have
  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit) {
      reached_limit();
    }
  }
  // Supposed to be called regularly during a marking step as
  // it checks a bunch of conditions that might cause the marking step
  // to abort.
  // Returns true if the marking step should continue, false to abort it.
  bool regular_clock_call();

  // Set abort flag if regular_clock_call() check fails
  inline void abort_marking_if_regular_check_fail();

  // Test whether obj might have already been passed over by the
  // mark bitmap scan, and so needs to be pushed onto the mark stack.
  bool is_below_finger(oop obj, HeapWord* global_finger) const;

  template<bool scan> void process_grey_task_entry(G1TaskQueueEntry task_entry);
public:
  // Apply the closure on the given area of the objArray. Return the number of words
  // scanned.
  inline size_t scan_objArray(objArrayOop obj, MemRegion mr);
  // Resets the task; should be called right at the beginning of a marking phase.
  void reset(G1CMBitMap* next_mark_bitmap);
  // Clears all the fields that correspond to a claimed region.
  void clear_region_fields();

  // The main method of this class which performs a marking step
  // trying not to exceed the given duration. However, it might exit
  // prematurely, according to some conditions (i.e. SATB buffers are
  // available for processing).
  void do_marking_step(double target_ms,
                       bool do_termination,
                       bool is_serial);

  // These two calls start and stop the timer
  void record_start_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0;
  }
  void record_end_time() {
    _elapsed_time_ms = os::elapsedTime() * 1000.0 - _elapsed_time_ms;
  }

  // Returns the worker ID associated with this task.
  uint worker_id() { return _worker_id; }

  // From TerminatorTerminator. It determines whether this task should
  // exit the termination protocol after it's entered it.
  virtual bool should_exit_termination();

  // Resets the local region fields after a task has finished scanning a
  // region; or when they have become stale as a result of the region
  // being evacuated.
  void giveup_current_region();

  HeapWord* finger()            { return _finger; }

  bool has_aborted()            { return _has_aborted; }
  void set_has_aborted()        { _has_aborted = true; }
  void clear_has_aborted()      { _has_aborted = false; }

  void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);

  // Increment the number of references this task has visited.
  void increment_refs_reached() { ++_refs_reached; }

  // Grey the object by marking it.  If not already marked, push it on
  // the local queue if below the finger. obj is required to be below its region's NTAMS.
  // Returns whether there has been a mark to the bitmap.
  inline bool make_reference_grey(oop obj);

  // Grey the object (by calling make_reference_grey) if required,
  // e.g. obj is below its containing region's NTAMS.
  // Precondition: obj is a valid heap object.
  // Returns true if the reference caused a mark to be set in the next bitmap.
  template <class T>
  inline bool deal_with_reference(T* p);

  // Scans an object and visits its children.
  inline void scan_task_entry(G1TaskQueueEntry task_entry);

  // Pushes an object on the local queue.
  inline void push(G1TaskQueueEntry task_entry);

  // Move entries to the global stack.
  void move_entries_to_global_stack();
  // Move entries from the global stack, return true if we were successful in doing so.
  bool get_entries_from_global_stack();

  // Pops and scans objects from the local queue. If partially is
  // true, then it stops when the queue size drops to a given limit. If
  // partially is false, then it stops when the queue is empty.
  void drain_local_queue(bool partially);
  // Moves entries from the global stack to the local queue and
  // drains the local queue. If partially is true, then it stops when
  // both the global stack and the local queue reach a given size. If
  // partially is false, it tries to empty them totally.
  void drain_global_stack(bool partially);
  // Keeps picking SATB buffers and processing them until no SATB
  // buffers are available.
  void drain_satb_buffers();

  // Moves the local finger to a new location
  inline void move_finger_to(HeapWord* new_finger) {
    assert(new_finger >= _finger && new_finger < _region_limit, "invariant");
    _finger = new_finger;
  }

  G1CMTask(uint worker_id,
           G1ConcurrentMark *cm,
           G1CMTaskQueue* task_queue,
           G1RegionMarkStats* mark_stats);

  inline void update_liveness(oop const obj, size_t const obj_size);

  // Clear (without flushing) the mark cache entry for the given region.
  void clear_mark_stats_cache(uint region_idx);
  // Evict the whole statistics cache into the global statistics. Returns the
  // number of cache hits and misses so far.
  Pair<size_t, size_t> flush_mark_stats_cache();
  // Prints statistics associated with this task
  void print_stats();
};

// Class that's used to print out per-region liveness
// information. It's currently used at the end of marking and also
// after we sort the old regions at the end of the cleanup operation.
class G1PrintRegionLivenessInfoClosure : public HeapRegionClosure {
  // Accumulators for these values.
  size_t _total_used_bytes;
  size_t _total_capacity_bytes;
  size_t _total_prev_live_bytes;
  size_t _total_next_live_bytes;

  // Accumulator for the remembered set size
  size_t _total_remset_bytes;

  // Accumulator for strong code roots memory size
  size_t _total_strong_code_roots_bytes;

  static double bytes_to_mb(size_t val) {
    return (double) val / (double) M;
  }

public:
  // The header and footer are printed in the constructor and
  // destructor respectively.
  G1PrintRegionLivenessInfoClosure(const char* phase_name);
  virtual bool do_heap_region(HeapRegion* r);
  ~G1PrintRegionLivenessInfoClosure();
};

#endif // SHARE_GC_G1_G1CONCURRENTMARK_HPP