/*
 * Copyright (c) 2006, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_VM_GC_PARALLEL_MUTABLENUMASPACE_HPP
#define SHARE_VM_GC_PARALLEL_MUTABLENUMASPACE_HPP

#include "gc/parallel/mutableSpace.hpp"
#include "gc/shared/gcUtil.hpp"
#include "utilities/macros.hpp"

/*
 *    The NUMA-aware allocator (MutableNUMASpace) is a modification of
 * MutableSpace that preserves its interface but implements different
 * functionality. The space is split into chunks, one per locality group
 * (resizing for the adaptive size policy is also supported). Each thread
 * allocates in the chunk corresponding to its home locality group. Whenever
 * any chunk fills up, a young generation collection occurs.
 *   The chunks can also be adaptively resized. The idea behind adaptive
 * sizing is to reduce the loss of eden space due to fragmentation. The main
 * cause of fragmentation is uneven allocation rates across threads. The
 * allocation rate difference between locality groups may be caused either by
 * application specifics or by uneven LWP distribution by the OS. In addition,
 * the application can have fewer threads than the number of locality groups.
 * In order to resize the chunks we measure the allocation rate of the
 * application between collections and then reshape the chunks to reflect the
 * allocation rate pattern. The AdaptiveWeightedAverage exponentially decaying
 * average is used to smooth the measurements. The NUMASpaceResizeRate
 * parameter controls the adaptation speed by restricting the number of bytes
 * that can be moved during the adaptation phase.
 *   Chunks may contain pages from the wrong locality group. The page scanner
 * was introduced to address this problem. Remote pages typically appear due
 * to memory shortage in the target locality group. In addition, Solaris may
 * allocate a large page from a remote locality group even if small local
 * pages are available. The page scanner scans the pages right after a
 * collection and frees remote pages in the hope that subsequent reallocation
 * will be more successful. This approach has proved useful on heavily loaded
 * systems where multiple processes compete for memory.
 */
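
/*
 *   The sketch below is illustrative only and is not part of this header. It
 * is hypothetical, standalone code showing two ideas from the description
 * above: how an exponentially decaying average (the role played by
 * AdaptiveWeightedAverage) smooths allocation rate samples, and how the
 * amount of resizing per cycle could be capped (the role played by
 * NUMASpaceResizeRate). The names and exact formulas here are assumptions,
 * not the implementation.
 *
 *   #include <stddef.h>
 *
 *   // Exponentially decaying average: 'weight' is a percentage; a higher
 *   // weight makes the average react faster to new samples.
 *   static double decayed_average(double prev_avg, double sample, unsigned weight) {
 *     return ((100.0 - weight) * prev_avg + weight * sample) / 100.0;
 *   }
 *
 *   // Hypothetical resize step for one chunk: move its size towards the share
 *   // of eden proportional to its smoothed allocation rate, but by no more
 *   // than 'max_move' bytes per adaptation cycle.
 *   static size_t next_chunk_size(size_t cur_size, size_t eden_size,
 *                                 double chunk_rate, double total_rate,
 *                                 size_t max_move) {
 *     if (total_rate <= 0.0) return cur_size;
 *     size_t target = (size_t)(eden_size * (chunk_rate / total_rate));
 *     if (target > cur_size && target - cur_size > max_move) return cur_size + max_move;
 *     if (cur_size > target && cur_size - target > max_move) return cur_size - max_move;
 *     return target;
 *   }
 */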

class MutableNUMASpace : public MutableSpace {
  friend class VMStructs;

  class LGRPSpace : public CHeapObj<mtGC> {
    int _lgrp_id;
    MutableSpace* _space;
    MemRegion _invalid_region;
    AdaptiveWeightedAverage *_alloc_rate;
    bool _allocation_failed;

    struct SpaceStats {
      size_t _local_space, _remote_space, _unbiased_space, _uncommited_space;
      size_t _large_pages, _small_pages;

      SpaceStats() {
        _local_space = 0;
        _remote_space = 0;
        _unbiased_space = 0;
        _uncommited_space = 0;
        _large_pages = 0;
        _small_pages = 0;
      }
    };

    SpaceStats _space_stats;

    char* _last_page_scanned;
    char* last_page_scanned()            { return _last_page_scanned; }
    void set_last_page_scanned(char* p)  { _last_page_scanned = p;    }
   public:
    LGRPSpace(int l, size_t alignment) : _lgrp_id(l), _last_page_scanned(NULL), _allocation_failed(false) {
      _space = new MutableSpace(alignment);
      _alloc_rate = new AdaptiveWeightedAverage(NUMAChunkResizeWeight);
    }
    ~LGRPSpace() {
      delete _space;
      delete _alloc_rate;
    }

    void add_invalid_region(MemRegion r) {
      if (!_invalid_region.is_empty()) {
        _invalid_region.set_start(MIN2(_invalid_region.start(), r.start()));
        _invalid_region.set_end(MAX2(_invalid_region.end(), r.end()));
      } else {
        _invalid_region = r;
      }
    }

    static bool equals(void* lgrp_id_value, LGRPSpace* p) {
      return *(int*)lgrp_id_value == p->lgrp_id();
    }

    // Report a failed allocation.
    void set_allocation_failed() { _allocation_failed = true;  }

    void sample() {
      // If there was a failed allocation make allocation rate equal
      // to the size of the whole chunk. This ensures the progress of
      // the adaptation process.
      size_t alloc_rate_sample;
      if (_allocation_failed) {
        alloc_rate_sample = space()->capacity_in_bytes();
        _allocation_failed = false;
      } else {
        alloc_rate_sample = space()->used_in_bytes();
      }
      alloc_rate()->sample(alloc_rate_sample);
    }

    MemRegion invalid_region() const                { return _invalid_region;      }
    void set_invalid_region(MemRegion r)            { _invalid_region = r;         }
    int lgrp_id() const                             { return _lgrp_id;             }
    MutableSpace* space() const                     { return _space;               }
    AdaptiveWeightedAverage* alloc_rate() const     { return _alloc_rate;          }
    void clear_alloc_rate()                         { _alloc_rate->clear();        }
    SpaceStats* space_stats()                       { return &_space_stats;        }
    void clear_space_stats()                        { _space_stats = SpaceStats(); }

    void accumulate_statistics(size_t page_size);
    void scan_pages(size_t page_size, size_t page_count);
  };

  GrowableArray<LGRPSpace*>* _lgrp_spaces;
  size_t _page_size;
  unsigned _adaptation_cycles, _samples_count;

  bool _must_use_large_pages;

  void set_page_size(size_t psz)                     { _page_size = psz;          }
  size_t page_size() const                           { return _page_size;         }

  unsigned adaptation_cycles()                       { return _adaptation_cycles; }
  void set_adaptation_cycles(int v)                  { _adaptation_cycles = v;    }

  unsigned samples_count()                           { return _samples_count;     }
  void increment_samples_count()                     { ++_samples_count;          }

  size_t _base_space_size;
  void set_base_space_size(size_t v)                 { _base_space_size = v;      }
  size_t base_space_size() const                     { return _base_space_size;   }

  // Check if the NUMA topology has changed. Add and remove spaces if needed.
  // The update can be forced by setting the force parameter equal to true.
  bool update_layout(bool force);
  // Bias region towards the lgrp.
  void bias_region(MemRegion mr, int lgrp_id);
  // Free pages in a given region.
  void free_region(MemRegion mr);
  // Get current chunk size.
  size_t current_chunk_size(int i);
  // Get default chunk size (equally divide the space).
  size_t default_chunk_size();
  // Adapt the chunk size to follow the allocation rate.
  size_t adaptive_chunk_size(int i, size_t limit);
  // Scan and free invalid pages.
  void scan_pages(size_t page_count);
  // Return the bottom_region and the top_region. Align them to page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void select_tails(MemRegion new_region, MemRegion intersection,
                    MemRegion* bottom_region, MemRegion* top_region);
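  // Illustrative sketch (hypothetical code, not the implementation): the two
  // tails in the picture above are simply the parts of new_region on either
  // side of the intersection,
  //
  //   MemRegion bottom_region(new_region.start(), intersection.start());
  //   MemRegion top_region(intersection.end(), new_region.end());
  //
  // with each boundary then aligned to a page_size() multiple.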
  // Try to merge the invalid region with the bottom or top region by decreasing
  // the intersection area. If the invalid region lies inside the intersection,
  // return it (non-empty) aligned to the page_size() boundary.
  // |------------------new_region---------------------------------|
  // |----------------|-------invalid---|--------------------------|
  // |----bottom_region--|---intersection---|------top_region------|
  void merge_regions(MemRegion new_region, MemRegion* intersection,
                     MemRegion* invalid_region);

 public:
  GrowableArray<LGRPSpace*>* lgrp_spaces() const     { return _lgrp_spaces;       }
  MutableNUMASpace(size_t alignment);
  virtual ~MutableNUMASpace();
  // Space initialization.
  virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space, bool setup_pages = SetupPages);
  // Update the space layout if necessary. Do all adaptive resizing work.
  virtual void update();
  // Update allocation rate averages.
  virtual void accumulate_statistics();

  virtual void clear(bool mangle_space);
  virtual void mangle_unused_area() PRODUCT_RETURN;
  virtual void mangle_unused_area_complete() PRODUCT_RETURN;
  virtual void mangle_region(MemRegion mr) PRODUCT_RETURN;
  virtual void check_mangled_unused_area(HeapWord* limit) PRODUCT_RETURN;
  virtual void check_mangled_unused_area_complete() PRODUCT_RETURN;
  virtual void set_top_for_allocations(HeapWord* v) PRODUCT_RETURN;
  virtual void set_top_for_allocations() PRODUCT_RETURN;

  virtual void ensure_parsability();
  virtual size_t used_in_words() const;
  virtual size_t free_in_words() const;

  using MutableSpace::capacity_in_words;
  virtual size_t capacity_in_words(Thread* thr) const;
  virtual size_t tlab_capacity(Thread* thr) const;
  virtual size_t tlab_used(Thread* thr) const;
  virtual size_t unsafe_max_tlab_alloc(Thread* thr) const;

  // Allocation (return NULL if full)
  virtual HeapWord* allocate(size_t word_size);
  virtual HeapWord* cas_allocate(size_t word_size);

  // Debugging
  virtual void print_on(outputStream* st) const;
  virtual void print_short_on(outputStream* st) const;
  virtual void verify();

  virtual void set_top(HeapWord* value);
};

#endif // SHARE_VM_GC_PARALLEL_MUTABLENUMASPACE_HPP