// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// Common definitions for tcmalloc code.

#ifndef TCMALLOC_COMMON_H_
#define TCMALLOC_COMMON_H_

#include "config.h"
#include <stddef.h>                     // for size_t
#ifdef HAVE_STDINT_H
#include <stdint.h>                     // for uintptr_t, uint64_t
#endif
#include "internal_logging.h"  // for ASSERT, etc
#include "base/basictypes.h"   // for LIKELY, etc

// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we are forced to use 8-byte alignment we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind that with 16-byte alignment the space wasted due to
// alignment can be as high as 25% (e.g. a malloc of 24 bytes gets 32 bytes).
static const size_t kMinAlign = 8;
#else
static const size_t kMinAlign = 16;
#endif

// Using large pages speeds up execution at the cost of larger memory use.
// Deallocation may speed up noticeably because the page map gets 8x smaller,
// so lookups in the page map cause fewer L2 cache misses, which translates
// into a speedup for application/platform combinations with high L2 cache
// pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift = 15;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift = 16;
#else
static const size_t kPageShift = 13;
#endif

static const size_t kClassSizesMax = 96;

static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize = 1 << kPageShift;
static const size_t kMaxSize = 256 * 1024;
static const size_t kAlignment = 8;
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
static const size_t kMaxPages = 1 << (20 - kPageShift);
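// For reference (derived from the constants above, not additional
// configuration): the default build uses kPageShift = 13, i.e.
// kPageSize = 8 KiB and kMaxPages = 1 << (20 - 13) = 128.  With
// TCMALLOC_32K_PAGES (kPageShift = 15) this becomes kPageSize = 32 KiB and
// kMaxPages = 32; with TCMALLOC_64K_PAGES (kPageShift = 16), kPageSize =
// 64 KiB and kMaxPages = 16.  In every case the exact-size lists cover
// spans of up to 1 MiB, since kMaxPages * kPageSize == 1 << 20.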

// Default bound on the total size of all thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

#if __aarch64__ || __x86_64__ || _M_AMD64 || _M_ARM64
// All current x86_64 processors only look at the lower 48 bits in
// virtual to physical address translation.  The top 16 bits are all the
// same as bit 47.  And in practice an address with bit 47 set to 1 is
// reserved for kernel-space, so there are really only 47 usable bits
// from malloc's perspective.  This lets us use faster two-level page
// maps on this architecture.
//
// There is a very similar story on 64-bit ARM, except that it has the
// full 48 bits for user-space.  Because of that, and because in principle
// OSes can start giving some of the highest-bit-set addresses to
// user-space, we don't bother to limit x86 to 47 bits.
//
// There are now published plans to add more bits to the x86-64 virtual
// address space, but since 48 bits has been the norm for a long time and
// lots of software relies on it, it will be opt-in from the OS
// perspective.  So we can keep assuming "48 bits" at least for now.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
// MIPS and PPC have more general hardware, so we have to support the
// full 64 bits of address space.
static const int kAddressBits = 8 * sizeof(void*);
#endif
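// A brief, illustrative consequence of the constant above (not part of the
// original comment): with kAddressBits == 48 and the default kPageShift of
// 13, a page number needs 48 - 13 = 35 bits, which a shallow radix-tree
// page map can cover cheaply; with the full 64-bit address space the page
// map needs more levels, making each lookup slightly more expensive.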

namespace tcmalloc {

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}
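// Worked examples (illustrative only, assuming the default kPageShift of 13,
// i.e. 8 KiB pages):
//   pages(0)    == 0
//   pages(1)    == 1   // any partial page rounds up to a whole page
//   pages(8192) == 1
//   pages(8193) == 2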

// For larger allocation sizes, we use larger memory alignments to
// reduce the number of size classes.
int AlignmentForSize(size_t size);

// Size-class information + mapping
class SizeMap {
 private:
  //-------------------------------------------------------------------
  // Mapping from size to size_class and vice versa
  //-------------------------------------------------------------------

  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
  // So for these larger sizes we have an array indexed by ceil(size/128).
  //
  // We flatten both logical arrays into one physical array and use
  // arithmetic to compute an appropriate index.  The constants used by
  // ClassIndex() were selected to make the flattening work.
  //
  // Examples:
  //   Size       Expression                      Index
  //   -------------------------------------------------------
  //   0          (0 + 7) / 8                     0
  //   1          (1 + 7) / 8                     1
  //   ...
  //   1024       (1024 + 7) / 8                  128
  //   1025       (1025 + 127 + (120<<7)) / 128   129
  //   ...
  //   32768      (32768 + 127 + (120<<7)) / 128  376
  static const int kMaxSmallSize = 1024;
  static const size_t kClassArraySize =
      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
  unsigned char class_array_[kClassArraySize];
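
  // Spelling out the array-size arithmetic above (derived from the constants
  // in this file, shown only for illustration): with kMaxSize = 256 * 1024,
  // the largest index ever used is
  // ClassIndex(kMaxSize) = (262144 + 127 + (120 << 7)) >> 7 = 2168, so
  // kClassArraySize = 2168 + 1 = 2169 entries cover every representable size.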

  static inline size_t SmallSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 7) >> 3;
  }

  static inline size_t LargeSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
  }

  // If size is no more than kMaxSize, compute index of the
  // class_array[] entry for it, putting the class index in output
  // parameter idx and returning true.  Otherwise return false.
  static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
                                                             uint32* idx) {
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      *idx = (static_cast<uint32>(s) + 7) >> 3;
      return true;
    } else if (s <= kMaxSize) {
      *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
      return true;
    }
    return false;
  }

  // Compute index of the class_array[] entry for a given size
  static inline size_t ClassIndex(size_t s) {
    // Use unsigned arithmetic to avoid unnecessary sign extensions.
    ASSERT(0 <= s);
    ASSERT(s <= kMaxSize);
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      return SmallSizeClass(s);
    } else {
      return LargeSizeClass(s);
    }
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  int num_objects_to_move_[kClassSizesMax];

  int NumMoveSize(size_t size);

  // Mapping from size class to max size storable in that class
  int32 class_to_size_[kClassSizesMax];

  // Mapping from size class to number of pages to allocate at a time
  size_t class_to_pages_[kClassSizesMax];

 public:
  size_t num_size_classes;

  // Constructor should do nothing since we rely on explicit Init()
  // call, which may or may not be called before the constructor runs.
  SizeMap() { }

  // Initialize the mapping arrays
  void Init();

  inline int SizeClass(size_t size) {
    return class_array_[ClassIndex(size)];
  }

  // Check if size is small enough to be representable by a size
  // class, and if it is, put matching size class into *cl.  Returns
  // true iff matching size class was found.
  inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
    uint32 idx;
    if (!ClassIndexMaybe(size, &idx)) {
      return false;
    }
    *cl = class_array_[idx];
    return true;
  }
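
  // A sketch of how callers typically combine the two lookups above
  // (illustrative only; `sizemap` stands for a pointer to an already-Init()ed
  // SizeMap and is not a name defined in this header):
  //
  //   uint32 cl;
  //   if (sizemap->GetSizeClass(size, &cl)) {
  //     int32 allocated = sizemap->ByteSizeForClass(cl);
  //     // A request for `size` bytes is served from class `cl`, which
  //     // hands out `allocated`-byte objects (allocated >= size).
  //   } else {
  //     // Too big for any size class; such requests are served directly
  //     // from the page heap.
  //   }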

  // Get the byte-size for a specified class
  inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to max size storable in that class
  inline int32 class_to_size(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to number of pages to allocate at a time
  inline size_t class_to_pages(uint32 cl) {
    return class_to_pages_[cl];
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  inline int num_objects_to_move(uint32 cl) {
    return num_objects_to_move_[cl];
  }
};

// Allocates "bytes" worth of memory and returns it.  Increments
// metadata_system_bytes appropriately.  May return NULL if allocation
// fails.  Requires pageheap_lock is held.
void* MetaDataAlloc(size_t bytes);

// Returns the total number of bytes allocated from the system.
// Requires pageheap_lock is held.
uint64_t metadata_system_bytes();
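
// Illustrative call pattern for MetaDataAlloc (a sketch, not code from this
// file; Static::pageheap_lock() is assumed to come from static_vars.h and
// SpinLockHolder from base/spinlock.h):
//
//   SpinLockHolder h(Static::pageheap_lock());
//   void* meta = MetaDataAlloc(bytes);
//   if (meta == NULL) {
//     // Out of memory for metadata; callers must handle this.
//   }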

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;          // Size of object
  uintptr_t depth;         // Number of PC values stored in array below
  void*     stack[kMaxStackDepth];
};

}  // namespace tcmalloc

#endif  // TCMALLOC_COMMON_H_