// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// Common definitions for tcmalloc code.

#ifndef TCMALLOC_COMMON_H_
#define TCMALLOC_COMMON_H_

#include "config.h"
#include <stddef.h>                     // for size_t
#ifdef HAVE_STDINT_H
#include <stdint.h>                     // for uintptr_t, uint64_t
#endif
#include "internal_logging.h"  // for ASSERT, etc
#include "base/basictypes.h"   // for LIKELY, etc

// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we are forced to use 8-byte alignment, we use an alignment
// of at least 16 bytes to satisfy the requirements of some SSE types.
// Keep in mind that with 16-byte alignment the space wasted to
// alignment can reach 25% (e.g. a malloc of 24 bytes gets 32 bytes).
static const size_t kMinAlign   = 8;
#else
static const size_t kMinAlign   = 16;
#endif

// Using large pages speeds up execution at the cost of larger memory use.
// Deallocation may speed up noticeably because the page map gets 8x
// smaller, so lookups in the page map cause fewer L2 cache misses, which
// translates to a speedup for application/platform combinations with
// high L2 cache pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift  = 15;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift  = 16;
#else
static const size_t kPageShift  = 13;
#endif

static const size_t kClassSizesMax = 96;

static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 256 * 1024;
static const size_t kAlignment  = 8;
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
static const size_t kMaxPages = 1 << (20 - kPageShift);
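// Worked example (illustrative): kMaxPages is chosen so that exact-size
// lists always cover spans of up to 1 MiB.  With the default
// kPageShift = 13 (8 KiB pages) that is 1 << 7 = 128 pages; with
// TCMALLOC_64K_PAGES it is 1 << 4 = 16 pages.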

// Default bound on the total size of all thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

#if __aarch64__ || __x86_64__ || _M_AMD64 || _M_ARM64
// All current x86_64 processors only look at the lower 48 bits when
// translating virtual to physical addresses. The top 16 bits must all
// match bit 47, and addresses with bit 47 set are in practice reserved
// for kernel space, so malloc effectively has 47 usable bits. This
// lets us use faster two-level page maps on this architecture.
//
// The story is very similar on 64-bit ARM, except that user space gets
// the full 48 bits. Because of that, and because operating systems
// could in principle start handing out highest-bit-set addresses to
// user space, we don't bother limiting x86 to 47 bits.
//
// There are published plans to add more bits to the x86-64 virtual
// address space, but since 48 bits has been the norm for a long time
// and lots of software relies on it, the extension will be opt-in from
// the OS's perspective. So we can keep assuming "48 bits", at least
// for now.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
// MIPS and PPC have more general addressing hardware, so we have to
// support the full 64 bits of address space.
static const int kAddressBits = 8 * sizeof(void*);
#endif
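
// Illustrative arithmetic (an added note, assuming the default
// kPageShift = 13): with kAddressBits = 48, a page number fits in
// 48 - 13 = 35 bits, which is what lets the two-level page map used on
// these architectures stay small and fast.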

namespace tcmalloc {

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}
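
// Worked examples (illustrative, assuming the default kPageShift = 13,
// i.e. 8 KiB pages):
//   pages(1)    == 1   // a partial page rounds up
//   pages(8192) == 1   // exactly one page
//   pages(8193) == 2   // one full page plus a one-byte remainder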

// For larger allocation sizes, we use larger memory alignments to
// reduce the number of size classes.
int AlignmentForSize(size_t size);

// Size-class information + mapping
class SizeMap {
 private:
  //-------------------------------------------------------------------
  // Mapping from size to size_class and vice versa
  //-------------------------------------------------------------------

  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
  // So for these larger sizes we have an array indexed by ceil(size/128).
  //
  // We flatten both logical arrays into one physical array and use
  // arithmetic to compute an appropriate index.  The constants used by
  // ClassIndex() were selected to make the flattening work.
  //
  // Examples:
  //   Size       Expression                      Index
  //   -------------------------------------------------------
  //   0          (0 + 7) / 8                     0
  //   1          (1 + 7) / 8                     1
  //   ...
  //   1024       (1024 + 7) / 8                  128
  //   1025       (1025 + 127 + (120<<7)) / 128   129
  //   ...
  //   32768      (32768 + 127 + (120<<7)) / 128  376
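  //
  // Why the "120 << 7" offset works (a derivation added for clarity):
  // small sizes occupy indexes 0..128, so the first large size, 1025,
  // must map to index 129.  ceil(1025 / 128) is only 9, so an offset of
  // 120 is needed; folding it into the dividend as 120 << 7 (that is,
  // 120 * 128) lets a single add-and-shift expression serve both halves
  // of the flattened array.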
  static const int kMaxSmallSize = 1024;
  static const size_t kClassArraySize =
      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
  unsigned char class_array_[kClassArraySize];

  static inline size_t SmallSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 7) >> 3;
  }

  static inline size_t LargeSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
  }

  // If size is no more than kMaxSize, compute index of the
  // class_array[] entry for it, putting the class index in output
  // parameter idx and returning true. Otherwise return false.
  static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
                                                             uint32* idx) {
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      *idx = (static_cast<uint32>(s) + 7) >> 3;
      return true;
    } else if (s <= kMaxSize) {
      *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
      return true;
    }
    return false;
  }

  // Compute index of the class_array[] entry for a given size
  static inline size_t ClassIndex(size_t s) {
    // Use unsigned arithmetic to avoid unnecessary sign extensions.
    ASSERT(0 <= s);
    ASSERT(s <= kMaxSize);
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      return SmallSizeClass(s);
    } else {
      return LargeSizeClass(s);
    }
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  int num_objects_to_move_[kClassSizesMax];

  int NumMoveSize(size_t size);

  // Mapping from size class to max size storable in that class
  int32 class_to_size_[kClassSizesMax];

  // Mapping from size class to number of pages to allocate at a time
  size_t class_to_pages_[kClassSizesMax];

 public:
  size_t num_size_classes;

  // Constructor should do nothing since we rely on explicit Init()
  // call, which may or may not be called before the constructor runs.
  SizeMap() { }

  // Initialize the mapping arrays
  void Init();

  inline int SizeClass(size_t size) {
    return class_array_[ClassIndex(size)];
  }

  // Check if size is small enough to be representable by a size
  // class, and if it is, put the matching size class into *cl.
  // Returns true iff a matching size class was found.
  inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
    uint32 idx;
    if (!ClassIndexMaybe(size, &idx)) {
      return false;
    }
    *cl = class_array_[idx];
    return true;
  }
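
  // Illustrative use (a sketch; `sizemap` and the surrounding cache
  // plumbing are assumptions, not part of this header).  After Init(),
  // the allocation fast path can do:
  //
  //   uint32 cl;
  //   if (sizemap.GetSizeClass(size, &cl)) {
  //     int32 allocated = sizemap.ByteSizeForClass(cl);  // rounded-up size
  //     // ...pop an object of class `cl` from the thread cache...
  //   } else {
  //     // ...fall back to a page-level allocation from the PageHeap...
  //   }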

  // Get the byte-size for a specified class
  inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to max size storable in that class
  inline int32 class_to_size(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to number of pages to allocate at a time
  inline size_t class_to_pages(uint32 cl) {
    return class_to_pages_[cl];
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  inline int num_objects_to_move(uint32 cl) {
    return num_objects_to_move_[cl];
  }
};

// Allocates "bytes" worth of memory and returns it.  Increments
// metadata_system_bytes appropriately.  May return NULL if allocation
// fails.  Requires pageheap_lock is held.
void* MetaDataAlloc(size_t bytes);

// Returns the total number of bytes allocated from the system.
// Requires pageheap_lock is held.
uint64_t metadata_system_bytes();

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;          // Size of object
  uintptr_t depth;         // Number of PC values stored in array below
  void*     stack[kMaxStackDepth];
};
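
// For example (illustrative), code that treats the fields generically
// can round-trip them through void*:
//   StackTrace t;
//   t.size = 64;
//   void* as_ptr = reinterpret_cast<void*>(t.size);        // store as void*
//   uintptr_t back = reinterpret_cast<uintptr_t>(as_ptr);  // == 64 again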

}  // namespace tcmalloc

#endif  // TCMALLOC_COMMON_H_